diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..711388e --- /dev/null +++ b/.editorconfig @@ -0,0 +1,20 @@ +# Editor Configuration (http://editorconfig.org) +root = true + +[*] +charset = utf-8 +indent_style = space +indent_size = 4 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +max_line_length = 88 + +[*.{json,yml}] +indent_size = 2 + +[*.{md,rst}] +trim_trailing_whitespace = false + +[Makefile] +indent_style = tab diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..83f1817 --- /dev/null +++ b/.gitignore @@ -0,0 +1,209 @@ +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +### macOS template +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### Linux template +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### Windows template +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + diff --git a/README.md b/README.md index f28093d..fcb875f 100644 --- a/README.md +++ b/README.md @@ -1 +1,3 @@ -Our documentation is on Read The Docs: \ No newline at end of file +CoPTR is a tool for estimating peak-to-trough ratios from metagenomic sequencing. + +You can find our documentation on Read The Docs: \ No newline at end of file diff --git a/coptr.py b/coptr.py deleted file mode 100644 index 2d1161e..0000000 --- a/coptr.py +++ /dev/null @@ -1,387 +0,0 @@ -""" -This file is part of CoPTR. - -CoPTR is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -CoPTR is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with CoPTR. If not, see . 
-""" - -import argparse -import numpy as np -import os -import os.path -import pickle as pkl -import sys - -from src.bam_processor import BamProcessor, CoverageMapRef, CoverageMapContig -from src.coptr_contig import estimate_ptrs_coptr_contig -from src.coptr_ref import estimate_ptrs_coptr_ref -from src.compute_read_counts import compute_read_counts -from src.print import print_error, print_info -from src.read_mapper import ReadMapper -from src.util import get_fastq_name - -VERSION="1.0.0" - -class ProgramOptions: - - def __init__(self): - parser = argparse.ArgumentParser( - description="CoPTR (v{}): Compute PTRs from complete reference genomes and assemblies.".format(VERSION), - usage="""coptr.py [options] - -command: index create a bowtie2 index for a reference database - map map reads against a reference database - merge merge BAM files from reads mapped to multiple indexes - extract compute coverage maps from bam files - estimate estimate PTRs from coverage maps - count compute read counts for each genome after filtering -""" - ) - self.default_bt2_k = 10 - - if len(sys.argv[1:]) < 1: - parser.print_help() - exit(1) - - parser.add_argument("command", type=str, help="Command to run.") - args = parser.parse_args(sys.argv[1:2]) - - if not hasattr(self, args.command): - print_error("Main", "Unrecognized command.", quit=False) - parser.print_help() - exit(1) - getattr(self, args.command)() - - - def index(self): - parser = argparse.ArgumentParser(usage="coptr.py index [-h] [--bt2-bmax BT2_BMAX] [--bt2-dcv BT2_DCV] [--bt2-threads BT2_THREADS] [--bt2-packed] ref-fasta index-out") - parser.add_argument("ref_fasta", help= - """File or folder containing fasta to index. If a folder, the extension for each - fasta must be one of [.fasta, .fna, .fa] - """ - ) - parser.add_argument("index_out", help="Filepath to store index.") - parser.add_argument("--bt2-bmax", default=None, help="Set the --bmax arguement for bowtie2-build. Used to control memory useage.") - parser.add_argument("--bt2-dcv", default=None, help="Set the --dcv argument for bowtie2-build. Used to control memory usage.") - parser.add_argument("--bt2-threads", default="1", help="Number of threads to pass to bowtie2-build.") - parser.add_argument("--bt2-packed", action="store_true", help="Set the --packed flag for bowtie2-build. Used to control memory usage.") - - if len(sys.argv[2:]) < 1: - parser.print_help() - exit(1) - - args = parser.parse_args(sys.argv[2:]) - read_mapper = ReadMapper() - read_mapper.index(args.ref_fasta, args.index_out, args.bt2_bmax, args.bt2_dcv, args.bt2_threads, args.bt2_packed) - - - def map(self): - parser = argparse.ArgumentParser(usage="coptr.py map [-h] [--threads INT] [--bt2-k INT] [--paired] index input out-folder") - parser.add_argument("index", help="Name of database index.") - parser.add_argument("input", help= - """File or folder containing fastq reads to map. If a folder, the extension for - each fastq must be one of [.fastq, .fq, .fastq.gz, fq.gz] - """ - ) - parser.add_argument("out_folder", - help="Folder to save mapped reads. BAM files are output here." - ) - parser.add_argument("--paired", action="store_true", - help="Set for paired end reads. Assumes fastq files end in _1.* and _2.*") - parser.add_argument("--threads", type=int, default=1, - help="Number of threads for bowtie2 mapping." - ) - parser.add_argument("--bt2-k", type=int, default=self.default_bt2_k, - help="(Default 10). Number of alignments to report. 
Passed to -k flag of bowtie2.", - - ) - - if len(sys.argv[2:]) < 1: - parser.print_help() - exit(1) - - - args = parser.parse_args(sys.argv[2:]) - read_mapper = ReadMapper() - read_mapper.map(args.index, args.input, args.out_folder, args.paired, args.threads, args.bt2_k) - - - def merge(self): - parser = argparse.ArgumentParser(usage="coptr.py merge [-h] in-bam1 in-bam2 ... in-bamN out-bam") - parser.add_argument("in-bams", nargs="+", - help="A space separated list of BAM files to merge. Assumes same reads were mapped against different indexes. " + - "Only keeps read 1 of paired end sequencing, since this is used downstream.") - parser.add_argument("out-bam", help="Path to merged BAM.") - - if len(sys.argv[2:]) < 2: - parser.print_help() - exit(1) - - args = vars(parser.parse_args(sys.argv[2:])) - in_bams = args["in-bams"] - out_bam = args["out-bam"] - bam_processor = BamProcessor() - bam_processor.merge(in_bams, out_bam) - - - def extract(self): - parser = argparse.ArgumentParser(usage="coptr.py extract [-h] [--ref-genome-regex REF_GENOME_REGEX] [--check-regex] in-folder out-folder") - parser.add_argument("in_folder", help="Folder with BAM files.") - parser.add_argument("out_folder", help="Folder to store coverage maps.") - parser.add_argument("--ref-genome-regex", default="[^\|]+", - help="Regular expression extracting a reference genome id from the sequence id in a bam file.", - ) - parser.add_argument("--check-regex", action="store_true", default=False, - help="Check the regular expression by counting reference genomes without processing." - ) - parser.add_argument("--bt2-k", type=int, default=self.default_bt2_k, - help="Maximum number of alignments.", - - ) - - if len(sys.argv[2:]) < 1: - parser.print_help() - exit(1) - - args = parser.parse_args(sys.argv[2:]) - - bam_processor = BamProcessor(args.ref_genome_regex) - ref_sequences = set() - ref_genomes = set() - for f in sorted(os.listdir(args.in_folder)): - fname, ext = os.path.splitext(f) - if ext == ".bam": - fpath = os.path.join(args.in_folder, f) - seq, gen = bam_processor.get_ref_names(fpath) - ref_sequences.update(seq) - ref_genomes.update(gen) - - if os.path.isfile(os.path.join(args.out_folder, get_fastq_name(f) + ".cm.pkl")): - print_info("BamProcessor", "output for {} already found, skipping".format(fname)) - continue - - # don't process the rest of the bam file if we just want to - # sanity check the regular expression - if args.check_regex: - continue - - coverage_maps = bam_processor.process_bam(fpath, args.bt2_k) - with open(os.path.join(args.out_folder, get_fastq_name(f) + ".cm.pkl"), "wb") as f: - pkl.dump(coverage_maps, f) - - print_info("BamProcessor", "found {} reference sequences corresponding to {} genomes".format(len(ref_sequences), len(ref_genomes))) - if args.check_regex: - print_info("BamProcessor", "reference genome ids:") - for ref in sorted(ref_genomes): - print("\t", ref, file=sys.stderr) - - - def estimate(self): - parser = argparse.ArgumentParser(usage= - """usage: coptr.py estimate [-h] [--min-reads MIN_READS] [--min-cov MIN_COV] [--threads THREADS] [--plot OUTFOLDER] [--batch-size INT] [--restart] coverage-map-folder out-file - """ - ) - parser.add_argument("coverage_map_folder", help="Folder with coverage maps computed from 'extract'.") - parser.add_argument("out_file", help="Filename to store PTR table.") - parser.add_argument("--min-reads", type=float, - help="Minimum number of reads required to compute a PTR (default 5000).", default=5000 - ) - parser.add_argument("--min-cov", type=float, - 
help="Fraction of nonzero bins required to compute a PTR (default 0.75).", default=0.75 - ) - parser.add_argument("--min-samples", type=float, - help="CoPTRContig only. Minimum number of samples required to reorder bins (default 5).", default=5 - ) - parser.add_argument("--threads", type=int, help="Number of threads to use (default 1).", default=1) - parser.add_argument("--plot", default=None, help="Plot model fit and save the results.") - parser.add_argument("--restart", default=False, action="store_true", - help="Restarts the estimation step using the genomes in the coverage-maps-genome folder." - ) - - if len(sys.argv[2:]) < 1: - parser.print_help() - exit(1) - - args = parser.parse_args(sys.argv[2:]) - sample_ids = set() - ref_genome_ids = set() - assembly_genome_ids = set() - - - grouped_coverage_map_folder = os.path.join(args.coverage_map_folder, "coverage-maps-genome") - if not args.restart and not os.path.isdir(grouped_coverage_map_folder): - os.mkdir(grouped_coverage_map_folder) - - if args.restart: - print_info("CoPTR", "restarting from files in " + grouped_coverage_map_folder) - for cm_file in sorted(os.listdir(grouped_coverage_map_folder)): - _, ext = os.path.splitext(cm_file) - if ext != ".pkl": continue - - print_info("CoPTR", "checking " + cm_file) - with open(os.path.join(grouped_coverage_map_folder, cm_file), "rb") as f: - try: - while True: - cm = pkl.load(f) - if cm.is_assembly: - assembly_genome_ids.add(cm.genome_id) - else: - ref_genome_ids.add(cm.genome_id) - sample_ids.add(cm.sample_id) - - except EOFError: - pass - - else: - print_info("CoPTR", "grouping reads by reference genome") - print_info("CoPTR", "saving to " + grouped_coverage_map_folder) - - # first construct a list of genome_ids for ptr estimates - for f in sorted(os.listdir(args.coverage_map_folder)): - fname, ext = os.path.splitext(f) - if ext != ".pkl": continue - fpath = os.path.join(args.coverage_map_folder, f) - - print_info("CoPTR", "\tprocessing {}".format(f)) - - with open(fpath, "rb") as file: - coverage_maps = pkl.load(file) - - for ref_id in coverage_maps: - sample_ids.add(coverage_maps[ref_id].sample_id) - - # don't load coverage maps of species in reference database without reads - if coverage_maps[ref_id].count_reads() < args.min_reads: - continue - - write_mode = "ab+" if ref_id in ref_genome_ids or ref_id in assembly_genome_ids else "wb" - - # append to genome file - with open(os.path.join(grouped_coverage_map_folder, ref_id + ".cm.pkl"), write_mode) as tofile: - pkl.dump(coverage_maps[ref_id], tofile) - - if coverage_maps[ref_id].is_assembly: - assembly_genome_ids.add(ref_id) - else: - ref_genome_ids.add(ref_id) - - del coverage_maps - print_info("CoPTR", "done grouping by reference genome") - print_info("CoPTR", "the --restart flag can be used to start from here") - - sample_ids = sorted(list(sample_ids)) - results_ref = estimate_ptrs_coptr_ref( - ref_genome_ids, grouped_coverage_map_folder, args.min_reads, args.min_cov, - threads=args.threads, plot_folder=args.plot - ) - results_contig = estimate_ptrs_coptr_contig( - assembly_genome_ids, grouped_coverage_map_folder, args.min_reads, args.min_samples, - threads=args.threads, plot_folder=args.plot - ) - - out_file = args.out_file - _, ext = os.path.splitext(out_file) - if ext != ".csv": - out_file += ".csv" - - print_info("CoPTR", "writing {}".format(out_file)) - - with open(out_file, "w") as f: - # write the header - f.write("log2(PTR):genome_id/sample_id") - for sample_id in sample_ids: - f.write(",{}".format(sample_id)) - f.write("\n") 
- - for genome_id in sorted(results_ref): - # don't write rows without estimates - estimates = [result.estimate for result in results_ref[genome_id]] - if np.all(np.isnan(estimates)): - continue - - f.write(genome_id + ",") - row = ["" for s in sample_ids] - for result in results_ref[genome_id]: - if not np.isnan(result.estimate): - row[sample_ids.index(result.sample_id)] = str(result.estimate) - f.write(",".join(row) + "\n") - - for genome_id in sorted(results_contig): - # don't write rows without estimates - estimates = [result.estimate for result in results_contig[genome_id]] - if np.all(np.isnan(estimates)): - continue - - f.write(genome_id + ",") - row = ["" for s in sample_ids] - for result in results_contig[genome_id]: - if not np.isnan(result.estimate): - row[sample_ids.index(result.sample_id)] = str(result.estimate) - f.write(",".join(row) + "\n") - - print_info("CoPTR", "done!") - print_info("CoPTR", "you may remove the folder {}".format(grouped_coverage_map_folder)) - # print_info("CoPTR", "cleaning up {}".format(grouped_coverage_map_folder)) - # for ref_id in ref_genome_ids.union(assembly_genome_ids): - # filename = os.path.join(grouped_coverage_map_folder, ref_id + ".cm.pkl") - # os.remove(filename) - # os.rmdir(grouped_coverage_map_folder) - - - def count(self): - parser = argparse.ArgumentParser(usage= - """usage: coptr.py count [-h] coverage-map-folder out-file - """ - ) - parser.add_argument("coverage_map_folder", help="Folder with coverage maps computed from 'extract'.") - parser.add_argument("out_file", help="Filename to store PTR table.") - - if len(sys.argv[2:]) < 1: - parser.print_help() - exit(1) - - args = parser.parse_args(sys.argv[2:]) - - print_info("CoPTR", "estimating relative abundances") - - counts, genome_ids = compute_read_counts(args.coverage_map_folder) - - out_file = args.out_file - _, ext = os.path.splitext(out_file) - if ext != ".csv": - out_file += ".csv" - - print_info("CoPTR", "writing {}".format(out_file)) - - with open(out_file, "w") as f: - # write the header - f.write("count:genome_id/sample_id") - for sample_id in counts: - f.write(",{}".format(sample_id)) - f.write("\n") - - for genome_id in sorted(genome_ids): - row = [genome_id] - for sample_id in counts: - if genome_id in counts[sample_id]: - row.append(str(counts[sample_id][genome_id])) - else: - row.append(str(0)) - row = ",".join(row) + "\n" - f.write(row) - - print_info("CoPTR", "done!") - - -if __name__ == "__main__": - ProgramOptions() \ No newline at end of file diff --git a/coptr-env.yml b/coptr.yml similarity index 100% rename from coptr-env.yml rename to coptr.yml diff --git a/docs/conf.py b/docs/conf.py index 2502cab..451768a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,6 +16,8 @@ import os import sys + + sys.path.insert(0, os.path.abspath('..')) diff --git a/docs/installation.rst b/docs/installation.rst index 34cd198..822cbb1 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -18,17 +18,18 @@ You can then create a new environment to run coPTR: .. code-block:: # creates an environment called coptr - $ conda env create -f coptr-env.yml + $ conda env create -f coptr.yml # you only need to create this once # to activate the environment: $ conda activate coptr + $ pip install . -To check if you can run coPTR: +To check if you can run CoPTR: .. code-block:: - $ python coptr.py + $ coptr usage: coptr.py [options] @@ -50,7 +51,7 @@ To check if you can run coPTR: Dependencies ------------ -Installation using conda will install all required dependencies. 
However, +Installation using conda will install all required dependencies. However, CoPTR's dependencies are listed below if you wish to install them manually. * bowtie2 (>=2.4.1): CoPTR assumes bowtie2 is install and accessible from your PATH variable diff --git a/docs/modules.rst b/docs/modules.rst index 675f313..c035a51 100644 --- a/docs/modules.rst +++ b/docs/modules.rst @@ -1,26 +1,23 @@ Modules ======= -.. automodule:: src.bam_processor +.. automodule:: src.coptr.bam_processor :members: -.. automodule:: src.coptr_contig +.. automodule:: src.coptr.coptr_contig :members: -.. automodule:: src.coptr_ref +.. automodule:: src.coptr.coptr_ref :members: -.. automodule:: src.poisson_pca +.. automodule:: src.coptr.poisson_pca :members: -.. automodule:: src.print +.. automodule:: src.coptr.read_mapper :members: -.. automodule:: src.read_mapper +.. automodule:: src.coptr.read_assigner :members: -.. automodule:: src.read_assigner - :members: - -.. automodule:: src.util +.. automodule:: src.coptr.util :members: \ No newline at end of file diff --git a/docs/quick-start.rst b/docs/quick-start.rst index 9e7df36..233d129 100644 --- a/docs/quick-start.rst +++ b/docs/quick-start.rst @@ -18,21 +18,24 @@ Detailed instructions: :doc:`Tutorial `. tar -xzvf example-data.tar.gz # Set up the environment: - conda env create -f coptr-env.yml + conda env create -f coptr.yml conda activate coptr + + # Install coptr + pip install . # Index the reference database: - python coptr.py index example-data/ref-db example-data/ref-db/example-index + coptr index example-data/ref-db example-data/ref-db/example-index # Map reads against the reference: - python coptr.py map example-data/ref-db/example-index example-data/fastq example-data/bam + coptr map example-data/ref-db/example-index example-data/fastq example-data/bam # Extract read positions: - python coptr.py extract example-data/bam example-data/coverage-maps + coptr extract example-data/bam example-data/coverage-maps # Estimate ptrs: # Note the min-reads flag is optional. We recommend the default setting (5000 reads). - python coptr.py estimate example-data/coverage-maps out.csv --min-reads 2500 + coptr estimate example-data/coverage-maps out.csv --min-reads 2500 # View the output: cat out.csv diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 97afc88..db7b53c 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -58,14 +58,14 @@ Indexing the reference database .. code-block:: text - $ python coptr.py index example-data/ref-db example-data/ref-db/example-index - [INFO] (Sep 10, 2020 18:06:41) ReadMapper: found 2 files totaling 0.006 Gb - [INFO] (Sep 10, 2020 18:06:41) ReadMapper: copying to fasta files to coptr-fna-1599775601.629647.fna with prepended genome ids (filenames) - [INFO] (Sep 10, 2020 18:06:41) ReadMapper: writing 2 reference genome ids to example-data/ref-db/example-index.genomes - [INFO] (Sep 10, 2020 18:06:41) ReadMapper: bowtie2-build coptr-fna-1599775601.629647.fna example-data/ref-db/example-index --noref --threads 1 + $ coptr index example-data/ref-db example-data/ref-db/example-index + [INFO] [Jun 28, 2021 15:46:57] [coptr.read_mapper] Found 2 files totaling 0.00611 GB. + [INFO] [Jun 28, 2021 15:46:57] [coptr.read_mapper] Copying FASTA files to coptr-fna-2021-06-28T19:46:57+00:00.fna with prepended genome ids (filenames). + [INFO] [Jun 28, 2021 15:46:57] [coptr.read_mapper] Writing 2 reference genome ids to example-data/ref-db/example-index.genomes.
+ [INFO] [Jun 28, 2021 15:46:57] [coptr.read_mapper] bowtie2-build coptr-fna-2021-06-28T19:46:57+00:00.fna example-data/ref-db/example-index --threads 1 ...bowtie2 output... - [INFO] (Sep 10, 2020 18:06:44) ReadMapper: indexed 2 fasta files for reference database. - [INFO] (Sep 10, 2020 18:06:44) ReadMapper: cleaning up coptr-fna-1599775601.629647.fna + [INFO] [Jun 28, 2021 15:47:00] [coptr.read_mapper] Indexed 2 FASTA files for the reference database. + [INFO] [Jun 28, 2021 15:47:00] [coptr.read_mapper] Cleaning up coptr-fna-2021-06-28T19:46:57+00:00.fna. @@ -76,11 +76,11 @@ file, with the filename prepended to each sequencing id. This is how CoPTR keeps track of contigs from the same assembly. Indexing a database of fasta files can be accomplished by calling -``coptr.py index``: +``coptr index``: .. code-block:: text - usage: coptr.py index [-h] [--bt2-bmax BT2_BMAX] [--bt2-dcv BT2_DCV] [--bt2-threads BT2_THREADS] [--bt2-packed] ref-fasta index-out + usage: coptr index [-h] [--bt2-bmax BT2_BMAX] [--bt2-dcv BT2_DCV] [--bt2-threads BT2_THREADS] [--bt2-packed] ref-fasta index-out positional arguments: ref_fasta File or folder containing fasta to index. If a folder, @@ -100,20 +100,20 @@ Indexing a database of fasta files can be accomplished by calling control memory usage. It takes two arguments. The first ``ref-fasta`` is either a folder containing -fasta for the database. If it is a folder, coPTR will scan the folder for +fasta for the database. If it is a folder, CoPTR will scan the folder for all files ending in ``.fasta``, ``.fna``, or ``.fa``. CoPTR will combine these into a single fasta file, prepending the filename to each sequence id in order -to track contigs from the same reference genome. It then calls the ```bowtie2-build``` +to track contigs from the same reference genome. It then calls the ``bowtie2-build`` to index this file. -coPTR additionally outputs an ```index_name.genomes``` file with a list of ids for each +coPTR additionally outputs an ``index_name.genomes`` file with a list of ids for each reference genome. Notes on memory usage --------------------- Sometimes the database is too large for a single index. You can split the reference database into multiple parts, and index each separately. After -read mapping, the resulting BAM files can be merged with ```coptr.py merge```. +read mapping, the resulting BAM files can be merged with ```coptr merge```. Mapping reads ============= @@ -122,20 +122,20 @@ Mapping reads .. 
code-block:: text - $ python coptr.py map example-data/ref-db/example-index example-data/fastq example-data/bam - [INFO] (Aug 31, 2020 12:12:10) ReadMapper: mapping example-data/fastq/ERR969281.fastq.gz to example-data/bam/ERR969281.sam - [INFO] (Aug 31, 2020 12:12:10) ReadMapper: bowtie2 -x example-data/ref-db/example-index example-data/fastq/ERR969281.fastq.gz --no-unal -p 1 + $ coptr map example-data/ref-db/example-index example-data/fastq example-data/bam + [INFO] [Jun 28, 2021 15:48:26] [coptr.read_mapper] Mapping example-data/fastq to example-data/bam/ERR969281.sam + [INFO] [Jun 28, 2021 15:48:26] [coptr.read_mapper] bowtie2 -x example-data/ref-db/example-index example-data/fastq/ERR969281.fastq.gz --no-unal -p 1 -k 10 10818 reads; of these: 10818 (100.00%) were unpaired; of these: 4071 (37.63%) aligned 0 times 6709 (62.02%) aligned exactly 1 time 38 (0.35%) aligned >1 times 62.37% overall alignment rate - [INFO] (Aug 31, 2020 12:12:11) ReadMapper: converting example-data/bam/ERR969281.sam to example-data/bam/ERR969281.bam - [INFO] (Aug 31, 2020 12:12:11) ReadMapper: cleaning up example-data/bam/ERR969281.sam + [INFO] [Jun 28, 2021 15:48:27] [coptr.read_mapper] Converting example-data/bam/ERR969281.sam to example-data/bam/ERR969281.bam. + [INFO] [Jun 28, 2021 15:48:27] [coptr.read_mapper] Cleaning up example-data/bam/ERR969281.sam. .... - [INFO] (Aug 31, 2020 12:12:24) ReadMapper: converting example-data/bam/ERR969285.sam to example-data/bam/ERR969285.bam - [INFO] (Aug 31, 2020 12:12:24) ReadMapper: cleaning up example-data/bam/ERR969285.sam + [INFO] [Jun 28, 2021 15:48:40] [coptr.read_mapper] Converting example-data/bam/ERR969430.sam to example-data/bam/ERR969430.bam. + [INFO] [Jun 28, 2021 15:48:40] [coptr.read_mapper] Cleaning up example-data/bam/ERR969430.sam. Once you have indexed a reference database. You can then map reads against @@ -144,7 +144,7 @@ convenient: .. code-block:: text - usage: coptr.py map [-h] [--threads INT] [--paired] index input out-folder + usage: coptr map [-h] [--threads INT] [--paired] index input out-folder positional arguments: index Name of database index. @@ -161,12 +161,13 @@ convenient: --bt2-k BT2_K (Default 10). Number of alignments to report. Passed to -k flag of bowtie2. -The name of the database index corresponds to the name used from ``coptr.py index``. +The name of the database index corresponds to the name used from ``coptr index``. The input can either be a single fastq file, or a folder of fastq files to map. It also takes an optional ``--threads`` argument that allows bowtie2 to use multiple threads. Reads are output as ``bam`` files to save space. -For paired end sequencing, it is recommend to only map reads from a single mate-pair. +For paired-end sequencing, we recommand mapping reads from end only (e.g. the files +ending in either _1.* or _2.*). Merging mapped reads from multiple indexes @@ -174,11 +175,11 @@ Merging mapped reads from multiple indexes For large reference databases, it is sometimes necessary to create several indexes for subsets of the data and map reads against each index. Results from each index need to be merged to select reads with the best MAPQ across -indexes. You can use ```coptr.py merge``` to merge multiple bam files. +indexes. You can use ```coptr merge``` to merge multiple bam files. .. code-block:: text - usage: coptr.py merge [-h] in-bam1 in-bam2 ... in-bamN out-bam + usage: coptr merge [-h] in-bam1 in-bam2 ... in-bamN out-bam positional arguments: in-bams A space separateed list of BAM files to merge. 
Assumes same @@ -195,13 +196,13 @@ Computing coverage maps .. code-block:: text - $ python coptr.py extract example-data/bam example-data/coverage-maps - [INFO] (Jan 18, 2021 10:31:43) BamProcessor: processing example-data/bam/ERR969281.bam - [INFO] (Jan 18, 2021 10:31:43) BamProcessor: determining reference genomes - [INFO] (Jan 18, 2021 10:31:43) BamProcessor: collecting multi-mapped reads - [INFO] (Jan 18, 2021 10:31:43) BamProcessor: grouping reads by reference genome + $ coptr extract example-data/bam example-data/coverage-maps + [INFO] [Jun 28, 2021 15:49:48] [coptr.bam_processor] Processing example-data/bam/ERR969281.bam. + [INFO] [Jun 28, 2021 15:49:48] [coptr.bam_processor] Determining reference genomes. + [INFO] [Jun 28, 2021 15:49:48] [coptr.bam_processor] Collecting multi-mapped reads. + [INFO] [Jun 28, 2021 15:49:48] [coptr.bam_processor] Grouping reads by reference genome. ... - [INFO] (Aug 31, 2020 12:13:56) BamProcessor: found 190 reference sequences corresponding to 2 genomes + [INFO] [Jun 28, 2021 15:50:00] [coptr.cli] Found 190 reference sequences corresponding to 2 genomes. Once reads have been mapped, the next step is to compute the coverage along each reference genome. In this step, starting positions of each read are @@ -210,7 +211,7 @@ assembly are collected. .. code-block:: text - usage: usage: coptr.py extract [-h] [--ref-genome-regex REF_GENOME_REGEX] [--check-regex] + usage: usage: coptr extract [-h] [--ref-genome-regex REF_GENOME_REGEX] [--check-regex] in-folder out-folder positional arguments: @@ -227,7 +228,7 @@ assembly are collected. The important argument here is the ``--ref-genome-regex``. This is a regular expression that extracts the reference genome id from a sequence id. The default -argument will work with the index created by ```coptr.py index```, and works by +argument will work with the index created by ```coptr index```, and works by prepending the name of the fasta file, and special character ```|``` to each sequence id. @@ -239,7 +240,7 @@ extracts a genome id from a sequence id in a fasta file. It is used to group contigs together. The default argument ``[^\|]+`` matches all characters up to the first ``|``, and uses them as a genome id. -You can check your regular expression using the ``--check-regex-flag``, which +You can check your regular expression using the ``--check-regex`` flag, which skips the extract step and instead outputs a list of all genome ids. @@ -250,28 +251,28 @@ Estimating PTRs .. 
code-block:: text - # python coptr.py estimate example-data/coverage-maps out --min-reads 2500 - [INFO] (Jan 18, 2021 10:36:05) CoPTR: grouping reads by reference genome - [INFO] (Jan 18, 2021 10:36:05) CoPTR: saving to example-data/coverage-maps/coverage-maps-genome - [INFO] (Jan 18, 2021 10:36:05) CoPTR: processing ERR969281.cm.pkl - [INFO] (Jan 18, 2021 10:36:05) CoPTR: processing ERR969282.cm.pkl - [INFO] (Jan 18, 2021 10:36:05) CoPTR: processing ERR969283.cm.pkl - [INFO] (Jan 18, 2021 10:36:05) CoPTR: processing ERR969285.cm.pkl - [INFO] (Jan 18, 2021 10:36:05) CoPTR: processing ERR969286.cm.pkl - [INFO] (Jan 18, 2021 10:36:05) CoPTR: processing ERR969428.cm.pkl - [INFO] (Jan 18, 2021 10:36:05) CoPTR: processing ERR969429.cm.pkl - [INFO] (Jan 18, 2021 10:36:05) CoPTR: processing ERR969430.cm.pkl - [INFO] (Jan 18, 2021 10:36:05) CoPTR: done grouping by reference genome - [INFO] (Jan 18, 2021 10:36:05) CoPTR: the --restart flag can be used to start from here - [INFO] (Jan 18, 2021 10:36:05) CoPTRRef: checking reference genomes - [INFO] (Jan 18, 2021 10:36:15) CoPTRRef: running l-gasseri-ref - [INFO] (Jan 18, 2021 10:36:16) CoPTRRef: finished l-gasseri-ref - [INFO] (Jan 18, 2021 10:36:16) CoPTRContig: checking reference genomes - [INFO] (Jan 18, 2021 10:36:16) CoPTRContig: running e-coli-mag - [INFO] (Jan 18, 2021 10:36:16) CoPTRContig: finished e-coli-mag - [INFO] (Jan 18, 2021 10:36:16) CoPTR: writing out.csv - [INFO] (Jan 18, 2021 10:36:16) CoPTR: done! - [INFO] (Jan 18, 2021 10:36:16) CoPTR: you may remove the folder example-data/coverage-maps/coverage-maps-genome + $ coptr estimate example-data/coverage-maps out --min-reads 2500 + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] Grouping reads by reference genome. + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] Saving to example-data/coverage-maps/coverage-maps-genome: + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] ERR969281.cm.pkl + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] ERR969282.cm.pkl + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] ERR969283.cm.pkl + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] ERR969285.cm.pkl + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] ERR969286.cm.pkl + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] ERR969428.cm.pkl + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] ERR969429.cm.pkl + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] ERR969430.cm.pkl + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] Grouping by reference genome: Complete. + [INFO] [Jun 28, 2021 15:50:15] [coptr.cli] The --restart flag can be used to start from here. + [INFO] [Jun 28, 2021 15:50:15] [coptr.coptr_ref] Checking reference genomes. + [INFO] [Jun 28, 2021 15:50:24] [coptr.coptr_ref] Running l-gasseri-ref. + [INFO] [Jun 28, 2021 15:50:25] [coptr.coptr_ref] Finished l-gasseri-ref. + [INFO] [Jun 28, 2021 15:50:25] [coptr.coptr_contig] Checking reference genomes. + [INFO] [Jun 28, 2021 15:50:25] [coptr.coptr_contig] Running e-coli-mag. + [INFO] [Jun 28, 2021 15:50:25] [coptr.coptr_contig] Finished e-coli-mag. + [INFO] [Jun 28, 2021 15:50:25] [coptr.cli] Writing out.csv. + [INFO] [Jun 28, 2021 15:50:25] [coptr.cli] Done. + [INFO] [Jun 28, 2021 15:50:25] [coptr.cli] You may now remove the folder example-data/coverage-maps/coverage-maps-genome. The final stage is to estimate PTR ratios from coverage maps. This is accomplished with the ``estimate`` command. **It is strongly recommended that you perform this step @@ -279,23 +280,22 @@ on all samples at once.** .. 
code-block:: text - usage: coptr.py estimate [-h] [--min-reads MIN_READS] [--min-cov MIN_COV] [--threads THREADS] coverage-map-folder out-file - - positional arguments: - coverage_map_folder Folder with coverage maps computed from 'extract'. - out_file Filename to store PTR table. - - optional arguments: - -h, --help show this help message and exit - --min-reads MIN_READS - Minimum number of reads required to compute a PTR - (default 5000). - --min-cov MIN_COV Fraction of nonzero 10Kb bins required to compute a - PTR (default 0.75). - --min-samples MIN_SAMPLES - CoPTRContig only. Minimum number of samples required - to reorder bins (default 5). - --threads THREADS Number of threads to use (default 1). + usage: usage: coptr estimate [-h] [--min-reads MIN_READS] [--min-cov MIN_COV] [--min-samples MIN_SAMPLES] [--threads THREADS] [--plot OUTFOLDER] [--restart] coverage-map-folder out-file + + + positional arguments: + coverage_map_folder Folder with coverage maps computed from 'extract'. + out_file Filename to store PTR table. + + optional arguments: + -h, --help show this help message and exit + --min-reads MIN_READS + Minimum number of reads required to compute a PTR (default 5000). + --min-cov MIN_COV Fraction of nonzero bins required to compute a PTR (default 0.75). + --min-samples MIN_SAMPLES + CoPTRContig only. Minimum number of samples required to reorder bins (default 5). + --plot OUTFOLDER Plot model fit for each PTR. + --restart Restarts the estimation step using the genomes in the coverage-maps-genome folder. This combines all coverage maps by species, then estimates PTRs for each species. We have tried to set sensible default parameters for PTR estimatation. We set diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..2fbbd37 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,17 @@ +[build-system] +build-backend = 'setuptools.build_meta' +requires = [ + 'setuptools>=40.6.0', + 'wheel' +] + +[tool.black] +line-length = 88 +python-version = ['py37'] + +[tool.isort] +profile = 'black' +skip = ['__init__.py'] +lines_after_imports = 2 +known_first_party = 'coptr' +known_third_party = ['numpy', 'pysam', 'scipy'] diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..75d0e7e --- /dev/null +++ b/setup.cfg @@ -0,0 +1,58 @@ +[metadata] +name = coptr +version = attr: coptr.__version__ +url = https://github.com/tyjo/coptr +download_url = https://pypi.org/project/coptr/ +project_urls = + Source Code = https://github.com/tyjo/coptr + Documentation = https://coptr.readthedocs.io + Bug Tracker = https://github.com/tyjo/coptr/issues +author = Tyler Joseph +author_email = tj2357@columbia.edu +# Please consult https://pypi.org/classifiers/ for a full list. +classifiers = + Development Status :: 3 - Alpha + Environment :: Console + Intended Audience :: Science/Research + License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+) + Natural Language :: English + Operating System :: OS Independent + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3 :: Only + Topic :: Scientific/Engineering :: Bio-Informatics +license = GPL-3.0-or-later +description = Accurate and robust inference of microbial growth dynamics from metagenomic sequencing reads. 
+long_description = file: README.md +long_description_content_type = text/markdown +keywords = + growth rate + peak-to-trough ratio + +[options] +zip_safe = True +install_requires = + numpy >= 1.19.1 + pysam >= 0.16.0.1 + scipy >= 1.5.2 + matplotlib >= 3.3.2 +python_requires = >=3.6 +tests_require = + tox +packages = find: +package_dir = + = src + +[options.entry_points] +console_scripts = + coptr = coptr.cli:cli + +[options.packages.find] +where = src + +[options.extras_require] +development = + black + isort diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..4f876ca --- /dev/null +++ b/setup.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python + + +# This file is part of CoPTR. +# +# CoPTR is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CoPTR is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CoPTR. If not, see . + + +"""Set up the CoPTR for Python package.""" + + +from setuptools import setup + + +# All other arguments are defined in `setup.cfg`. +setup() diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/coptr/__init__.py b/src/coptr/__init__.py new file mode 100644 index 0000000..1436d8f --- /dev/null +++ b/src/coptr/__init__.py @@ -0,0 +1 @@ +__version__ = "1.1.6" diff --git a/src/bam_processor.py b/src/coptr/bam_processor.py similarity index 83% rename from src/bam_processor.py rename to src/coptr/bam_processor.py index 16f9693..1dd377f 100644 --- a/src/bam_processor.py +++ b/src/coptr/bam_processor.py @@ -1,7 +1,7 @@ """ bam_processor.py ====================== -Extract coordinates along each reference sequence and group reference +Extract coordinates along each reference sequence and group reference sequences by genome. """ @@ -24,23 +24,23 @@ import array import bisect +import logging import math -import numpy as np import os.path -import pysam import re -import sys -from scipy.sparse import lil_matrix, csr_matrix +import numpy as np +import pysam +from scipy.sparse import csr_matrix + +from .read_assigner import ReadAssigner -from src.print import print_info -from src.read_assigner import ReadAssigner +logger = logging.getLogger(__name__) class ReadContainer: - """Container to store read positions and metadata. - """ + """Container to store read positions and metadata.""" def __init__(self): # read query_id -> [ids in read_data] @@ -55,9 +55,8 @@ def __init__(self): self.index_ref_name = {} self.next_ref_idx = 0 - def check_add_mapping(self, query_id, ref_name, ref_position, score): - """Stores a mapping if it has an alignment score greater than or equal + """Stores a mapping if it has an alignment score greater than or equal to the mappings seen so far. 
Parameters @@ -76,12 +75,12 @@ def check_add_mapping(self, query_id, ref_name, ref_position, score): best_score = -np.inf else: row_ids = list(self.reads[query_id]) - best_score = np.max(self.read_data[row_ids,2]) + best_score = np.max(self.read_data[row_ids, 2]) # we've run out of room in the read data matrix, if self.next_row_idx == self.read_data.shape[0]: - new_read_data = np.zeros((2*self.next_row_idx, 3)) - new_read_data[:self.next_row_idx, :] = self.read_data + new_read_data = np.zeros((2 * self.next_row_idx, 3)) + new_read_data[: self.next_row_idx, :] = self.read_data self.read_data = new_read_data if ref_name not in self.ref_names_index: @@ -100,10 +99,9 @@ def check_add_mapping(self, query_id, ref_name, ref_position, score): self.reads[query_id].append(self.next_row_idx) self.next_row_idx += 1 - def get_mappings(self, query_id): """Return the highest scoring mappings for a read. - + Parameters ---------- query_id : str @@ -117,10 +115,10 @@ def get_mappings(self, query_id): A list of positions in each reference sequences. """ row_ids = list(self.reads[query_id]) - best_score = np.max(self.read_data[row_ids,2]) + best_score = np.max(self.read_data[row_ids, 2]) ref_names = [] ref_positions = [] - for row in self.read_data[row_ids,:]: + for row in self.read_data[row_ids, :]: ref_name_idx = int(row[0]) ref_pos = int(row[1]) score = int(row[2]) @@ -182,7 +180,6 @@ def __init__(self, regex="[^\|]+", is_bam=True): self.read_mode = "r" self.write_mode = "w" - def get_ref_names(self, bam_file): """Get the names of reference sequences and reference genomes. @@ -220,7 +217,6 @@ def get_ref_names(self, bam_file): return ref_seqs, ref_genomes - def extract_reads(self, bam_file): """Get the read positions from sam_file for each reference genome. @@ -240,7 +236,7 @@ def extract_reads(self, bam_file): and the value is the length of that reference sequence """ lengths = self.get_ref_seq_lengths(bam_file) - + read_container = ReadContainer() alignment_file = pysam.AlignmentFile(bam_file, self.read_mode) @@ -262,7 +258,7 @@ def extract_reads(self, bam_file): # This minimum scores means that reads with perfect # quality scores that fall below min_identity will # be discarded - min_score = -6*(1-self.min_identity)*aln.query_alignment_length + min_score = -6 * (1 - self.min_identity) * aln.query_alignment_length alignment_score = aln.get_tag("AS") if alignment_score < min_score: @@ -275,7 +271,6 @@ def extract_reads(self, bam_file): alignment_file.close() return read_container, lengths - def get_ref_seq_lengths(self, bam_file): """Extract the lengths of each reference sequence from a bam file. @@ -301,8 +296,9 @@ def get_ref_seq_lengths(self, bam_file): alignment_file.close() return ref_seq_lengths - - def compute_bin_coverage(self, genome_ids, read_container, lengths, ref_seq_genome_id): + def compute_bin_coverage( + self, genome_ids, read_container, lengths, ref_seq_genome_id + ): """Compute fraction of bins with reads for each reference genome. 
Parameters @@ -328,7 +324,8 @@ def compute_bin_coverage(self, genome_ids, read_container, lengths, ref_seq_geno for read_id in sorted(read_container.reads): ref_names, ref_positions = read_container.get_mappings(read_id) - if len(ref_names) > 1: continue + if len(ref_names) > 1: + continue bin_idx = math.floor(ref_positions[0] / 5000) binned_counts[ref_names[0]][bin_idx] += 1 @@ -337,8 +334,12 @@ def compute_bin_coverage(self, genome_ids, read_container, lengths, ref_seq_geno for ref_seq in binned_counts: genome_id = ref_seq_genome_id[ref_seq] - covered_bins[genome_id] = covered_bins.get(genome_id, 0) + (binned_counts[ref_seq] != 0).sum() - total_bins[genome_id] = total_bins.get(genome_id, 0) + binned_counts[ref_seq].size + covered_bins[genome_id] = ( + covered_bins.get(genome_id, 0) + (binned_counts[ref_seq] != 0).sum() + ) + total_bins[genome_id] = ( + total_bins.get(genome_id, 0) + binned_counts[ref_seq].size + ) # now add contigs without reads for ref_seq in ref_seq_genome_id: @@ -350,13 +351,13 @@ def compute_bin_coverage(self, genome_ids, read_container, lengths, ref_seq_geno coverage_frac = {} for genome_id in genome_ids: if genome_id in covered_bins: - coverage_frac[genome_id] = covered_bins[genome_id] / total_bins[genome_id] + coverage_frac[genome_id] = ( + covered_bins[genome_id] / total_bins[genome_id] + ) else: coverage_frac[genome_id] = 0 return coverage_frac - - def assign_multimapped_reads(self, read_container, lengths, max_alignments): """Assign multiple mapped reads to a single genome. @@ -372,7 +373,7 @@ def assign_multimapped_reads(self, read_container, lengths, max_alignments): A dictionary whose key is a sequence id, and value are the read positions along that sequence. """ - print_info("BamProcessor", "determining reference genomes") + logger.info("Determining reference genomes.") genome_ids = set() # sequence_id -> genome_id @@ -387,9 +388,11 @@ def assign_multimapped_reads(self, read_container, lengths, max_alignments): ref_seq_genome_id[ref_name] = genome_id genome_ids.add(genome_id) - genome_coverage = self.compute_bin_coverage(genome_ids, read_container, lengths, ref_seq_genome_id) + genome_coverage = self.compute_bin_coverage( + genome_ids, read_container, lengths, ref_seq_genome_id + ) - print_info("BamProcessor", "collecting multi-mapped reads") + logger.info("Collecting multi-mapped reads.") genome_ids = sorted(list(genome_ids)) # reads that fail filtering criteria discarded_reads = set() @@ -405,7 +408,7 @@ def assign_multimapped_reads(self, read_container, lengths, max_alignments): for read_id in sorted(read_container.reads): ref_names, ref_positions = read_container.get_mappings(read_id) ref_genomes = [ref_seq_genome_id[r] for r in ref_names] - + # the read maps twice to the same genome if len(ref_genomes) != np.unique(ref_genomes).size: discarded_reads.add(read_id) @@ -430,11 +433,10 @@ def assign_multimapped_reads(self, read_container, lengths, max_alignments): nreads += 1 indptr.append(len(indices)) - if len(data) > 0: X = csr_matrix((data, indices, indptr), shape=(nreads, len(genome_ids))) - print_info("BamProcessor", "assigning multi-mapped reads") + logger.info("Assigning multi-mapped reads.") read_assigner = ReadAssigner(X, prior_counts) assignments = read_assigner.assign_reads() @@ -464,7 +466,7 @@ def assign_multimapped_reads(self, read_container, lengths, max_alignments): ref_seq_genome_id[ref_name] = genome_id genome_id = ref_seq_genome_id[ref_name] mapping_genome_ids.append(genome_id) - + assignment_id = assignments[current_row] assigned_genome 
= genome_ids[assignment_id] sequence_id = mapping_genome_ids.index(assigned_genome) @@ -482,8 +484,6 @@ def assign_multimapped_reads(self, read_container, lengths, max_alignments): return read_positions, ref_seq_genome_id - - def process_bam(self, bam_file, max_alignments=10): """Extract read coordinates along each reference sequence. @@ -501,7 +501,7 @@ def process_bam(self, bam_file, max_alignments=10): A dictionary whose key is a reference genome id, and value is a CoverageMap. """ - print_info("BamProcessor", "processing {}".format(bam_file)) + logger.info("Processing %s.", bam_file) # extract read positions # reads : dict[query_seq_id] -> Read @@ -511,7 +511,9 @@ def process_bam(self, bam_file, max_alignments=10): # assign multiply mapped reads to reference sequences # 1. dict[ref_seq_id] -> int (read position) # 2. dict[ref_seq_id] -> genome_id - read_positions, ref_seq_genome_id = self.assign_multimapped_reads(reads, lengths, max_alignments) + read_positions, ref_seq_genome_id = self.assign_multimapped_reads( + reads, lengths, max_alignments + ) # fill in read_positions with remaining sequence ids for seq_id in lengths: @@ -524,7 +526,7 @@ def process_bam(self, bam_file, max_alignments=10): contig_read_positions = {} contig_lengths = {} - print_info("BamProcessor", "grouping reads by reference genome") + logger.info("Grouping reads by reference genome.") # group by reference genome # for contigs, this will group all contigs together by species for ref in sorted(lengths): @@ -545,23 +547,29 @@ def process_bam(self, bam_file, max_alignments=10): contig_read_positions[genome_id][ref] = read_positions[ref] contig_lengths[genome_id][ref] = lengths[ref] - # now, store coverage maps for each refence genome coverage_maps = {} for genome_id in contig_read_positions: is_assembly = len(contig_read_positions[genome_id]) > 1 if is_assembly: - coverage_maps[genome_id] = \ - CoverageMapContig(bam_file, genome_id, contig_read_positions[genome_id], contig_lengths[genome_id]) + coverage_maps[genome_id] = CoverageMapContig( + bam_file, + genome_id, + contig_read_positions[genome_id], + contig_lengths[genome_id], + ) else: ref_id = list(contig_read_positions[genome_id].keys())[0] - coverage_maps[genome_id] = \ - CoverageMapRef(bam_file, genome_id, contig_read_positions[genome_id][ref_id], contig_lengths[genome_id][ref_id]) + coverage_maps[genome_id] = CoverageMapRef( + bam_file, + genome_id, + contig_read_positions[genome_id][ref_id], + contig_lengths[genome_id][ref_id], + ) return coverage_maps - def merge(self, bam_files, out_bam): """Merge many bam files from different indexes into one, taking the reads with the highest mapping quality from each bam. @@ -573,8 +581,8 @@ def merge(self, bam_files, out_bam): out_bam : str Location to store the merged bam file. 
""" - print_info("BamProcessor", "merging bam_files {}".format(bam_files)) - print_info("BamProcessor", "keeping reads with highest alignment score") + logger.info("Merging BAM files %s.", ", ".join(bam_files)) + logger.info("Keeping reads with highest alignment score.") # bam header: SN => LN seq_len = {} # read_id => best_score @@ -599,15 +607,14 @@ def merge(self, bam_files, out_bam): bf.close() # combined header - header = { - "HD" : {"VN": "1.0", "SO": "unsorted"}, - "SQ" : [{"SN": sq, "LN" : seq_len[sq]} for sq in sorted(seq_len)] + header = { + "HD": {"VN": "1.0", "SO": "unsorted"}, + "SQ": [{"SN": sq, "LN": seq_len[sq]} for sq in sorted(seq_len)], } seq_names = sorted(seq_len.keys()) - - print_info("BamProcessor", "writing merged file {}".format(out_bam)) + logger.info("Writing merged file %s.", out_bam) out = pysam.AlignmentFile(out_bam, self.write_mode, header=header) for bam_file in bam_files: inf = pysam.AlignmentFile(bam_file, self.read_mode) @@ -616,13 +623,19 @@ def merge(self, bam_files, out_bam): continue alignment_score = in_aln.get_tag("AS") - if in_aln.query_name in score and alignment_score == score[in_aln.query_name] and not in_aln.is_read2: + if ( + in_aln.query_name in score + and alignment_score == score[in_aln.query_name] + and not in_aln.is_read2 + ): # reference name will be wrong if it is not expicility set out_aln = pysam.AlignedSegment() out_aln.query_name = in_aln.query_name out_aln.query_sequence = in_aln.query_sequence out_aln.flag = in_aln.flag - out_aln.reference_id = bisect.bisect_left(seq_names, in_aln.reference_name) + out_aln.reference_id = bisect.bisect_left( + seq_names, in_aln.reference_name + ) out_aln.reference_start = in_aln.reference_start out_aln.cigar = in_aln.cigar out_aln.template_length = in_aln.template_length @@ -630,14 +643,14 @@ def merge(self, bam_files, out_bam): out_aln.tags = in_aln.tags # check that reference sequence is set correctly - assert seq_names[out_aln.reference_id] == in_aln.reference_name, "missing reference sequence from {}".format(bam_file) + assert ( + seq_names[out_aln.reference_id] == in_aln.reference_name + ), "missing reference sequence from {}".format(bam_file) out.write(out_aln) inf.close() out.close() - print_info("BamProcessor", "finished writing {}".format(out_bam)) - - + logger.info("Finished writing %s.", out_bam) class CoverageMap: @@ -664,7 +677,6 @@ def __init__(self, bam_file, genome_id, is_assembly): self.is_assembly = is_assembly - class CoverageMapRef(CoverageMap): """Data structure to store read positions from complete reference genomes. @@ -678,41 +690,57 @@ class CoverageMapRef(CoverageMap): A list of coordinates, one per read. 
""" - __slots__ = ("bam_file", "genome_id", "sample_id", "is_assembly", "read_positions", "length") - + __slots__ = ( + "bam_file", + "genome_id", + "sample_id", + "is_assembly", + "read_positions", + "length", + ) def __getstate__(self): - return self.bam_file, self.genome_id, self.sample_id, self.is_assembly, \ - self.read_positions, self.length - + return ( + self.bam_file, + self.genome_id, + self.sample_id, + self.is_assembly, + self.read_positions, + self.length, + ) def __setstate__(self, state): if type(state) == list or type(state) == tuple: - self.bam_file, self.genome_id, self.sample_id, self.is_assembly, \ - self.read_positions, self.length = state + ( + self.bam_file, + self.genome_id, + self.sample_id, + self.is_assembly, + self.read_positions, + self.length, + ) = state else: for key in state: setattr(self, key, state[key]) - def __init__(self, bam_file, genome_id, read_positions, length): super().__init__(bam_file, genome_id, is_assembly=False) self.read_positions = np.array(read_positions) self.length = length - def __str__(self): - return "CoverageMapRef(bam_file={}, genome_id={}, sample_id={}, reads={})".format( - self.bam_file, self.genome_id, self.sample_id, len(self.read_positions) + return ( + "CoverageMapRef(bam_file={}, genome_id={}, sample_id={}, reads={})".format( + self.bam_file, self.genome_id, self.sample_id, len(self.read_positions) + ) ) def __repr__(self): return self.__str__() - def get_length(self): """Get the lenth of the genome. - + Returns ------- length : int @@ -720,7 +748,6 @@ def get_length(self): """ return self.length - def get_reads(self): """Get the read coordinates along the reference genome. @@ -732,7 +759,6 @@ def get_reads(self): reads = np.copy(self.read_positions) return reads - def count_reads(self): """Return number of mapped reads. @@ -744,7 +770,6 @@ def count_reads(self): return np.array(self.read_positions).size - class CoverageMapContig(CoverageMap): """Data structure to store read positions from assemblies. @@ -763,30 +788,65 @@ class CoverageMapContig(CoverageMap): and value is the length of that contig. 
""" - __slots__ = ("bam_file", "genome_id", "sample_id", "is_assembly", - "contig_ids", "contig_read_positions", "contig_lengths", "bin_size", - "binned_reads", "total_bins", "frac_nonzero", "reads", "passed_qc_flag") - + __slots__ = ( + "bam_file", + "genome_id", + "sample_id", + "is_assembly", + "contig_ids", + "contig_read_positions", + "contig_lengths", + "bin_size", + "binned_reads", + "total_bins", + "frac_nonzero", + "reads", + "passed_qc_flag", + ) def __getstate__(self): - return self.bam_file, self.genome_id, self.sample_id, self.is_assembly, \ - self.contig_ids, self.contig_read_positions, self.contig_lengths, self.bin_size, \ - self.binned_reads, self.total_bins, self.frac_nonzero, self.reads, self.passed_qc_flag - + return ( + self.bam_file, + self.genome_id, + self.sample_id, + self.is_assembly, + self.contig_ids, + self.contig_read_positions, + self.contig_lengths, + self.bin_size, + self.binned_reads, + self.total_bins, + self.frac_nonzero, + self.reads, + self.passed_qc_flag, + ) def __setstate__(self, state): if type(state) == list or type(state) == tuple: - self.bam_file, self.genome_id, self.sample_id, self.is_assembly, \ - self.contig_ids, self.contig_read_positions, self.contig_lengths, self.bin_size, \ - self.binned_reads, self.total_bins, self.frac_nonzero, self.reads, self.passed_qc_flag = state + ( + self.bam_file, + self.genome_id, + self.sample_id, + self.is_assembly, + self.contig_ids, + self.contig_read_positions, + self.contig_lengths, + self.bin_size, + self.binned_reads, + self.total_bins, + self.frac_nonzero, + self.reads, + self.passed_qc_flag, + ) = state else: for key in state: setattr(self, key, state[key]) # load old pickle files for contig in self.contig_read_positions: - self.contig_read_positions[contig] = array.array("I", self.contig_read_positions[contig]) - + self.contig_read_positions[contig] = array.array( + "I", self.contig_read_positions[contig] + ) def __init__(self, bam_file, genome_id, contig_read_positions, contig_lengths): super().__init__(bam_file, genome_id, is_assembly=True) @@ -795,7 +855,9 @@ def __init__(self, bam_file, genome_id, contig_read_positions, contig_lengths): self.contig_read_positions = {} for contig in contig_read_positions: - self.contig_read_positions[contig] = array.array("I", contig_read_positions[contig]) + self.contig_read_positions[contig] = array.array( + "I", contig_read_positions[contig] + ) self.bin_size = None self.binned_reads = {} @@ -808,17 +870,19 @@ def __init__(self, bam_file, genome_id, contig_read_positions, contig_lengths): # flag for qc check self.passed_qc_flag = None - def __str__(self): return "CoverageMapContig(bam_file={}, genome_id={}, ncontigs={}, nreads={}, cov_frac={}, passed_qc={})".format( - self.bam_file, self.genome_id, len(self.contig_ids), self.reads, self.frac_nonzero, self.passed_qc_flag + self.bam_file, + self.genome_id, + len(self.contig_ids), + self.reads, + self.frac_nonzero, + self.passed_qc_flag, ) - def __repr__(self): return self.__str__() - def reset(self): self.bin_size = None self.binned_reads = {} @@ -827,7 +891,6 @@ def reset(self): self.reads = None self.passed_qc_flag = None - def compute_bin_size(self): """Compute bin size for read counts. 
@@ -845,7 +908,8 @@ def compute_bin_size(self): total_length = 0 for contig_id in self.contig_lengths: length = self.contig_lengths[contig_id] - if length >= 11000: total_length += length + if length >= 11000: + total_length += length # bound bin size below by 1000 bin_size = np.max((1000, total_length / target_bins)) @@ -856,10 +920,9 @@ def compute_bin_size(self): return bin_size - def get_length(self, contig_id): """Get the length of a contig. - + Parameters ---------- contig_id : str @@ -873,15 +936,14 @@ def get_length(self, contig_id): length = self.contig_lengths[contig_id] return length - def get_reads(self, contig_id): """Get the coordinates of all reads for a contig. - + Parameters ---------- contig_id : str Reference sequence id of the contig - + Returns ------- reads : numpy.array @@ -890,7 +952,6 @@ def get_reads(self, contig_id): reads = np.copy(self.contig_read_positions[contig_id]) return reads - def bin_reads(self, contig_id): """Count reads in bins. @@ -929,7 +990,6 @@ def bin_reads(self, contig_id): return self.binned_reads[contig_id] - def count_reads(self): """Return number of mapped reads. @@ -943,10 +1003,8 @@ def count_reads(self): nreads += np.array(self.contig_read_positions[contig_id]).size return nreads - def passed_qc(self): - """Run basic quality control checks. Sets the passed_gc_flag attribute. - """ + """Run basic quality control checks. Sets the passed_gc_flag attribute.""" if self.passed_qc_flag is not None: return self.passed_qc_flag total_bins = 0 @@ -954,7 +1012,8 @@ def passed_qc(self): zero_bins = 0 for cid in self.contig_ids: length = self.contig_lengths[cid] - if length < 11000: continue + if length < 11000: + continue bins = self.bin_reads(cid) total_bins += bins.size total_reads += bins.sum() @@ -965,7 +1024,9 @@ def passed_qc(self): else: self.passed_qc_flag = True - self.frac_nonzero = (total_bins - zero_bins) / total_bins if total_bins > 0 else 0 - self.reads = total_reads + self.frac_nonzero = ( + (total_bins - zero_bins) / total_bins if total_bins > 0 else 0 + ) + self.reads = total_reads self.total_bins = total_bins - return self.passed_qc_flag \ No newline at end of file + return self.passed_qc_flag diff --git a/src/coptr/cli.py b/src/coptr/cli.py new file mode 100644 index 0000000..1c383fb --- /dev/null +++ b/src/coptr/cli.py @@ -0,0 +1,600 @@ +""" +This file is part of CoPTR. + +CoPTR is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +CoPTR is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with CoPTR. If not, see . +""" + +import argparse +import logging +import os +import os.path +import pickle as pkl +import sys + +import numpy as np + +from . 
import __version__ +from .bam_processor import BamProcessor +from .compute_read_counts import compute_read_counts +from .compute_rel_abun import compute_rel_abun +from .coptr_contig import estimate_ptrs_coptr_contig +from .coptr_ref import estimate_ptrs_coptr_ref +from .read_mapper import ReadMapper +from .util import get_fastq_name + + +logger = logging.getLogger(__name__) + + +class ProgramOptions: + def __init__(self): + parser = argparse.ArgumentParser( + description="CoPTR (v{}): Compute PTRs from complete reference genomes and assemblies.".format( + __version__ + ), + usage="""coptr [options] + +command: index create a bowtie2 index for a reference database + map map reads against a reference database + merge merge BAM files from reads mapped to multiple indexes + extract compute coverage maps from bam files + estimate estimate PTRs from coverage maps + count compute read counts for each genome after filtering + rabun estimate relative abundances for each genomes after filtering +""", + ) + self.default_bt2_k = 10 + + if len(sys.argv[1:]) < 1: + parser.print_help() + sys.exit(2) + + parser.add_argument("command", type=str, help="Command to run.") + args = parser.parse_args(sys.argv[1:2]) + + if not hasattr(self, args.command): + logger.critical("Unrecognized command.") + parser.print_help() + sys.exit(2) + getattr(self, args.command)() + + def index(self): + parser = argparse.ArgumentParser( + usage="coptr index [-h] [--bt2-bmax BT2_BMAX] [--bt2-dcv BT2_DCV] [--bt2-threads BT2_THREADS] [--bt2-packed] ref-fasta index-out" + ) + parser.add_argument( + "ref_fasta", + help="""File or folder containing fasta to index. If a folder, the extension for each + fasta must be one of [.fasta, .fna, .fa] + """, + ) + parser.add_argument("index_out", help="Filepath to store index.") + parser.add_argument( + "--bt2-bmax", + default=None, + help="Set the --bmax arguement for bowtie2-build. Used to control memory useage.", + ) + parser.add_argument( + "--bt2-dcv", + default=None, + help="Set the --dcv argument for bowtie2-build. Used to control memory usage.", + ) + parser.add_argument( + "--bt2-threads", + default="1", + help="Number of threads to pass to bowtie2-build.", + ) + parser.add_argument( + "--bt2-packed", + action="store_true", + help="Set the --packed flag for bowtie2-build. Used to control memory usage.", + ) + + if len(sys.argv[2:]) < 1: + parser.print_help() + sys.exit(2) + + args = parser.parse_args(sys.argv[2:]) + read_mapper = ReadMapper() + read_mapper.index( + args.ref_fasta, + args.index_out, + args.bt2_bmax, + args.bt2_dcv, + args.bt2_threads, + args.bt2_packed, + ) + + def map(self): + parser = argparse.ArgumentParser( + usage="coptr map [-h] [--threads INT] [--bt2-k INT] [--paired] index input out-folder" + ) + parser.add_argument("index", help="Name of database index.") + parser.add_argument( + "input", + help="""File or folder containing fastq reads to map. If a folder, the extension for + each fastq must be one of [.fastq, .fq, .fastq.gz, fq.gz] + """, + ) + parser.add_argument( + "out_folder", help="Folder to save mapped reads. BAM files are output here." + ) + parser.add_argument( + "--paired", + action="store_true", + help="Set for paired end reads. Assumes fastq files end in _1.* and _2.*", + ) + parser.add_argument( + "--threads", + type=int, + default=1, + help="Number of threads for bowtie2 mapping.", + ) + parser.add_argument( + "--bt2-k", + type=int, + default=self.default_bt2_k, + help="(Default 10). Number of alignments to report. 
Passed to -k flag of bowtie2.", + ) + + if len(sys.argv[2:]) < 1: + parser.print_help() + sys.exit(2) + + args = parser.parse_args(sys.argv[2:]) + read_mapper = ReadMapper() + read_mapper.map( + args.index, + args.input, + args.out_folder, + args.paired, + args.threads, + args.bt2_k, + ) + + def merge(self): + parser = argparse.ArgumentParser( + usage="coptr merge [-h] in-bam1 in-bam2 ... in-bamN out-bam" + ) + parser.add_argument( + "in-bams", + nargs="+", + help="A space separated list of BAM files to merge. Assumes same reads were mapped against different indexes. " + + "Only keeps read 1 of paired end sequencing, since this is used downstream.", + ) + parser.add_argument("out-bam", help="Path to merged BAM.") + + if len(sys.argv[2:]) < 2: + parser.print_help() + sys.exit(2) + + args = vars(parser.parse_args(sys.argv[2:])) + in_bams = args["in-bams"] + out_bam = args["out-bam"] + bam_processor = BamProcessor() + bam_processor.merge(in_bams, out_bam) + + def extract(self): + parser = argparse.ArgumentParser( + usage="coptr extract [-h] [--ref-genome-regex REF_GENOME_REGEX] [--check-regex] in-folder out-folder" + ) + parser.add_argument("in_folder", help="Folder with BAM files.") + parser.add_argument("out_folder", help="Folder to store coverage maps.") + parser.add_argument( + "--ref-genome-regex", + default="[^\|]+", + help="Regular expression extracting a reference genome id from the sequence id in a bam file.", + ) + parser.add_argument( + "--check-regex", + action="store_true", + default=False, + help="Check the regular expression by counting reference genomes without processing.", + ) + parser.add_argument( + "--bt2-k", + type=int, + default=self.default_bt2_k, + help="Maximum number of alignments.", + ) + + if len(sys.argv[2:]) < 1: + parser.print_help() + sys.exit(2) + + args = parser.parse_args(sys.argv[2:]) + + bam_processor = BamProcessor(args.ref_genome_regex) + ref_sequences = set() + ref_genomes = set() + for f in sorted(os.listdir(args.in_folder)): + fname, ext = os.path.splitext(f) + if ext == ".bam": + fpath = os.path.join(args.in_folder, f) + seq, gen = bam_processor.get_ref_names(fpath) + ref_sequences.update(seq) + ref_genomes.update(gen) + + if os.path.isfile( + os.path.join(args.out_folder, get_fastq_name(f) + ".cm.pkl") + ): + logger.info("Output for %s already found, skipping.", fname) + continue + + # don't process the rest of the bam file if we just want to + # sanity check the regular expression + if args.check_regex: + continue + + coverage_maps = bam_processor.process_bam(fpath, args.bt2_k) + with open( + os.path.join(args.out_folder, get_fastq_name(f) + ".cm.pkl"), "wb" + ) as f: + pkl.dump(coverage_maps, f) + + logger.info( + "Found %d reference sequences corresponding to %d genomes.", + len(ref_sequences), + len(ref_genomes), + ) + if args.check_regex: + logger.info("Reference genome ids:") + for ref in sorted(ref_genomes): + logger.info("\t%s", ref) + + def estimate(self): + parser = argparse.ArgumentParser( + usage="""usage: coptr estimate [-h] [--min-reads MIN_READS] [--min-cov MIN_COV] [--min-samples MIN_SAMPLES] [--threads THREADS] [--plot OUTFOLDER] [--restart] coverage-map-folder out-file + """ + ) + parser.add_argument( + "coverage_map_folder", + help="Folder with coverage maps computed from 'extract'.", + ) + parser.add_argument("out_file", help="Filename to store PTR table.") + parser.add_argument( + "--min-reads", + type=float, + help="Minimum number of reads required to compute a PTR (default 5000).", + default=5000, + ) + parser.add_argument( + 
"--min-cov", + type=float, + help="Fraction of nonzero bins required to compute a PTR (default 0.75).", + default=0.75, + ) + parser.add_argument( + "--min-samples", + type=float, + help="CoPTRContig only. Minimum number of samples required to reorder bins (default 5).", + default=5, + ) + parser.add_argument( + "--plot", + metavar="OUTFOLDER", + default=None, + help="Plot model fit for each PTR." + ) + parser.add_argument( + "--restart", + default=False, + action="store_true", + help="Restarts the estimation step using the genomes in the coverage-maps-genome folder.", + ) + parser.add_argument( + "--oriC", + default=False, + action="store_true", + help="Compute OriC estimates." + ) + + if len(sys.argv[2:]) < 1: + parser.print_help() + sys.exit(2) + + args = parser.parse_args(sys.argv[2:]) + sample_ids = set() + ref_genome_ids = set() + assembly_genome_ids = set() + + grouped_coverage_map_folder = os.path.join( + args.coverage_map_folder, "coverage-maps-genome" + ) + if not args.restart and not os.path.isdir(grouped_coverage_map_folder): + os.mkdir(grouped_coverage_map_folder) + + if args.restart: + logger.info("Restarting from files in %s.", grouped_coverage_map_folder) + for cm_file in sorted(os.listdir(grouped_coverage_map_folder)): + _, ext = os.path.splitext(cm_file) + if ext != ".pkl": + continue + + logger.info("Checking %s.", cm_file) + with open( + os.path.join(grouped_coverage_map_folder, cm_file), "rb" + ) as f: + try: + while True: + cm = pkl.load(f) + if cm.is_assembly: + assembly_genome_ids.add(cm.genome_id) + else: + ref_genome_ids.add(cm.genome_id) + sample_ids.add(cm.sample_id) + + except EOFError: + pass + + else: + logger.info("Grouping reads by reference genome.") + logger.info("Saving to %s:", grouped_coverage_map_folder) + + # first construct a list of genome_ids for ptr estimates + for f in sorted(os.listdir(args.coverage_map_folder)): + fname, ext = os.path.splitext(f) + if ext != ".pkl": + continue + fpath = os.path.join(args.coverage_map_folder, f) + + logger.info("\t%s", f) + with open(fpath, "rb") as file: + coverage_maps = pkl.load(file) + + for ref_id in coverage_maps: + sample_ids.add(coverage_maps[ref_id].sample_id) + + # don't load coverage maps of species in reference database without reads + if coverage_maps[ref_id].count_reads() < args.min_reads: + continue + + write_mode = ( + "ab+" + if ref_id in ref_genome_ids or ref_id in assembly_genome_ids + else "wb" + ) + + # append to genome file + with open( + os.path.join( + grouped_coverage_map_folder, ref_id + ".cm.pkl" + ), + write_mode, + ) as tofile: + pkl.dump(coverage_maps[ref_id], tofile) + + if coverage_maps[ref_id].is_assembly: + assembly_genome_ids.add(ref_id) + else: + ref_genome_ids.add(ref_id) + + del coverage_maps + logger.info("Grouping by reference genome: Complete.") + logger.info("The --restart flag can be used to start from here.") + + sample_ids = sorted(list(sample_ids)) + results_ref = estimate_ptrs_coptr_ref( + ref_genome_ids, + grouped_coverage_map_folder, + args.min_reads, + args.min_cov, + plot_folder=args.plot, + ) + results_contig, ref_id_to_bin_coords = estimate_ptrs_coptr_contig( + assembly_genome_ids, + grouped_coverage_map_folder, + args.min_reads, + args.min_samples, + plot_folder=args.plot, + ) + + out_file = args.out_file + _, ext = os.path.splitext(out_file) + if ext != ".csv": + out_file += ".csv" + + logger.info("Writing %s.", out_file) + + with open(out_file, "w") as f: + # write the header + f.write("log2(PTR):genome_id/sample_id") + for sample_id in sample_ids: + 
f.write(",{}".format(sample_id)) + f.write("\n") + + for genome_id in sorted(results_ref): + # don't write rows without estimates + estimates = [result.estimate for result in results_ref[genome_id]] + if np.all(np.isnan(estimates)): + continue + + f.write(genome_id + ",") + row = ["" for s in sample_ids] + for result in results_ref[genome_id]: + if not np.isnan(result.estimate): + row[sample_ids.index(result.sample_id)] = str(result.estimate) + f.write(",".join(row) + "\n") + + for genome_id in sorted(results_contig): + # don't write rows without estimates + estimates = [result.estimate for result in results_contig[genome_id]] + if np.all(np.isnan(estimates)): + continue + + f.write(genome_id + ",") + row = ["" for s in sample_ids] + for result in results_contig[genome_id]: + if not np.isnan(result.estimate): + row[sample_ids.index(result.sample_id)] = str(result.estimate) + f.write(",".join(row) + "\n") + + if args.oriC: + oric_file = os.path.splitext(out_file)[0] + "-oriC.csv" + with open(oric_file, "w") as f: + f.write("genome_id,oriC\n") + + for genome_id in sorted(results_ref): + orics = [ + result.ori_estimate_coord + for result in results_ref[genome_id] + if np.isfinite(result.ori_estimate_coord) + ] + oric_est = np.mean(orics) + f.write("{},{}\n".format(genome_id, oric_est)) + + for genome_id in sorted(ref_id_to_bin_coords): + bin_coords = ref_id_to_bin_coords[genome_id] + f.write("{},{}\n".format(genome_id, "\t".join(bin_coords))) + + logger.info("Done.") + logger.info("You may now remove the folder %s.", grouped_coverage_map_folder) + + def count(self): + parser = argparse.ArgumentParser( + usage="""usage: coptr count [-h] [--min-cov MIN_COV] [--min-samples MIN_SAMPLES] coverage-map-folder out-file + """ + ) + parser.add_argument( + "coverage_map_folder", + help="Folder with coverage maps computed from 'extract'.", + ) + parser.add_argument("out_file", help="Filename to store PTR table.") + parser.add_argument( + "--min-cov", + type=float, + help="Fraction of nonzero bins required to compute a PTR (default 0.75).", + default=0.75, + ) + parser.add_argument( + "--min-samples", + type=float, + help="CoPTRContig only. 
Minimum number of samples required to reorder bins (default 5).", + default=5, + ) + + if len(sys.argv[2:]) < 1: + parser.print_help() + exit(1) + + args = parser.parse_args(sys.argv[2:]) + + logger.info("Computing read counts.") + + counts, genome_ids = compute_read_counts( + args.coverage_map_folder, args.min_cov, args.min_samples + ) + + out_file = args.out_file + _, ext = os.path.splitext(out_file) + if ext != ".csv": + out_file += ".csv" + + logger.info("Writing %s.", out_file) + + with open(out_file, "w") as f: + # write the header + f.write("count:genome_id/sample_id") + for sample_id in counts: + f.write(",{}".format(sample_id)) + f.write("\n") + + for genome_id in sorted(genome_ids): + row = [genome_id] + for sample_id in counts: + if genome_id in counts[sample_id]: + row.append(str(counts[sample_id][genome_id])) + else: + row.append(str(0)) + row = ",".join(row) + "\n" + f.write(row) + + logger.info("Done.") + + def rabun(self): + parser = argparse.ArgumentParser( + usage="""usage: coptr rabun [-h] [--min-reads MIN_READS] [--min-cov MIN_COV] [--min-samples MIN_SAMPLES] coverage-map-folder out-file + """ + ) + parser.add_argument( + "coverage_map_folder", + help="Folder with coverage maps computed from 'extract'.", + ) + parser.add_argument("out_file", help="Filename to store PTR table.") + parser.add_argument( + "--min-reads", + type=float, + help="Minimum number of reads required to compute a PTR (default 5000).", + default=5000, + ) + parser.add_argument( + "--min-cov", + type=float, + help="Fraction of nonzero bins required to compute a PTR (default 0.75).", + default=0.75, + ) + parser.add_argument( + "--min-samples", + type=float, + help="CoPTRContig only. Minimum number of samples required to reorder bins (default 5).", + default=5, + ) + + if len(sys.argv[2:]) < 1: + parser.print_help() + exit(1) + + args = parser.parse_args(sys.argv[2:]) + + logger.info("Computing relative abundances.") + + rel_abun, genome_ids = compute_rel_abun( + args.coverage_map_folder, args.min_reads, args.min_cov, args.min_samples + ) + + out_file = args.out_file + _, ext = os.path.splitext(out_file) + if ext != ".csv": + out_file += ".csv" + + logger.info("Writing %s.", out_file) + + with open(out_file, "w") as f: + # write the header + f.write("rel_abun:genome_id/sample_id") + for sample_id in rel_abun: + f.write(",{}".format(sample_id)) + f.write("\n") + + for genome_id in sorted(genome_ids): + row = [genome_id] + for sample_id in rel_abun: + if genome_id in rel_abun[sample_id]: + row.append(str(rel_abun[sample_id][genome_id])) + else: + row.append(str(0)) + row = ",".join(row) + "\n" + f.write(row) + + logger.info("Done.") + + +def cli(): + """Serve as an entry point for command line calls.""" + logging.basicConfig( + level="INFO", + format="[%(levelname)s] [%(asctime)s] [%(name)s] %(message)s", + datefmt="%b %d, %Y %H:%M:%S", + ) + ProgramOptions() diff --git a/src/compute_read_counts.py b/src/coptr/compute_read_counts.py similarity index 63% rename from src/compute_read_counts.py rename to src/coptr/compute_read_counts.py index c8d568c..950f6be 100644 --- a/src/compute_read_counts.py +++ b/src/coptr/compute_read_counts.py @@ -21,20 +21,24 @@ along with CoPTR. If not, see . 
""" -import numpy as np +import logging import os import pickle as pkl -from src.coptr_contig import CoPTRContig -from src.coptr_ref import ReadFilterRef -from src.print import print_info, print_warning +import numpy as np + +from .coptr_contig import CoPTRContig +from .coptr_ref import ReadFilterRef -def compute_read_counts_from_coverage_maps(coverage_maps): +logger = logging.getLogger(__name__) + + +def compute_read_counts_from_coverage_maps(coverage_maps, min_cov, min_samples): # instantiate classes for filtering methods - coptr_contig = CoPTRContig(5000, 5) - rf_ref = ReadFilterRef(5000, 0.75) + coptr_contig = CoPTRContig(1, min_samples) + rf_ref = ReadFilterRef(1, min_cov) total_passing_reads = 0 read_counts = {} sample_id = None @@ -46,49 +50,55 @@ def compute_read_counts_from_coverage_maps(coverage_maps): if cm.is_assembly and cm.passed_qc(): binned_reads = coptr_contig.construct_coverage_matrix([cm]) - lower_bound, upper_bound = coptr_contig.compute_genomewide_bounds(binned_reads) - count = binned_reads[np.logical_and(binned_reads >= lower_bound, binned_reads <= upper_bound)].sum() + lower_bound, upper_bound = coptr_contig.compute_genomewide_bounds( + binned_reads + ) + count = binned_reads[ + np.logical_and(binned_reads >= lower_bound, binned_reads <= upper_bound) + ].sum() read_counts[cm.genome_id] = count total_passing_reads += count genome_ids.add(cm.genome_id) elif not cm.is_assembly: - filtered_reads, filtered_length, qc_result = rf_ref.filter_reads(cm.read_positions, cm.length) + filtered_reads, filtered_length, qc_result = rf_ref.filter_reads( + cm.read_positions, cm.length + ) if qc_result.passed_qc: count = filtered_reads.size read_counts[cm.genome_id] = count total_passing_reads += count + genome_ids.add(cm.genome_id) - genome_ids.add(cm.genome_id) + return sample_id, read_counts, genome_ids - rel_abun = {} - for genome_id in read_counts: - rel_abun[genome_id] = read_counts[genome_id] / total_passing_reads - return sample_id, rel_abun, genome_ids +def compute_read_counts(coverage_map_folder, min_cov, min_samples): - - - -def compute_read_counts(coverage_map_folder): - - rel_abundances = {} + read_counts = {} genome_ids = set() for f in sorted(os.listdir(coverage_map_folder)): fname, ext = os.path.splitext(f) - if ext != ".pkl": continue + if ext != ".pkl": + continue fpath = os.path.join(coverage_map_folder, f) - print_info("Count", "\tprocessing {}".format(f)) + logger.info("\t%s", f) with open(fpath, "rb") as file: coverage_maps = pkl.load(file) - sample_id, sample_rel_abun, sample_genome_ids = compute_read_counts_from_coverage_maps(coverage_maps) + ( + sample_id, + sample_read_counts, + sample_genome_ids, + ) = compute_read_counts_from_coverage_maps( + coverage_maps, min_cov, min_samples + ) if sample_id is not None: - rel_abundances[sample_id] = sample_rel_abun + read_counts[sample_id] = sample_read_counts genome_ids.update(sample_genome_ids) - return rel_abundances, genome_ids \ No newline at end of file + return read_counts, genome_ids diff --git a/src/coptr/compute_rel_abun.py b/src/coptr/compute_rel_abun.py new file mode 100644 index 0000000..ac56fcb --- /dev/null +++ b/src/coptr/compute_rel_abun.py @@ -0,0 +1,119 @@ +""" +compute_rel_abun.py +====================== +Estimate relative abundances. +""" + +""" +This file is part of CoPTR. 
+ +CoPTR is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +CoPTR is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with CoPTR. If not, see . +""" + +import logging +import os +import pickle as pkl + +import numpy as np + +from .coptr_contig import CoPTRContig +from .coptr_ref import ReadFilterRef + + +logger = logging.getLogger(__name__) + + +def compute_rel_abun_from_coverage_maps(coverage_maps, min_reads, min_cov, min_samples): + # instantiate classes for filtering methods + + coptr_contig = CoPTRContig(min_reads, min_samples) + rf_ref = ReadFilterRef(min_reads, min_cov) + total_passing_reads = 0 + read_counts = {} + genome_lengths = {} + sample_id = None + genome_ids = set() + for genome_id in coverage_maps: + cm = coverage_maps[genome_id] + sample_id = cm.sample_id + + if cm.is_assembly and cm.passed_qc(): + binned_reads = coptr_contig.construct_coverage_matrix([cm]) + + lower_bound, upper_bound = coptr_contig.compute_genomewide_bounds( + binned_reads + ) + count = binned_reads[ + np.logical_and(binned_reads >= lower_bound, binned_reads <= upper_bound) + ].sum() + read_counts[cm.genome_id] = count + genome_lengths[cm.genome_id] = binned_reads.shape[0] * cm.compute_bin_size() + total_passing_reads += count + genome_ids.add(cm.genome_id) + + elif not cm.is_assembly: + filtered_reads, filtered_length, qc_result = rf_ref.filter_reads( + cm.read_positions, cm.length + ) + + if qc_result.passed_qc: + count = filtered_reads.size + read_counts[cm.genome_id] = count + genome_lengths[cm.genome_id] = filtered_length + total_passing_reads += count + genome_ids.add(cm.genome_id) + + rel_abun = {} + normalizing_constant = 0 + for genome_id in read_counts: + unnormalized_rel_abun = ( + read_counts[genome_id] / total_passing_reads + ) / genome_lengths[genome_id] + rel_abun[genome_id] = unnormalized_rel_abun + normalizing_constant += unnormalized_rel_abun + + for genome_id in read_counts: + rel_abun[genome_id] = rel_abun[genome_id] / normalizing_constant + + return sample_id, rel_abun, genome_ids + + +def compute_rel_abun(coverage_map_folder, min_reads, min_cov, min_samples): + + rel_abun = {} + genome_ids = set() + for f in sorted(os.listdir(coverage_map_folder)): + fname, ext = os.path.splitext(f) + if ext != ".pkl": + continue + fpath = os.path.join(coverage_map_folder, f) + + logger.info("\t%s", f) + + with open(fpath, "rb") as file: + coverage_maps = pkl.load(file) + + ( + sample_id, + sample_rel_abun, + sample_genome_ids, + ) = compute_rel_abun_from_coverage_maps( + coverage_maps, min_reads, min_cov, min_samples + ) + + if sample_id is not None: + rel_abun[sample_id] = sample_rel_abun + genome_ids.update(sample_genome_ids) + + return rel_abun, genome_ids diff --git a/src/coptr_contig.py b/src/coptr/coptr_contig.py similarity index 71% rename from src/coptr_contig.py rename to src/coptr/coptr_contig.py index 11c4ded..e16587f 100644 --- a/src/coptr_contig.py +++ b/src/coptr/coptr_contig.py @@ -21,19 +21,22 @@ along with CoPTR. If not, see . 
""" -import math +import logging import multiprocessing as mp -import numpy as np import os.path import pickle as pkl +import struct +import sys + +import numpy as np import scipy.special import scipy.stats -import sys -from src.poisson_pca import PoissonPCA -from src.print import print_info, print_warning, print_error +from .poisson_pca import PoissonPCA +logger = logging.getLogger(__name__) + class CoPTRContigEstimate: """Data structure to store CoPTRContig estimates. @@ -67,8 +70,13 @@ def __init__(self, bam_file, genome_id, sample_id, estimate, nreads, cov_frac): def __str__(self): return "CoPTRContigEstimate(bam_file={}, genome_id={}, sample_id={}, estimate={:.3f}, nreads={}, cov_frac={})".format( - self.bam_file, self.genome_id, self.sample_id, self.estimate, self.nreads, self.cov_frac - ) + self.bam_file, + self.genome_id, + self.sample_id, + self.estimate, + self.nreads, + self.cov_frac, + ) def __repr__(self): return self.__str__() @@ -76,7 +84,7 @@ def __repr__(self): class CoPTRContig: """Estimate PTRs from draft assemblies. - + Parameters ---------- min_reads : float @@ -89,7 +97,6 @@ def __init__(self, min_reads, min_samples): self.min_reads = min_reads self.min_samples = min_samples - def compute_genomewide_bounds(self, binned_counts, crit_region=0.05): """Compute bounds on read counts in bins. @@ -113,14 +120,13 @@ def compute_genomewide_bounds(self, binned_counts, crit_region=0.05): std = np.max((1, std)) norm = scipy.stats.norm() - m = norm.ppf(1-crit_region) + m = norm.ppf(1 - crit_region) - lower_bound = np.power(2, median - m*std) - upper_bound = np.power(2, median + m*std) + lower_bound = np.power(2, median - m * std) + upper_bound = np.power(2, median + m * std) return lower_bound, upper_bound - def construct_coverage_matrix(self, coverage_maps): """Aggregate coverage maps into a bin by sample matrix. @@ -137,31 +143,49 @@ def construct_coverage_matrix(self, coverage_maps): A = [] # find all common contig ids - contig_id_list = [cm.contig_ids for cm in coverage_maps] contig_ids = coverage_maps[0].contig_ids ref_genome = coverage_maps[0].genome_id + bin_coords = [] + contigs_seen = set() + for cm in coverage_maps: binned_reads = [] for contig_id in sorted(contig_ids): if contig_id not in cm.contig_ids: - print_error("CoPTRContig", "missing contig {} from {} in {}".format(contig_id, ref_genome, cm.sample_id), quit=True) + logger.critical( + "Missing contig %s from %s in %s", + contig_id, + ref_genome, + cm.sample_id, + ) + sys.exit(1) length = cm.get_length(contig_id) + bin_size = cm.compute_bin_size() if length < 11000: continue else: - binned_reads.append(cm.bin_reads(contig_id)) + bins = cm.bin_reads(contig_id) + binned_reads.append(bins) + + if contig_id not in contigs_seen: + pos = 0 + for b in bins: + bin_coords.append(contig_id + "-{}".format(int(pos))) + pos += bin_size + contigs_seen.add(contig_id) row = np.concatenate(binned_reads) A.append(row) # bin by sample A = np.array(A).T - return A + bin_coords = np.array(bin_coords) + return A, bin_coords def estimate_slope_intercept(self, bin_log_probs): """Estimate the slope along reordering bins using least squares. 
@@ -181,16 +205,15 @@ def estimate_slope_intercept(self, bin_log_probs): rng = np.linspace(0, 1, bin_log_probs.size) y = bin_log_probs x = np.ones((y.size, 2)) - x[:,0] = rng + x[:, 0] = rng sol, residuals, rank, s = np.linalg.lstsq(x, y, rcond=None) - m,b = sol + m, b = sol - pred = m*rng + b + pred = m * rng + b residuals = y - pred - return m,b,residuals - + return m, b, residuals def compute_log_likelihood(self, log2_ptr, binned_counts): """Compute the log likelihood of reordered bin probabilities given a log2(PTR). @@ -214,7 +237,7 @@ def compute_log_likelihood(self, log2_ptr, binned_counts): x = np.linspace(0, 1, binned_counts.size) # change to base e loge_ptr = log2_ptr / np.log2(np.e) - log_probs = np.array([loge_ptr*xi for xi in x]) + log_probs = np.array([loge_ptr * xi for xi in x]) log_norm = scipy.special.logsumexp(log_probs) log_probs -= log_norm @@ -224,11 +247,10 @@ def compute_log_likelihood(self, log2_ptr, binned_counts): # log_norm = np.log2(np.power(2, log_probs).sum()) # log_probs -= log_norm - log_lk = (binned_counts*log_probs).sum() + log_lk = (binned_counts * log_probs).sum() return log_lk - def estimate_ptr_maximum_likelihood(self, binned_counts): """Compute maximum likelihood estimates of log2(PTR) gived observed reads. @@ -255,7 +277,7 @@ def estimate_ptr_maximum_likelihood(self, binned_counts): x = np.linspace(0, 1, binned_counts.size) # compute normalized probabilities loge_ptr = log2_ptr / np.log2(np.e) - log_probs = np.array([loge_ptr*xi for xi in x]) + log_probs = np.array([loge_ptr * xi for xi in x]) log_norm = scipy.special.logsumexp(log_probs) log_probs -= log_norm probs = np.exp(log_probs) @@ -271,10 +293,9 @@ def estimate_ptr_maximum_likelihood(self, binned_counts): return log2_ptr, intercept, log_lk, flip - def estimate_ptrs(self, coverage_maps, return_bins=False): """Estimate PTRs across multiple samples of the same reference genome. 
- + Parameters ---------- coverage_maps : list[CoverageMapContig] @@ -313,7 +334,14 @@ def estimate_ptrs(self, coverage_maps, return_bins=False): else: bam_file = cm.bam_file genome_id = cm.genome_id - estimate = CoPTRContigEstimate(cm.bam_file, cm.genome_id, cm.sample_id, np.nan, cm.reads, cm.frac_nonzero) + estimate = CoPTRContigEstimate( + cm.bam_file, + cm.genome_id, + cm.sample_id, + np.nan, + cm.reads, + cm.frac_nonzero, + ) estimates.append(estimate) parameters.append((np.nan, np.nan)) binned_counts.append([np.nan]) @@ -329,10 +357,9 @@ def estimate_ptrs(self, coverage_maps, return_bins=False): genome_id = coverage_maps[0].genome_id # bin by sample - A = self.construct_coverage_matrix(passing_coverage_maps) + A, bin_coords = self.construct_coverage_matrix(passing_coverage_maps) nbins = A.shape[0] - # filter out low coverage and high coverage bins, # remove samples with too few reads A_filtered = [] @@ -342,9 +369,11 @@ def estimate_ptrs(self, coverage_maps, return_bins=False): tmp = [] np.set_printoptions(suppress=True) - for i,col in enumerate(A.T): + for i, col in enumerate(A.T): - lower_bound, upper_bound = self.compute_genomewide_bounds(col, crit_region=0.05) + lower_bound, upper_bound = self.compute_genomewide_bounds( + col, crit_region=0.05 + ) col[np.logical_or(col > upper_bound, col < lower_bound)] = np.nan frac_filtered = np.isnan(col).sum() / col.size @@ -361,17 +390,25 @@ def estimate_ptrs(self, coverage_maps, return_bins=False): sample_id = passing_coverage_maps[i].sample_id frac_nonzero = passing_coverage_maps[i].frac_nonzero m = np.nan - estimate = CoPTRContigEstimate(bam_file, genome_id, sample_id, np.nan, reads, frac_nonzero) + estimate = CoPTRContigEstimate( + bam_file, genome_id, sample_id, np.nan, reads, frac_nonzero + ) estimates.append(estimate) parameters.append((np.nan, np.nan)) binned_counts.append([np.nan]) - if len(A_filtered) < self.min_samples: for cm in tmp: bam_file = cm.bam_file genome_id = cm.genome_id - estimate = CoPTRContigEstimate(cm.bam_file, cm.genome_id, cm.sample_id, np.nan, cm.reads, cm.frac_nonzero) + estimate = CoPTRContigEstimate( + cm.bam_file, + cm.genome_id, + cm.sample_id, + np.nan, + cm.reads, + cm.frac_nonzero, + ) estimates.append(estimate) parameters.append((np.nan, np.nan)) binned_counts.append([np.nan]) @@ -382,19 +419,39 @@ def estimate_ptrs(self, coverage_maps, return_bins=False): else: tmp = None - print_info("CoPTRContig", "running {}".format(genome_id)) + logger.info("Running %s.", genome_id) A = np.array(A_filtered).T # filter out rows where too many bins are missing - keep_rows = np.sum(np.isnan(A), axis=1) < 0.025*A.shape[1] - A = A[keep_rows,:] - - if keep_rows.sum() < 0.5*keep_rows.size: - for j,col in enumerate(A.T): + keep_rows = np.sum(np.isnan(A), axis=1) < 0.025 * A.shape[1] + A = A[keep_rows, :] + bin_coords = bin_coords[keep_rows] + + ppca_failed = False + try: + if keep_rows.sum() >= self.min_samples: + poisson_pca = PoissonPCA() + W, V = poisson_pca.pca(A, k=1) + except: + ppca_failed = True + logger.warn("PoissonPCA failed for assembly " + genome_ids[0]) + + + if keep_rows.sum() < self.min_samples or ppca_failed: + for j, col in enumerate(A.T): reads = col[np.isfinite(col)].sum() - frac_nonzero = (col[np.isfinite(col)] > 0).sum() / col[np.isfinite(col)].size - estimate = CoPTRContigEstimate(bam_files[j], genome_ids[j], sample_ids[j], np.nan, reads, frac_nonzero) + frac_nonzero = (col[np.isfinite(col)] > 0).sum() / col[ + np.isfinite(col) + ].size + estimate = CoPTRContigEstimate( + bam_files[j], + 
genome_ids[j], + sample_ids[j], + np.nan, + reads, + frac_nonzero, + ) estimates.append(estimate) parameters.append((np.nan, np.nan)) binned_counts.append([np.nan]) @@ -404,19 +461,19 @@ def estimate_ptrs(self, coverage_maps, return_bins=False): else: return estimates - poisson_pca = PoissonPCA() - W,V = poisson_pca.pca(A,k=1) sorted_idx = np.argsort(W.flatten()) - A = A[sorted_idx,:] + A = A[sorted_idx, :] + bin_coords = bin_coords[sorted_idx] - for j,col in enumerate(A.T): + flipped_bins = False + for j, col in enumerate(A.T): col = col[np.isfinite(col)] reads = col.sum() - lb = int(0.05*col.size) - ub = int(0.95*col.size) + lb = int(0.05 * col.size) + ub = int(0.95 * col.size) col = col[lb:ub] - m,b,log_lk,flip = self.estimate_ptr_maximum_likelihood(col) + m, b, log_lk, flip = self.estimate_ptr_maximum_likelihood(col) # m,b,res = self.estimate_slope_intercept(col) # flip = m < 0 @@ -424,23 +481,28 @@ def estimate_ptrs(self, coverage_maps, return_bins=False): # so let's look at positive slopes only m = np.abs(m) frac_nonzero = (col > 0).sum() / col.size - estimate = CoPTRContigEstimate(bam_files[j], genome_ids[j], sample_ids[j], m, reads, frac_nonzero) + estimate = CoPTRContigEstimate( + bam_files[j], genome_ids[j], sample_ids[j], m, reads, frac_nonzero + ) estimates.append(estimate) parameters.append((m, b)) bc = col if flip: + flipped_bins = True bc = np.flip(bc) binned_counts.append(bc) + if flipped_bins: + bin_coords = np.flip(bin_coords) - print_info("CoPTRContig", "finished {}".format(genome_id)) + logger.info("Finished %s.", genome_id) if return_bins: - return estimates, parameters, binned_counts + return estimates, parameters, binned_counts, bin_coords else: - return estimates + return estimates, bin_coords def _parallel_helper(self, x): ref_genome = x[0] @@ -448,13 +510,14 @@ def _parallel_helper(self, x): plot_folder = x[2] if plot_folder is not None: - estimates, parameters, reordered_bins = self.estimate_ptrs(coverage_maps, return_bins=True) + estimates, parameters, reordered_bins, bin_coords = self.estimate_ptrs( + coverage_maps, return_bins=True + ) plot_fit(estimates, parameters, coverage_maps, reordered_bins, plot_folder) else: - estimates = self.estimate_ptrs(coverage_maps) - - return (ref_genome, estimates) + estimates, bin_coords = self.estimate_ptrs(coverage_maps) + return (ref_genome, estimates), bin_coords def plot_fit(estimates, parameters, coverage_maps, reordered_bins, plot_folder): @@ -463,16 +526,17 @@ def plot_fit(estimates, parameters, coverage_maps, reordered_bins, plot_folder): coptr_contig = CoPTRContig(min_reads=5000, min_samples=5) # make sure esimates and coverage maps are in the same order - order = dict([ (est.sample_id, i) for i,est in enumerate(estimates)]) + order = dict([(est.sample_id, i) for i, est in enumerate(estimates)]) tmp = [[] for cm in coverage_maps] for cm in coverage_maps: idx = order[cm.sample_id] tmp[idx] = cm coverage_maps = tmp - nplots = 0 - for estimate,p,cm,rbins in zip(estimates, parameters, coverage_maps, reordered_bins): + for estimate, p, cm, rbins in zip( + estimates, parameters, coverage_maps, reordered_bins + ): if np.isnan(estimate.estimate): continue @@ -481,7 +545,8 @@ def plot_fit(estimates, parameters, coverage_maps, reordered_bins, plot_folder): binned_counts = [] for cid in contig_ids: length = cm.get_length(cid) - if length < 11000: continue + if length < 11000: + continue binned_counts += cm.bin_reads(cid).tolist() @@ -490,25 +555,37 @@ def plot_fit(estimates, parameters, coverage_maps, reordered_bins, 
plot_folder): median = np.median(binned_counts[binned_counts != 0]) - fig,ax = plt.subplots(nrows=2,ncols=1, figsize=(9,6)) + fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(9, 6)) # plot unordered bins x1 = np.linspace(0, 1, binned_counts.size) ax[0].scatter(x1, binned_counts) ax[0].set_yscale("log", base=2) - ax[0].plot(x1, np.ones(binned_counts.size)*median, c="red", linewidth=3) - ax[0].plot(x1, np.ones(binned_counts.size)*upper_bound, c="red", linestyle=":", linewidth=3) - ax[0].plot(x1, np.ones(binned_counts.size)*lower_bound, c="red", linestyle=":", linewidth=3) + ax[0].plot(x1, np.ones(binned_counts.size) * median, c="red", linewidth=3) + ax[0].plot( + x1, + np.ones(binned_counts.size) * upper_bound, + c="red", + linestyle=":", + linewidth=3, + ) + ax[0].plot( + x1, + np.ones(binned_counts.size) * lower_bound, + c="red", + linestyle=":", + linewidth=3, + ) y_lim = ax[0].get_ylim() ax[0].set_ylabel("Read Count") ax[0].set_xlabel("Unordered Bins") # plot model fit emp_probs = rbins / rbins.sum() - m,b = p # slope intercept in log space + m, b = p # slope intercept in log space x2 = np.linspace(0, 1, emp_probs.size) - y = np.array([m*xi + b for xi in x2]) + y = np.array([m * xi + b for xi in x2]) y = np.power(2, y) y /= y.sum() ax[1].scatter(x2, emp_probs, c="C1") @@ -516,15 +593,20 @@ def plot_fit(estimates, parameters, coverage_maps, reordered_bins, plot_folder): ax[1].set_yscale("log", base=2) ax[1].set_ylabel("Density") ax[1].set_xlabel("Reordered Bins") - ax[1].set_title("\nlog2(PTR)={:.3f} (Reads = {})".format(m, int(estimate.nreads))) + ax[1].set_title( + "\nlog2(PTR)={:.3f} (Reads = {})".format(m, int(estimate.nreads)) + ) plt.tight_layout() - plt.savefig(os.path.join(plot_folder, estimate.sample_id + "-" + estimate.genome_id + ".pdf")) + plt.savefig( + os.path.join( + plot_folder, estimate.sample_id + "-" + estimate.genome_id + ".pdf" + ) + ) plt.close() nplots += 1 - def load_coverage_maps(file_handle, ref_id): reached_end = False coverage_maps = [] @@ -539,8 +621,13 @@ def load_coverage_maps(file_handle, ref_id): return coverage_maps - -def estimate_ptrs_coptr_contig(assembly_genome_ids, grouped_coverage_map_folder, min_reads, min_samples, threads, plot_folder=None): +def estimate_ptrs_coptr_contig( + assembly_genome_ids, + grouped_coverage_map_folder, + min_reads, + min_samples, + plot_folder=None, +): """Estimate Peak-to-Trough ratios across samples. Parameters @@ -564,37 +651,25 @@ def estimate_ptrs_coptr_contig(assembly_genome_ids, grouped_coverage_map_folder, A dictionary with key reference genome id and value a list of CoPTRContigEstimate for that reference genome. 
""" - print_info("CoPTRContig", "checking reference genomes") + logger.info("Checking reference genomes.") coptr_contig_estimates = {} coptr_contig = CoPTRContig(min_reads, min_samples) - if threads == 1: - for ref_id in sorted(assembly_genome_ids): - with open(os.path.join(grouped_coverage_map_folder, ref_id + ".cm.pkl"), "rb") as f: - coverage_maps = load_coverage_maps(f, ref_id) + ref_id_to_bin_coords = {} - if plot_folder is not None: - estimates, parameters, reordered_bins = coptr_contig.estimate_ptrs(coverage_maps, return_bins=True) - plot_fit(estimates, parameters, coverage_maps, reordered_bins, plot_folder) - else: - estimates = coptr_contig.estimate_ptrs(coverage_maps) - - coptr_contig_estimates[ref_id] = estimates - - else: - # workers for multiprocessing - pool = mp.Pool(threads) - coverage_maps = {} - - for i,ref_id in enumerate(sorted(assembly_genome_ids)): - with open(os.path.join(grouped_coverage_map_folder, ref_id + ".cm.pkl"), "rb") as f: - coverage_maps[ref_id] = load_coverage_maps(f, ref_id) + for ref_id in sorted(assembly_genome_ids): + with open(os.path.join(grouped_coverage_map_folder, ref_id + ".cm.pkl"), "rb") as f: + coverage_maps = load_coverage_maps(f, ref_id) - if (i % threads == 0 and i > 0) or i == len(assembly_genome_ids) - 1: - flat_coverage_maps = [(ref_id, coverage_maps[ref_id], plot_folder) for ref_id in coverage_maps] - flat_results = pool.map(coptr_contig._parallel_helper, flat_coverage_maps) - coptr_contig_estimates.update(dict(flat_results)) + if plot_folder is not None: + estimates, parameters, reordered_bins, bin_coords = ( + coptr_contig.estimate_ptrs(coverage_maps, return_bins=True) + ) + plot_fit(estimates, parameters, coverage_maps, reordered_bins, plot_folder) + else: + estimates, bin_coords = coptr_contig.estimate_ptrs(coverage_maps) - coverage_maps = {} + coptr_contig_estimates[ref_id] = estimates + ref_id_to_bin_coords[ref_id] = bin_coords - return coptr_contig_estimates \ No newline at end of file + return coptr_contig_estimates, ref_id_to_bin_coords diff --git a/src/coptr_ref.py b/src/coptr/coptr_ref.py similarity index 72% rename from src/coptr_ref.py rename to src/coptr/coptr_ref.py index 798e6a6..6424818 100644 --- a/src/coptr_ref.py +++ b/src/coptr/coptr_ref.py @@ -21,16 +21,20 @@ along with CoPTR. If not, see . """ +import logging import math import multiprocessing as mp -import numpy as np import os.path import pickle as pkl +import struct +import sys + +import numpy as np import scipy.optimize import scipy.stats -import sys -from src.print import print_info, print_warning + +logger = logging.getLogger(__name__) class QCResult: @@ -62,6 +66,28 @@ def __str__(self): def __repr__(self): return self.__str__() + +class CoordinateMap: + """Map post-filtering genomic coordinates to pre-filtering + genomic coordinates. + """ + + def __init__(self): + self.break_points = [] + + def update_break_points(self, break_point, shift_size): + self.break_points.append((break_point, shift_size)) + + def translate(self, filtered_coord): + unfiltered_coord = filtered_coord + for break_point, shift_size in self.break_points: + if unfiltered_coord >= break_point: + unfiltered_coord += shift_size + return unfiltered_coord + + + + class ReadFilterRef: """Read filtering steps for CoPTR Ref. @@ -77,7 +103,6 @@ def __init__(self, min_reads, min_cov): self.min_reads = min_reads self.min_cov = min_cov - def filter_reads(self, read_positions, genome_length): """Filter out reads in regions of the genome where the coverage is either too high or too low. 
@@ -101,20 +126,38 @@ def filter_reads(self, read_positions, genome_length): # don't filter if there are too few reads # just return that the sample failed QC if len(read_positions) < self.min_reads: - return np.array([]), genome_length, QCResult(-1, len(read_positions), -1, False) + return ( + np.array([]), + genome_length, + QCResult(-1, len(read_positions), -1, False), + ) + coord_map = CoordinateMap() bin_size = self.compute_bin_size(genome_length) read_positions = np.copy(read_positions) - filtered_read_positions, filtered_genome_length = self.filter_reads_phase1(read_positions, genome_length, bin_size) + filtered_read_positions, filtered_genome_length = self.filter_reads_phase1( + read_positions, genome_length, bin_size, coord_map + ) # more than 25% of the genome is removed frac_removed = 1 - filtered_genome_length / genome_length - if frac_removed > 0.25: - return np.array(filtered_read_positions), filtered_genome_length, QCResult(-1, filtered_read_positions.size, frac_removed, False) - - filtered_read_positions, filtered_genome_length = self.filter_reads_phase2(filtered_read_positions, filtered_genome_length, bin_size) - qc_result = self.quality_check(filtered_read_positions, filtered_genome_length, genome_length, bin_size, self.min_reads, self.min_cov) - return filtered_read_positions, filtered_genome_length, qc_result - + if frac_removed > 0.25: + return ( + np.array(filtered_read_positions), + filtered_genome_length, + QCResult(-1, filtered_read_positions.size, frac_removed, False), + ) + + filtered_read_positions, filtered_genome_length = self.filter_reads_phase2( + filtered_read_positions, filtered_genome_length, bin_size, coord_map + ) + qc_result = self.quality_check( + filtered_read_positions, + filtered_genome_length, + genome_length, bin_size, + self.min_reads, + self.min_cov + ) + return filtered_read_positions, filtered_genome_length, qc_result, coord_map def compute_bin_size(self, genome_length): """Compute bin size for read counts. @@ -136,10 +179,24 @@ def compute_bin_size(self, genome_length): # want a number divisible by 100 for downstream steps bin_size = bin_size - (bin_size % 100) - return bin_size + if bin_size < 1: + logger.error( + "Found complete reference genome with <500bp. " + "This is probably due to a mislabeled contig. " + "Please check your .genomes file." + ) + return bin_size - def quality_check(self, filtered_read_positions, filtered_genome_length, original_genome_length, bin_size, min_reads, frac_nonzero): + def quality_check( + self, + filtered_read_positions, + filtered_genome_length, + original_genome_length, + bin_size, + min_reads, + frac_nonzero, + ): """A basic quality check for required coverage of a genome in a sample. 
Parameters @@ -161,11 +218,15 @@ def quality_check(self, filtered_read_positions, filtered_genome_length, origina qc_result : QCResult Object that stores the result of the quality check """ - binned_counts = self.bin_reads(filtered_read_positions, filtered_genome_length, bin_size) + binned_counts = self.bin_reads( + filtered_read_positions, filtered_genome_length, bin_size + ) nonzero_bins = (binned_counts > 0).sum() / np.max((1, binned_counts.size)) nreads = binned_counts.sum() - passed_qc = True if nonzero_bins >= frac_nonzero and nreads >= min_reads else False + passed_qc = ( + True if nonzero_bins >= frac_nonzero and nreads >= min_reads else False + ) frac_removed = 1 - filtered_genome_length / original_genome_length if frac_removed > 0.25: @@ -173,7 +234,6 @@ def quality_check(self, filtered_read_positions, filtered_genome_length, origina return QCResult(nonzero_bins, nreads, frac_removed, passed_qc) - def compute_genomewide_bounds(self, binned_counts): """Compute bounds on read counts in bins. @@ -198,12 +258,11 @@ def compute_genomewide_bounds(self, binned_counts): norm = scipy.stats.norm() m = norm.ppf(0.95) - lower_bound = np.power(2, median - m*std) - upper_bound = np.power(2, median + m*std) + lower_bound = np.power(2, median - m * std) + upper_bound = np.power(2, median + m * std) return lower_bound, upper_bound - def compute_bin_bounds(self, binned_counts): """Compute bounds on read counts in a smaller window along the genome. @@ -226,12 +285,11 @@ def compute_bin_bounds(self, binned_counts): norm = scipy.stats.norm() m = norm.ppf(0.975) - lower_bound = np.power(2, median - m*std) - upper_bound = np.power(2, median + m*std) + lower_bound = np.power(2, median - m * std) + upper_bound = np.power(2, median + m * std) return lower_bound, upper_bound - def compute_rolling_sum(self, read_positions, genome_length, bin_size): """Compute a rolling sum of read counts in 10Kb bins, sliding 1Kb at a time. @@ -249,14 +307,14 @@ def compute_rolling_sum(self, read_positions, genome_length, bin_size): rolling_counts : np.array of float The read count in each rolling bin endpoints : np.array of tuple - Each tuple gives the left (inclusive) and right (exclusive) + Each tuple gives the left (inclusive) and right (exclusive) end point of each bin. """ - step = math.ceil(bin_size/100) + step = math.ceil(bin_size / 100) s = read_positions.size sorted_reads = np.sort(read_positions) - endpoints = np.array([(i, i+bin_size) for i in range(0, genome_length, step)]) + endpoints = np.array([(i, i + bin_size) for i in range(0, genome_length, step)]) left_endpoints = [e[0] for e in endpoints] right_endpoints = [e[1] for e in endpoints] @@ -267,9 +325,9 @@ def compute_rolling_sum(self, read_positions, genome_length, bin_size): return rolling_counts, endpoints - def remove_reads_by_region(self, read_positions, genome_length, regions): + def remove_reads_by_region(self, read_positions, genome_length, regions, coord_map): """Remove reads that overlap a region in regions. 
- + Parameters ---------- read_positions : np.array of int @@ -291,20 +349,25 @@ def remove_reads_by_region(self, read_positions, genome_length, regions): new_genome_length = genome_length filter_count = 0 adjustment = 0 - for left,right in regions: + for left, right in regions: remove_size = int(right - left) new_genome_length -= remove_size - keep = np.logical_or(read_positions < (left - adjustment), read_positions >= (right - adjustment)) + keep = np.logical_or( + read_positions < (left - adjustment), + read_positions >= (right - adjustment), + ) adjustment += remove_size - filter_count += (read_positions.size - keep.sum()) + filter_count += read_positions.size - keep.sum() read_positions = read_positions[keep] read_positions[read_positions > (right - adjustment)] -= remove_size + coord_map.update_break_points(right - remove_size, remove_size) + return read_positions, new_genome_length - def filter_reads_phase1(self, read_positions, genome_length, bin_size): + def filter_reads_phase1(self, read_positions, genome_length, bin_size, coord_map): """A coarse-grained genomewide filter that removes reads in ultra-high or ultra-low coverage regions. @@ -326,36 +389,40 @@ def filter_reads_phase1(self, read_positions, genome_length, bin_size): binned_reads = self.bin_reads(read_positions, genome_length, bin_size) lower_bound, upper_bound = self.compute_genomewide_bounds(binned_reads) - rolling_counts, endpoints = self.compute_rolling_sum(read_positions, genome_length, bin_size) + rolling_counts, endpoints = self.compute_rolling_sum( + read_positions, genome_length, bin_size + ) # find regions to remove remove_start = None remove_end = None remove_regions = [] - for i,c in enumerate(rolling_counts): + for i, c in enumerate(rolling_counts): left, right = endpoints[i] if (c > upper_bound or c < lower_bound) and remove_start is None: remove_start = left elif c < upper_bound and c > lower_bound and remove_start is not None: - remove_end = left # want endpoint of previous bin - remove_regions.append( (remove_start, remove_end) ) + remove_end = left # want endpoint of previous bin + remove_regions.append((remove_start, remove_end)) remove_start = None remove_end = None # get endpoint if remove_start is not None: remove_end = genome_length - remove_regions.append( (remove_start, remove_end) ) + remove_regions.append((remove_start, remove_end)) + + read_positions, new_genome_length = self.remove_reads_by_region( + read_positions, genome_length, remove_regions, coord_map + ) - read_positions, new_genome_length = self.remove_reads_by_region(read_positions, genome_length, remove_regions) - binned_reads = self.bin_reads(read_positions, genome_length, bin_size) return read_positions, new_genome_length - def filter_reads_phase2(self, read_positions, genome_length, bin_size): + def filter_reads_phase2(self, read_positions, genome_length, bin_size, coord_map): """A fine-grained filter that removes reads in localized regions with too-high or too-low coverage. For each 10Kb, looks 6.25% of the genome length ahead and 6.25% of the genome length behind. 
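Both filtering phases rely on compute_rolling_sum, which slides a bin_size window along the genome in steps of bin_size/100 and counts the reads inside each window. A minimal sketch of that idea, assuming the per-window counts come from searchsorted over the sorted read positions (the counting lines themselves are context this diff does not show):

import math
import numpy as np

def sketch_rolling_sum(read_positions, genome_length, bin_size):
    step = math.ceil(bin_size / 100)
    sorted_reads = np.sort(read_positions)
    endpoints = np.array([(i, i + bin_size) for i in range(0, genome_length, step)])
    # a read falls in a window when left <= position < right
    left = np.searchsorted(sorted_reads, endpoints[:, 0], side="left")
    right = np.searchsorted(sorted_reads, endpoints[:, 1], side="left")
    return right - left, endpoints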
@@ -377,11 +444,13 @@ def filter_reads_phase2(self, read_positions, genome_length, bin_size): """ # rolling counts in 10Kb bins, shifting by 1000bp - rolling_counts, endpoints = self.compute_rolling_sum(read_positions, genome_length, bin_size) + rolling_counts, endpoints = self.compute_rolling_sum( + read_positions, genome_length, bin_size + ) nbins = rolling_counts.size # how many bins to use - window = np.max((4,math.ceil(0.0625*genome_length / bin_size))) + window = np.max((4, math.ceil(0.0625 * genome_length / bin_size))) window = int(window) # find regions to remove @@ -391,14 +460,16 @@ def filter_reads_phase2(self, read_positions, genome_length, bin_size): bounds = {} - for i,c in enumerate(rolling_counts): + for i, c in enumerate(rolling_counts): # endpoints left, right = endpoints[i] # take bins in window to the left and right - left_bins = [(i - 100*j) for j in range(window)] - right_bins = [(i + 100*j) % nbins for j in range(window)] - bins = np.concatenate((rolling_counts[left_bins], rolling_counts[right_bins])) + left_bins = [(i - 100 * j) for j in range(window)] + right_bins = [(i + 100 * j) % nbins for j in range(window)] + bins = np.concatenate( + (rolling_counts[left_bins], rolling_counts[right_bins]) + ) median = np.median(bins) # computing these bounds is expensive, so @@ -413,8 +484,8 @@ def filter_reads_phase2(self, read_positions, genome_length, bin_size): remove_start = left elif (c < upper_bound and c > lower_bound) and remove_start is not None: - remove_end = left # want endpoint of previous bin - remove_regions.append( (remove_start, remove_end) ) + remove_end = left # want endpoint of previous bin + remove_regions.append((remove_start, remove_end)) remove_start = None remove_end = None @@ -423,13 +494,14 @@ def filter_reads_phase2(self, read_positions, genome_length, bin_size): remove_end = genome_length remove_regions.append((remove_start, remove_end)) - read_positions, new_genome_length = self.remove_reads_by_region(read_positions, genome_length, remove_regions) + read_positions, new_genome_length = self.remove_reads_by_region( + read_positions, genome_length, remove_regions, coord_map + ) return read_positions, new_genome_length - def bin_reads(self, read_positions, genome_length, bin_size=10000): """Aggregate reads into bin_size windows and compute the read count in each window. - + Parameters ---------- read_positions : np.array of int @@ -452,7 +524,6 @@ def bin_reads(self, read_positions, genome_length, bin_size=10000): return bin_counts - class CoPTRRefEstimate: """Data structure to store CoPTRRef estimates. @@ -477,7 +548,18 @@ class CoPTRRefEstimate: The fraction of nonzero bins in 10Kb windows. 
""" - def __init__(self, bam_file, genome_id, sample_id, estimate, ori_estimate, ter_estimate, nreads, cov_frac): + def __init__( + self, + bam_file, + genome_id, + sample_id, + estimate, + ori_estimate, + ter_estimate, + nreads, + cov_frac, + ori_estimate_coord + ): self.bam_file = bam_file self.genome_id = genome_id self.sample_id = sample_id @@ -486,18 +568,22 @@ def __init__(self, bam_file, genome_id, sample_id, estimate, ori_estimate, ter_e self.ter_estimate = ter_estimate self.nreads = nreads self.cov_frac = cov_frac - + self.ori_estimate_coord = ori_estimate_coord def __str__(self): return "CoPTRRefEstimate(bam_file={}, genome_id={}, sample_id={}, estimate={:.3f}, nreads={}, cov_frac={})".format( - self.bam_file, self.genome_id, self.sample_id, self.estimate, self.nreads, self.cov_frac - ) + self.bam_file, + self.genome_id, + self.sample_id, + self.estimate, + self.nreads, + self.cov_frac, + ) def __repr__(self): return self.__str__() - class CoPTRRef: """CoPTR estimator for complete reference genomes. @@ -513,7 +599,6 @@ def __init__(self, min_reads, min_cov): self.min_reads = min_reads self.min_cov = min_cov - def compute_log_likelihood(self, ori_loc, ter_loc, log2_ptr, read_locs): """Model log-likelihood for one sample. @@ -528,27 +613,52 @@ def compute_log_likelihood(self, ori_loc, ter_loc, log2_ptr, read_locs): read_locs : np.array of float A 1D numpy array giving read positions in [0, 1] """ - if ori_loc > 1 or ori_loc < 0 or ter_loc > 1 or ter_loc < 0 or log2_ptr > np.log2(16) or log2_ptr <= np.log2(1): + if ( + ori_loc > 1 + or ori_loc < 0 + or ter_loc > 1 + or ter_loc < 0 + or log2_ptr > np.log2(16) + or log2_ptr <= np.log2(1) + ): return -np.inf - assert read_locs.size == 1 or np.all(read_locs[:-1] <= read_locs[1:]), "reads must be sorted" + assert read_locs.size == 1 or np.all( + read_locs[:-1] <= read_locs[1:] + ), "reads must be sorted" x1 = np.min([ori_loc, ter_loc]) x2 = np.max([ori_loc, ter_loc]) - dist = np.abs(x2-x1) + dist = np.abs(x2 - x1) if dist < 0.45 or dist > 0.55: return -np.inf # compute log2_p(x_i), log2_p(x_t) so that the density normalizes alpha = log2_ptr / (ori_loc - ter_loc) if ori_loc < ter_loc: - c1 = np.log2(np.log(2)) - \ - np.log2((1/alpha)*(2**(alpha*x1) + 2**(alpha*(x2 - x1)) - 2 - 2**(-alpha*(1-x2) - log2_ptr) + 2**(-log2_ptr))) + c1 = np.log2(np.log(2)) - np.log2( + (1 / alpha) + * ( + 2 ** (alpha * x1) + + 2 ** (alpha * (x2 - x1)) + - 2 + - 2 ** (-alpha * (1 - x2) - log2_ptr) + + 2 ** (-log2_ptr) + ) + ) c2 = c1 - log2_ptr else: - c1 = np.log2(np.log(2)) - \ - np.log2((1/alpha)*(2**(alpha*x1) + 2**(alpha*(x2 - x1)) - 2 - 2**(-alpha*(1-x2) + log2_ptr) + 2**(log2_ptr))) + c1 = np.log2(np.log(2)) - np.log2( + (1 / alpha) + * ( + 2 ** (alpha * x1) + + 2 ** (alpha * (x2 - x1)) + - 2 + - 2 ** (-alpha * (1 - x2) + log2_ptr) + + 2 ** (log2_ptr) + ) + ) c2 = c1 + log2_ptr log_prob = 0 @@ -562,21 +672,23 @@ def compute_log_likelihood(self, ori_loc, ter_loc, log2_ptr, read_locs): left_part = read_locs[:left_x] middle_part = read_locs[left_x:right_x] right_part = read_locs[right_x:] - log_prob += (-alpha*(left_part - x1) + c1).sum() - log_prob += ( alpha*(middle_part - x1) + c1).sum() - log_prob += (-alpha*(right_part - x2) + c2).sum() + log_prob += (-alpha * (left_part - x1) + c1).sum() + log_prob += (alpha * (middle_part - x1) + c1).sum() + log_prob += (-alpha * (right_part - x2) + c2).sum() return log_prob - - def compute_multisample_log_likelihood(self, ori_loc, ter_loc, log2_ptrs, read_locs_list): + def compute_multisample_log_likelihood( + self, 
ori_loc, ter_loc, log2_ptrs, read_locs_list + ): log_lk = 0 for log2_ptr, read_locs in zip(log2_ptrs, read_locs_list): log_lk += self.compute_log_likelihood(ori_loc, ter_loc, log2_ptr, read_locs) return log_lk - - def estimate_ptr(self, read_positions, ref_genome_len, filter_reads=True, estimate_terminus=False): + def estimate_ptr( + self, read_positions, ref_genome_len, filter_reads=True, estimate_terminus=False + ): """Estimate the PTR for a single sample. Parameters @@ -604,7 +716,9 @@ def estimate_ptr(self, read_positions, ref_genome_len, filter_reads=True, estima """ if filter_reads: rf = ReadFilterRef(self.min_reads, self.min_cov) - read_positions, ref_genome_len, qc_result = rf.filter_reads(read_positions, ref_genome_len) + read_positions, ref_genome_len, qc_result, coord_map = rf.filter_reads( + read_positions, ref_genome_len + ) if not qc_result.passed_qc: return np.nan, np.nan, np.nan, np.nan, qc_result @@ -616,8 +730,8 @@ def estimate_ptr(self, read_positions, ref_genome_len, filter_reads=True, estima if estimate_terminus: f = lambda x: -self.compute_log_likelihood(x[0], x[1], x[2], read_locs) - log_ptrs = [0.2*(i+1) for i in range(5)] - oris = [0.2*i for i in range(5)] + log_ptrs = [0.2 * (i + 1) for i in range(5)] + oris = [0.2 * i for i in range(5)] np.random.shuffle(log_ptrs) np.random.shuffle(oris) xs = [] @@ -628,10 +742,12 @@ def estimate_ptr(self, read_positions, ref_genome_len, filter_reads=True, estima xs = np.array(xs) else: - f = lambda x: -self.compute_log_likelihood(x[0], (x[0] + 0.5) % 1, x[1], read_locs) + f = lambda x: -self.compute_log_likelihood( + x[0], (x[0] + 0.5) % 1, x[1], read_locs + ) - log_ptrs = [0.2*(i+1) for i in range(5)] - oris = [0.2*i for i in range(5)] + log_ptrs = [0.2 * (i + 1) for i in range(5)] + oris = [0.2 * i for i in range(5)] np.random.shuffle(log_ptrs) np.random.shuffle(oris) xs = [] @@ -639,7 +755,7 @@ def estimate_ptr(self, read_positions, ref_genome_len, filter_reads=True, estima for ori in oris: xs.append([ori, log_ptr]) xs = np.array(xs) - + # initial ori estimate, initial ptr estimate best_x = 0 best_f = np.inf @@ -661,17 +777,15 @@ def estimate_ptr(self, read_positions, ref_genome_len, filter_reads=True, estima ter = (ori + 0.5) % 1 log2_ptr = np.clip(best_x[1], 0, np.inf) - np.seterr(invalid="warn") if best_f == np.inf: - print_warning("CoPTRRef", "PTR optimization failed") + logger.error("PTR optimization failed.") return np.nan, np.nan, np.nan, np.nan elif not filter_reads: return log2_ptr, ori, ter, -best_f else: return log2_ptr, ori, ter, -best_f, qc_result - def update_ori_ter(self, log2_ptrs, read_locs_list): """Compute maximum likelihood ori estimates across samples given estimated log2(PTR)s across samples. @@ -692,8 +806,10 @@ def update_ori_ter(self, log2_ptrs, read_locs_list): """ np.seterr(invalid="ignore") - xs = [0.1*i for i in range(10)] - f = lambda x: -self.compute_multisample_log_likelihood(x[0], (x[0] + 0.5 ) % 1, log2_ptrs, read_locs_list) + xs = [0.1 * i for i in range(10)] + f = lambda x: -self.compute_multisample_log_likelihood( + x[0], (x[0] + 0.5) % 1, log2_ptrs, read_locs_list + ) best_x = None best_f = np.inf @@ -705,13 +821,12 @@ def update_ori_ter(self, log2_ptrs, read_locs_list): best_f = res.fun np.seterr(invalid="warn") - return best_x, (best_x+0.5)%1 - + return best_x, (best_x + 0.5) % 1 def update_ptrs(self, ori, ter, prv_log2_ptrs, read_locs_list): """Compute maximum likelihood PTR estimates across samples given an estimated replication origin and terminus. 
- + Parameters ---------- ori : float @@ -731,14 +846,13 @@ def update_ptrs(self, ori, ter, prv_log2_ptrs, read_locs_list): np.seterr(invalid="ignore") log2_ptrs = [] - for i,read_locs in enumerate(read_locs_list): + for i, read_locs in enumerate(read_locs_list): f = lambda x: -self.compute_log_likelihood(ori, ter, x, read_locs) res = scipy.optimize.minimize(f, prv_log2_ptrs[i], method="SLSQP") log2_ptrs.append(res.x[0]) np.seterr(invalid="warn") return log2_ptrs - def estimate_ptrs(self, coverage_maps): """Compute maximum likelihood PTR estimates across samples. @@ -758,26 +872,46 @@ def estimate_ptrs(self, coverage_maps): sample_ids = [] read_positions_list = [] lengths = [] + filtered_genomes_lengths = [] + coord_maps = [] genome_id = coverage_maps[0].genome_id for cm in coverage_maps: - read_positions, ref_genome_len, qc_result = rf.filter_reads(cm.read_positions, cm.length) + read_positions, ref_genome_len, qc_result, coord_map = rf.filter_reads( + cm.read_positions, cm.length + ) + coord_maps.append(coord_map) + filtered_genomes_lengths.append(ref_genome_len) if qc_result.passed_qc: read_positions_list.append(np.array(read_positions)) lengths.append(ref_genome_len) sample_ids.append(cm.sample_id) - estimates.append(CoPTRRefEstimate(cm.bam_file, cm.genome_id, cm.sample_id, np.nan, np.nan, np.nan, qc_result.nreads, qc_result.frac_nonzero)) + estimates.append( + CoPTRRefEstimate( + cm.bam_file, + cm.genome_id, + cm.sample_id, + np.nan, + np.nan, + np.nan, + qc_result.nreads, + qc_result.frac_nonzero, + np.nan + ) + ) if len(sample_ids) == 0: return estimates - print_info("CoPTRRef", "running {}".format(genome_id)) + logger.info("Running %s.", genome_id) # first, compute individiual log2_ptr, ori, ter estimates log2_ptrs = [] read_locs_list = [] - for read_positions,length in zip(read_positions_list, lengths): - log2_ptr, ori, ter, f = self.estimate_ptr(read_positions, length, filter_reads=False) + for read_positions, length in zip(read_positions_list, lengths): + log2_ptr, ori, ter, f = self.estimate_ptr( + read_positions, length, filter_reads=False + ) log2_ptrs.append(log2_ptr) read_locs_list.append(np.sort(read_positions / length)) @@ -788,24 +922,25 @@ def estimate_ptrs(self, coverage_maps): log2_ptrs = self.update_ptrs(ori, ter, log2_ptrs, read_locs_list) n = 0 - for i,cm in enumerate(coverage_maps): + for i, cm in enumerate(coverage_maps): if cm.sample_id in sample_ids: # sanity check that the ptr estimate is good if np.abs(log2_ptrs[n] - 4) < 1e-4: estimates[i].ori_estimate = ori estimates[i].ter_estimate = ter estimates[i].estimate = np.nan + estimates[i].ori_estimate_coord = coord_maps[i].translate(ori*filtered_genomes_lengths[i]) else: estimates[i].ori_estimate = ori estimates[i].ter_estimate = ter estimates[i].estimate = log2_ptrs[n] + estimates[i].ori_estimate_coord = coord_maps[i].translate(ori*filtered_genomes_lengths[i]) n += 1 - print_info("CoPTRRef", "finished {}".format(genome_id)) + logger.info("Finished %s.", genome_id) return estimates - def _parallel_helper(self, x): ref_genome = x[0] coverage_maps = x[1] @@ -813,8 +948,14 @@ def _parallel_helper(self, x): return (ref_genome, self.estimate_ptrs(coverage_maps)) -def plot_fit(coptr_ref_est, read_positions, genome_length, min_reads, min_cov, plot_folder): - import matplotlib.pyplot as plt +def plot_fit( + coptr_ref_est, read_positions, genome_length, min_reads, min_cov, plot_folder +): + try: + import matplotlib.pyplot as plt + except ModuleNotFoundError: + logger.critical("Please install matplotlib to enable 
plotting.") + raise coptr = CoPTRRef(min_reads, min_cov) rf = ReadFilterRef(min_reads, min_cov) @@ -822,7 +963,7 @@ def plot_fit(coptr_ref_est, read_positions, genome_length, min_reads, min_cov, p rp = np.array(read_positions) length = genome_length - fig,ax = plt.subplots(nrows=2,ncols=1, figsize=(9,6)) + fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(9, 6)) # first plot unfiltered bins bin_size = rf.compute_bin_size(genome_length) @@ -832,9 +973,9 @@ def plot_fit(coptr_ref_est, read_positions, genome_length, min_reads, min_cov, p lower_bound, upper_bound = rf.compute_genomewide_bounds(binned_counts) min_y = np.min(binned_counts[binned_counts != 0]) - min_y -= 0.5*min_y - max_y = np.max(binned_counts[binned_counts != 0]) + 0.008*binned_counts.sum() - + min_y -= 0.5 * min_y + max_y = np.max(binned_counts[binned_counts != 0]) + 0.008 * binned_counts.sum() + # plot bins ax[0].scatter(x, binned_counts) ax[0].set_yscale("log", base=2) @@ -842,9 +983,13 @@ def plot_fit(coptr_ref_est, read_positions, genome_length, min_reads, min_cov, p y_lim = ax[0].get_ylim() # plot upper and lower bounds - ax[0].plot(x, median*np.ones(binned_counts.size), c="red", linewidth=3) - ax[0].plot(x, lower_bound*np.ones(binned_counts.size), c="red", ls=":", linewidth=3) - ax[0].plot(x, upper_bound*np.ones(binned_counts.size), c="red", ls=":", linewidth=3) + ax[0].plot(x, median * np.ones(binned_counts.size), c="red", linewidth=3) + ax[0].plot( + x, lower_bound * np.ones(binned_counts.size), c="red", ls=":", linewidth=3 + ) + ax[0].plot( + x, upper_bound * np.ones(binned_counts.size), c="red", ls=":", linewidth=3 + ) ax[0].set_ylabel("Read Count") ax[0].set_xlabel("Position Along Genome") @@ -858,25 +1003,40 @@ def plot_fit(coptr_ref_est, read_positions, genome_length, min_reads, min_cov, p ax[1].scatter(x, probs, c="C1") ax[1].set_yscale("log", base=2) min_y = np.min(probs[probs != 0]) - min_y -= 0.5*min_y + min_y -= 0.5 * min_y max_y = np.max(probs) + 0.008 ax[1].set_ylim((min_y, max_y)) # plot fit - y = [ coptr.compute_log_likelihood(coptr_ref_est.ori_estimate, coptr_ref_est.ter_estimate, coptr_ref_est.estimate, xi) - for xi in x ] - y = np.power(2, y)*(x[1] - x[0]) + y = [ + coptr.compute_log_likelihood( + coptr_ref_est.ori_estimate, + coptr_ref_est.ter_estimate, + coptr_ref_est.estimate, + xi, + ) + for xi in x + ] + y = np.power(2, y) * (x[1] - x[0]) ax[1].plot(x, y, c="black", linewidth=3) ax[1].set_ylabel("Density") ax[1].set_xlabel("Position Along Genome") - ax[1].set_title("\nlog2(PTR)={:.3f} (Reads = {})".format(coptr_ref_est.estimate, int(binned_counts.sum()))) + ax[1].set_title( + "\nlog2(PTR)={:.3f} (Reads = {})".format( + coptr_ref_est.estimate, int(binned_counts.sum()) + ) + ) plt.tight_layout() - plt.savefig(os.path.join(plot_folder, coptr_ref_est.sample_id + "-" + coptr_ref_est.genome_id + ".pdf")) + plt.savefig( + os.path.join( + plot_folder, + coptr_ref_est.sample_id + "-" + coptr_ref_est.genome_id + ".pdf", + ) + ) plt.close() - def load_coverage_maps(file_handle, ref_id): coverage_maps = [] try: @@ -888,8 +1048,14 @@ def load_coverage_maps(file_handle, ref_id): return coverage_maps - -def estimate_ptrs_coptr_ref(ref_genome_ids, grouped_coverage_map_folder, min_reads, min_cov, threads, plot_folder=None, mem_limit=None): +def estimate_ptrs_coptr_ref( + ref_genome_ids, + grouped_coverage_map_folder, + min_reads, + min_cov, + plot_folder=None, + mem_limit=None, +): """Estimate Peak-to-Trough ratios across samples. 
Parameters @@ -917,42 +1083,28 @@ def estimate_ptrs_coptr_ref(ref_genome_ids, grouped_coverage_map_folder, min_rea A dictionary with key reference genome id and value a list of CoPTRRefEstimate for that reference genome. """ - print_info("CoPTRRef", "checking reference genomes") + logger.info("Checking reference genomes.") coptr_ref_estimates = {} coptr_ref = CoPTRRef(min_reads, min_cov) - if threads > 1: - # workers for multiprocessing - pool = mp.Pool(threads) - coverage_maps = {} - - for i,ref_id in enumerate(sorted(ref_genome_ids)): - with open(os.path.join(grouped_coverage_map_folder, ref_id + ".cm.pkl"), "rb") as f: - coverage_maps[ref_id] = load_coverage_maps(f, ref_id) - - if (i % threads == 0 and i > 0) or i == len(ref_genome_ids) - 1: - flat_coverage_maps = [(ref_id, coverage_maps[ref_id]) for ref_id in coverage_maps] - flat_results = pool.map(coptr_ref._parallel_helper, flat_coverage_maps) - coptr_ref_estimates.update(dict(flat_results)) - - for ref_id in sorted(coverage_maps): - for sample,est in zip(coverage_maps[ref_id], coptr_ref_estimates[ref_id]): - # only plot samples with a PTR estimate - if plot_folder is not None and not np.isnan(est.estimate): - plot_fit(est, sample.read_positions, sample.length, min_reads, min_cov, plot_folder) - - coverage_maps = {} - - else: - for ref_id in sorted(ref_genome_ids): - with open(os.path.join(grouped_coverage_map_folder, ref_id + ".cm.pkl"), "rb") as f: - coverage_maps = load_coverage_maps(f, ref_id) - - coptr_ref_estimates[ref_id] = coptr_ref.estimate_ptrs(coverage_maps) - - for sample,est in zip(coverage_maps, coptr_ref_estimates[ref_id]): - # only plot samples with a PTR estimate - if plot_folder is not None and not np.isnan(est.estimate): - plot_fit(est, sample.read_positions, sample.length, min_reads, min_cov, plot_folder) - - return coptr_ref_estimates \ No newline at end of file + for ref_id in sorted(ref_genome_ids): + with open( + os.path.join(grouped_coverage_map_folder, ref_id + ".cm.pkl"), "rb" + ) as f: + coverage_maps = load_coverage_maps(f, ref_id) + + coptr_ref_estimates[ref_id] = coptr_ref.estimate_ptrs(coverage_maps) + + for sample, est in zip(coverage_maps, coptr_ref_estimates[ref_id]): + # only plot samples with a PTR estimate + if plot_folder is not None and not np.isnan(est.estimate): + plot_fit( + est, + sample.read_positions, + sample.length, + min_reads, + min_cov, + plot_folder, + ) + + return coptr_ref_estimates diff --git a/src/poisson_pca.py b/src/coptr/poisson_pca.py similarity index 76% rename from src/poisson_pca.py rename to src/coptr/poisson_pca.py index f45b1f5..c5add7a 100644 --- a/src/poisson_pca.py +++ b/src/coptr/poisson_pca.py @@ -21,15 +21,19 @@ along with CoPTR. If not, see . """ +import logging + import numpy as np import scipy.optimize -import sys + + +logger = logging.getLogger(__name__) + class PoissonPCA: - """Principal Component Analysis with Poisson observations. - """ + """Principal Component Analysis with Poisson observations.""" - def pca(self, X, k, tol=1E-3, verbose=False): + def pca(self, X, k, tol=1e-3): """Run the PCA. 
For a data matrix X (sample by observations), finds two matrices W and V such that: @@ -46,34 +50,31 @@ def pca(self, X, k, tol=1E-3, verbose=False): """ assert k <= X.shape[1], "k must be smaller than the number of columns of X" assert k > 0 and k == int(k), "k must be a positive integer" - assert np.all(X[np.isfinite(X)] >= 0), "X must have nonnegative entries or be missing" + assert np.all( + X[np.isfinite(X)] >= 0 + ), "X must have nonnegative entries or be missing" X = X.astype(float) # use PCA to initialize matrices W,V # initializing missing values to mean Xt = np.copy(X) Xt[Xt == 0] = 0.1 - for i,row in enumerate(Xt): + for i, row in enumerate(Xt): if np.isnan(row).sum() > 0: - Xt[i,np.isnan(row)] = np.mean(row[np.isfinite(row)]) - u,s,vh = np.linalg.svd(np.log2(Xt)) - u = u[:,:k] + Xt[i, np.isnan(row)] = np.mean(row[np.isfinite(row)]) + u, s, vh = np.linalg.svd(np.log2(Xt)) + u = u[:, :k] s = s[:k] - vh = vh[:k,:] - W = u*s + vh = vh[:k, :] + W = u * s V = vh - # W = np.random.normal(loc=0, scale=0.1, size=(X.shape[0], k)) - # V = np.random.normal(loc=0, scale=0.1, size=(k, X.shape[1])) - # W = np.zeros((X.shape[0], k)) - # V = np.zeros((k, X.shape[1])) it = 0 prv_log_lk = -np.inf log_lk = self.log_likelihood(X, W, V) while np.abs(prv_log_lk - log_lk) > tol: - if verbose: - print("iteration:", it, "log likelihood:", log_lk, file=sys.stderr) + logger.debug("Iteration: %d; Log likelihood: %.3G", it, log_lk) prv_log_lk = log_lk W = self.update_W(X, W, V) @@ -82,9 +83,10 @@ def pca(self, X, k, tol=1E-3, verbose=False): it += 1 - assert prv_log_lk <= log_lk or np.abs(prv_log_lk - log_lk) < 1E-6, "log likelihood must be increasing ({} > {})".format(prv_log_lk, log_lk) - return W,V - + assert ( + prv_log_lk <= log_lk or np.abs(prv_log_lk - log_lk) < 1e-6 + ), "log likelihood must be increasing ({} > {})".format(prv_log_lk, log_lk) + return W, V def update_W(self, X, W, V): """Optimize the log-likelihood with respect to W. @@ -98,15 +100,20 @@ def update_W(self, X, W, V): V : np.array The current estimate of V """ - loss_fn = lambda Wx : -self.log_likelihood(X, Wx.reshape(X.shape[0], V.shape[0]), V) - grad_fn = lambda Wx : -self.grad_W(X, Wx.reshape(X.shape[0], V.shape[0]), V).flatten() + loss_fn = lambda Wx: -self.log_likelihood( + X, Wx.reshape(X.shape[0], V.shape[0]), V + ) + grad_fn = lambda Wx: -self.grad_W( + X, Wx.reshape(X.shape[0], V.shape[0]), V + ).flatten() # these are handled by the minimizer np.seterr(over="ignore") - res = scipy.optimize.minimize(loss_fn, np.copy(W).flatten(), jac=grad_fn, method="CG") + res = scipy.optimize.minimize( + loss_fn, np.copy(W).flatten(), jac=grad_fn, method="CG" + ) np.seterr(over="warn") return res.x.reshape(X.shape[0], V.shape[0]) - def update_V(self, X, W, V): """Optimize the log-likelihood with respect to V. @@ -121,7 +128,6 @@ def update_V(self, X, W, V): """ return self.update_W(X.T, V.T, W.T).T - def log_likelihood(self, X, W, V): """Compute the log likelihood (minus a constant). @@ -135,8 +141,7 @@ def log_likelihood(self, X, W, V): A k x m matrix """ param = np.exp(W.dot(V)) - return (X*W.dot(V) - param)[np.isfinite(X)].sum() - + return (X * W.dot(V) - param)[np.isfinite(X)].sum() def grad_W(self, X, W, V): """Compute the gradient of the log likelihood wrt W. 
@@ -164,11 +169,11 @@ def grad_W(self, X, W, V): # generate 100 random test sets for i in range(100): - print("running test set", i+1, "of", 100) + print("running test set", i + 1, "of", 100) n = np.random.randint(10, 100) m = np.random.randint(5, n) k = np.random.randint(1, np.min((m, 10))) - print("\tn {} m {} k {}".format(n,m,k)) + print("\tn {} m {} k {}".format(n, m, k)) # generate test data W = np.random.normal(loc=0, scale=1, size=(n, k)) @@ -176,13 +181,12 @@ def grad_W(self, X, W, V): X = np.random.poisson(np.exp(W.dot(V))).astype(float) - coords = [(i,j) for i in range(n) for j in range(m)] - nmissing = int(0.25*X.size) + coords = [(i, j) for i in range(n) for j in range(m)] + nmissing = int(0.25 * X.size) for l in range(nmissing): coord = coords[np.random.randint(len(coords))] X[coord] = np.nan - X_finite = X[np.isfinite(X)] print("\tmin {}".format(X_finite.min())) print("\tmax {}".format(X_finite.max())) diff --git a/src/read_assigner.py b/src/coptr/read_assigner.py similarity index 70% rename from src/read_assigner.py rename to src/coptr/read_assigner.py index 19f71d6..dd2e41a 100644 --- a/src/read_assigner.py +++ b/src/coptr/read_assigner.py @@ -22,15 +22,17 @@ along with CoPTR. If not, see . """ +import logging + import numpy as np +import scipy.misc import scipy.special import scipy.stats -import scipy.misc - +from scipy.sparse import csr_matrix from scipy.special import digamma -from scipy.special import loggamma -from scipy.sparse import csr_matrix + +logger = logging.getLogger(__name__) class ReadAssigner(object): @@ -47,7 +49,9 @@ def __init__(self, X, prior_counts): self.X = csr_matrix(X) self.nreads = X.shape[0] self.ngenomes = X.shape[1] - assert np.all(self.X.sum(axis=1) > 0), "each read must have at least one valid mapping" + assert np.all( + self.X.sum(axis=1) > 0 + ), "each read must have at least one valid mapping" # set alpha based on unambiguously assigned reads self.alpha = np.ones(self.ngenomes) + prior_counts @@ -56,14 +60,12 @@ def __init__(self, X, prior_counts): self.phi = self.X.multiply(1 / self.X.sum(axis=1)) self.eta = np.copy(self.alpha) - def compute_elbo(self): - """Compute the variational objective function. - """ + """Compute the variational objective function.""" elbo = 0 # E_q[log p(pi)] - elbo += ((self.alpha - 1)*(digamma(self.eta) - digamma(self.eta.sum()))).sum() + elbo += ((self.alpha - 1) * (digamma(self.eta) - digamma(self.eta.sum()))).sum() # E_q[log p(X | Z)] elbo += (self.phi.multiply(self.X)).sum() @@ -75,49 +77,44 @@ def compute_elbo(self): elbo += scipy.stats.dirichlet(self.eta).entropy() # entropy of categorical - tmp = (self.phi.data*np.log(self.phi.data)) + tmp = self.phi.data * np.log(self.phi.data) elbo += -tmp.sum() return elbo - def update_phi(self): - """Optimize phi with fixed eta. - """ + """Optimize phi with fixed eta.""" log_phi = self.X.multiply(digamma(self.eta)) - phi = csr_matrix((np.exp(log_phi.data), (log_phi.row, log_phi.col)), shape=log_phi.shape) - self.phi = phi.multiply(1/phi.sum(axis=1)) - + phi = csr_matrix( + (np.exp(log_phi.data), (log_phi.row, log_phi.col)), shape=log_phi.shape + ) + self.phi = phi.multiply(1 / phi.sum(axis=1)) def update_eta(self): - """Optimize eta with fixed phi. - """ + """Optimize eta with fixed phi.""" self.eta = np.array(self.alpha + self.phi.sum(axis=0))[0] - - def run_vi(self, tol=1E-3, verbose=False): - """Optimize variational parameters with respect to elbo. 
- """ + def run_vi(self, tol=1e-3): + """Optimize variational parameters with respect to elbo.""" prv_elbo = -np.inf elbo = self.compute_elbo() it = 0 while np.abs(prv_elbo - elbo) > tol: - if verbose: - print("it:", it, "elbo:", elbo) + logger.debug("Iteration: %d; elbo: %.3G", it, elbo) self.update_eta() self.update_phi() prv_elbo = elbo elbo = self.compute_elbo() it += 1 - assert elbo >= prv_elbo - 1E-3, "elbo must be strictly increasing ({} > {})".format(elbo, prv_elbo) + assert ( + elbo >= prv_elbo - 1e-3 + ), "elbo must be strictly increasing ({} > {})".format(elbo, prv_elbo) - if verbose: - print("it:", it, "elbo:", elbo) + logger.debug("Iteration: %d; elbo: %.3G", it, elbo) - - def assign_reads(self, verbose=False): + def assign_reads(self): """Compute read assignments. Return @@ -126,7 +123,7 @@ def assign_reads(self, verbose=False): A 1D array. Each entry gives a genome id that is the genome assignment for each read. """ - self.run_vi(verbose=verbose) + self.run_vi() self.phi = csr_matrix(self.phi) assignments = [] for i in range(self.nreads): @@ -140,7 +137,7 @@ def assign_reads(self, verbose=False): # test a large number of cases for i in range(100): - print("running test set", i+1) + print("running test set", i + 1) nreads = np.random.randint(10000, 100000) ngenomes = np.random.randint(10, 50) Z = np.zeros((nreads, ngenomes)) @@ -150,25 +147,30 @@ def assign_reads(self, verbose=False): for n in range(nreads): true_assignment = np.random.randint(5) - Z[n,true_assignment] = 1 + Z[n, true_assignment] = 1 X = np.copy(Z) - multimapped_reads = int(0.1*nreads) + multimapped_reads = int(0.1 * nreads) for n in range(multimapped_reads): true_assignment = np.argwhere(Z[n] == 1).flatten()[0] - additional_mappings = np.max((1,np.random.poisson(1))) + additional_mappings = np.max((1, np.random.poisson(1))) for i in range(additional_mappings): mapping_id = np.random.randint(ngenomes) - X[n,mapping_id] = 1 - X = X[:multimapped_reads,:] + X[n, mapping_id] = 1 + X = X[:multimapped_reads, :] - prior = Z[multimapped_reads:,].sum(axis=0) + prior = Z[ + multimapped_reads:, + ].sum(axis=0) read_assigner = ReadAssigner(X[:multimapped_reads], prior) assignments = read_assigner.assign_reads() correct_assignments = 0 - for i,assignment in enumerate(assignments): - if Z[i,assignment] == 1: + for i, assignment in enumerate(assignments): + if Z[i, assignment] == 1: correct_assignments += 1 - print("\t{} of reads correctly mapped".format(correct_assignments / len(assignments))) + print( + "\t{} of reads correctly mapped".format( + correct_assignments / len(assignments) + ) + ) print("done!") - diff --git a/src/read_mapper.py b/src/coptr/read_mapper.py similarity index 53% rename from src/read_mapper.py rename to src/coptr/read_mapper.py index 3ea51fb..b4e2d78 100644 --- a/src/read_mapper.py +++ b/src/coptr/read_mapper.py @@ -22,21 +22,23 @@ along with CoPTR. If not, see . """ -import numpy as np +import logging import os import os.path -import pysam import subprocess as sub -import sys -import time +from datetime import datetime, timezone +from pathlib import Path + +import pysam + +from .util import get_fastq_name -from src.print import print_error, print_info -from src.util import get_fastq_name + +logger = logging.getLogger(__name__) class ReadMapper: - """Wrapper around bowtie2. - """ + """Wrapper around bowtie2.""" def index(self, ref_fasta, index_out, bt2_bmax, bt2_dcv, bt2_threads, bt2_packed): """Build a bowtie2 index from ref_fasta. 
@@ -57,57 +59,68 @@ def index(self, ref_fasta, index_out, bt2_bmax, bt2_dcv, bt2_threads, bt2_packed bt2_packed : str Parameter to pass to bowtie2-build --packed argument. """ + files_found = 0 + total_size = 0 + ref_files = [] if os.path.isfile(ref_fasta): - ref_files = [ref_fasta] - files_found = 1 - total_size = os.stat(ref_fasta).st_size + ref_files.append(ref_fasta) + files_found += 1 + total_size += os.stat(ref_fasta).st_size elif os.path.isdir(ref_fasta): valid_ext = [".fasta", ".fa", ".fna"] # for multiple fasta files, bowtie2 takes a comma # separated list - ref_files = [] - files_found = 0 - total_size = 0 - for f in os.listdir(ref_fasta): - fname,ext = os.path.splitext(f) - fpath = os.path.join(ref_fasta,f) + for in_handle in os.listdir(ref_fasta): + fname, ext = os.path.splitext(in_handle) + fpath = os.path.join(ref_fasta, in_handle) if os.path.isfile(fpath) and ext in valid_ext: ref_files.append(fpath) total_size += os.stat(fpath).st_size - files_found += 1 else: - print_error("ReadMapper", "index must either be file or folder.") - + logger.error("The index must be either a file or directory.") - print_info("ReadMapper", "found {} files totaling {:.3f} Gb".format(len(ref_files), total_size / (1024**3))) + logger.info( + "Found %d files totaling %.3g GB.", len(ref_files), total_size / (1024 ** 3) + ) - fna_out = open("coptr-fna-{}.fna".format(time.time()), "w") - genomes_out = open("{}.genomes".format(index_out), "w") - n_genomes = 0 + sequence_collection = Path( + f"coptr-fna-{datetime.now(timezone.utc).isoformat(timespec='seconds')}.fna" + ) + genomes = Path(f"{index_out}.genomes") - print_info("ReadMapper", "copying to fasta files to {} with prepended genome ids (filenames)".format(fna_out.name)) + logger.info( + "Copying FASTA files to %s with prepended genome ids (filenames).", + str(sequence_collection), + ) # assume 1 genome per fasta # let's set the filename as the genome identifier - for fpath in ref_files: - fname = os.path.basename(os.path.splitext(fpath)[0]) - genomes_out.write(os.path.basename(fname) + "\n") - n_genomes += 1 - - with open(fpath, "r") as f: - for line in f: - if ">" == line[0]: - # prepend the filename as the identifier for the genome - line = line[0] + fname + "|" + line[1:] - fna_out.write(line) - - fna_out.close() - genomes_out.close() - - print_info("ReadMapper", "writing {} reference genome ids to {}".format(n_genomes, genomes_out.name)) - call = ["bowtie2-build", fna_out.name, index_out, "--threads", bt2_threads] + n_genomes = 0 + with sequence_collection.open("w") as out_handle, genomes.open( + "w" + ) as genomes_handle: + for fpath in ref_files: + fname = os.path.basename(os.path.splitext(fpath)[0]) + genomes_handle.write(os.path.basename(fname) + "\n") + n_genomes += 1 + + with open(fpath, "r") as in_handle: + for line in in_handle: + if line.startswith(">"): + # prepend the filename as the identifier for the genome + line = f">{fname}|{line[1:]}" + out_handle.write(line) + + logger.info("Writing %d reference genome ids to %s.", n_genomes, str(genomes)) + call = [ + "bowtie2-build", + str(sequence_collection), + index_out, + "--threads", + bt2_threads, + ] if bt2_bmax is not None: call += ["--bmax", bt2_bmax] if bt2_dcv is not None: @@ -116,15 +129,17 @@ def index(self, ref_fasta, index_out, bt2_bmax, bt2_dcv, bt2_threads, bt2_packed call += ["--packed"] try: - print_info("ReadMapper", " ".join(call)) + logger.info("%s", " ".join(call)) sub.check_call(call) - print_info("ReadMapper", "indexed {} fasta files for reference 
database.".format(files_found)) - except Exception as e: - print(e, file=sys.stderr) + logger.info( + "Indexed %d FASTA files for the reference database.", len(ref_files) + ) + except Exception as error: + logger.error("An error occurred while indexing with bowtie2.") + logger.info("Detailed information:", exc_info=error) finally: - print_info("ReadMapper", "cleaning up {}".format(fna_out.name)) - sub.check_call(["rm", fna_out.name]) - + logger.info("Cleaning up %s.", str(sequence_collection)) + sequence_collection.unlink() def map(self, index, inputf, outfolder, paired, threads, bt2_k): """Map reads from infile against reference database using bowtie2, then @@ -147,75 +162,100 @@ def map(self, index, inputf, outfolder, paired, threads, bt2_k): """ bt2_k = str(bt2_k) - if not os.path.isdir(outfolder): - print_error("ReadMapper", "output folder does not exist.") + outfolder = Path(outfolder) + if not outfolder.is_dir(): + logger.error("The output directory %s does not exist.", str(outfolder)) if os.path.isfile(inputf): bn = os.path.basename(inputf) - bn,ext = os.path.splitext(bn) - out_sam = os.path.join(outfolder, bn + ".sam") - out_bam = os.path.join(outfolder, bn + ".bam") + bn, ext = os.path.splitext(bn) + out_sam = outfolder / f"{bn}.sam" + out_bam = outfolder / f"{bn}.bam" # first map to a sam file with bowtie2 - print_info("ReadMapper", "mapping {} to {}".format(inputf, out_sam)) - call = ["bowtie2", "-x", index, inputf, "--no-unal", "-p", str(threads), "-k", bt2_k] - print_info("ReadMapper", " ".join(call)) - with open(out_sam, "w") as out: + logger.info("Mapping {} to {}".format(inputf, str(out_sam))) + call = [ + "bowtie2", + "-x", + index, + inputf, + "--no-unal", + "-p", + str(threads), + "-k", + bt2_k, + ] + logger.info(" ".join(call)) + with out_sam.open("w") as out: sub.check_call(call, stdout=out) # then convert to a bam file - print_info("ReadMapper", "converting {} to {}".format(out_sam, out_bam)) + logger.info("Converting {} to {}.".format(str(out_sam), str(out_bam))) infile = pysam.AlignmentFile(out_sam, "r") outfile = pysam.AlignmentFile(out_bam, "wb", template=infile) - for s in infile: - outfile.write(s) - infile.close() - outfile.close() + try: + for s in infile: + outfile.write(s) + finally: + infile.close() + outfile.close() # now remove sam file - print_info("ReadMapper", "cleaning up {}".format(out_sam)) - call = ["rm", out_sam] - sub.check_call(call) + logger.info("Cleaning up %s.", str(out_sam)) + out_sam.unlink() # single end sequencing elif os.path.isdir(inputf) and not paired: valid_ext = [".fastq", ".fq", ".gz"] files_found = 0 for f in sorted(os.listdir(inputf)): - fname,ext1 = os.path.splitext(f) + fname, ext1 = os.path.splitext(f) if ext1 == ".gz": ext2 = fname.split(".")[1] else: ext2 = "" - fpath = os.path.join(inputf,f) + fpath = os.path.join(inputf, f) if ext1 in valid_ext or ext2 in valid_ext: bn = os.path.basename(f) - bn,ext = os.path.splitext(bn) - out_sam = os.path.join(outfolder, get_fastq_name(bn) + ".sam") - out_bam = os.path.join(outfolder, get_fastq_name(bn) + ".bam") + bn, ext = os.path.splitext(bn) + out_sam = outfolder / f"{get_fastq_name(bn)}.sam" + out_bam = outfolder / f"{get_fastq_name(bn)}.bam" # map reads with bowtie2 - print_info("ReadMapper", "mapping {} to {}".format(fpath, out_sam)) - call = ["bowtie2", "-x", index, fpath, "--no-unal", "-p", str(threads), "-k", bt2_k] - print_info("ReadMapper", " ".join(call)) - with open(out_sam, "w") as out: + logger.info("Mapping {} to {}".format(inputf, str(out_sam))) + call = [ + 
"bowtie2", + "-x", + index, + fpath, + "--no-unal", + "-p", + str(threads), + "-k", + bt2_k, + ] + logger.info(" ".join(call)) + with out_sam.open("w") as out: sub.check_call(call, stdout=out) # then convert to a bam file - print_info("ReadMapper", "converting {} to {}".format(out_sam, out_bam)) + logger.info( + "Converting {} to {}.".format(str(out_sam), str(out_bam)) + ) infile = pysam.AlignmentFile(out_sam, "r") outfile = pysam.AlignmentFile(out_bam, "wb", template=infile) - for s in infile: - outfile.write(s) - infile.close() - outfile.close() + try: + for s in infile: + outfile.write(s) + finally: + infile.close() + outfile.close() # now remove sam file - print_info("ReadMapper", "cleaning up {}".format(out_sam)) - call = ["rm", out_sam] - sub.check_call(call) - + logger.info("Cleaning up %s.", str(out_sam)) + out_sam.unlink() + # paired end sequencing elif os.path.isdir(inputf) and paired: valid_ext = [".fastq", "fq", ".gz"] @@ -223,7 +263,7 @@ def map(self, index, inputf, outfolder, paired, threads, bt2_k): # file prefix -> [pair_1, pair_2] read_pairs = {} for f in sorted(os.listdir(inputf)): - fname,ext1 = os.path.splitext(f) + fname, ext1 = os.path.splitext(f) if ext1 == ".gz": ext2 = fname.split(".")[1] else: @@ -248,29 +288,45 @@ def map(self, index, inputf, outfolder, paired, threads, bt2_k): f2 = os.path.join(inputf, read_pairs[pair_name][1]) bn = os.path.basename(f1) bn = bn.split("_")[0] - out_sam = os.path.join(outfolder, get_fastq_name(bn) + ".sam") - out_bam = os.path.join(outfolder, get_fastq_name(bn) + ".bam") + out_sam = outfolder / f"{get_fastq_name(bn)}.sam" + out_bam = outfolder / f"{get_fastq_name(bn)}.bam" # map reads with bowtie2 - print_info("ReadMapper", "mapping {},{} to {}".format(f1, f2, out_sam)) - call = ["bowtie2", "-x", index, "-1", f1, "-2", f2, "--no-unal", "-p", str(threads), "-k", bt2_k] - print_info("ReadMapper", " ".join(call)) - with open(out_sam, "w") as out: + logger.info("Mapping {} to {}".format(inputf, str(out_sam))) + call = [ + "bowtie2", + "-x", + index, + "-1", + f1, + "-2", + f2, + "--no-unal", + "-p", + str(threads), + "-k", + bt2_k, + ] + logger.info(" ".join(call)) + with out_sam.open("w") as out: sub.check_call(call, stdout=out) # then convert to a bam file - print_info("ReadMapper", "converting {} to {}".format(out_sam, out_bam)) + logger.info( + "Converting {} to {}.".format(str(out_sam), str(out_bam)) + ) infile = pysam.AlignmentFile(out_sam, "r") outfile = pysam.AlignmentFile(out_bam, "wb", template=infile) - for s in infile: - outfile.write(s) - infile.close() - outfile.close() + try: + for s in infile: + outfile.write(s) + finally: + infile.close() + outfile.close() # now remove sam file - print_info("ReadMapper", "cleaning up {}".format(out_sam)) - call = ["rm", out_sam] - sub.check_call(call) + logger.info("Cleaning up %s.", str(out_sam)) + out_sam.unlink() else: - print_error("ReadMapper", "input must either be a file or folder.") + logger.error("Input must be either a file or a folder.") diff --git a/src/util.py b/src/coptr/util.py similarity index 92% rename from src/util.py rename to src/coptr/util.py index a3ec115..9a2a5e9 100644 --- a/src/util.py +++ b/src/coptr/util.py @@ -23,6 +23,7 @@ import os.path + def get_fastq_name(fpath): """Remove the path and extension from a fastq file. 
@@ -35,7 +36,7 @@ def get_fastq_name(fpath): fname : str the name of the file with its path and extension removeed """ - bn,ex = os.path.splitext(fpath) + bn, ex = os.path.splitext(fpath) if ex == ".gz": - bn,ex = os.path.splitext(bn) - return bn \ No newline at end of file + bn, ex = os.path.splitext(bn) + return bn diff --git a/src/print.py b/src/print.py deleted file mode 100644 index dbc1975..0000000 --- a/src/print.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -print.py -====================== -Print error, warning, and info messages. -""" - -""" -This file is part of CoPTR. - -CoPTR is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -CoPTR is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with CoPTR. If not, see . -""" - -import sys -import time - -def print_error(module, msg, quit=True): - """Print an error message to the standard error. - - Parameters - ---------- - module : str - Which module generated the error message - msg : str - The error message itself - exit : bool - If TRUE, the program terminates after printing the error - """ - print("[ERROR] ({}) {}: {}".format(time.strftime("%b %d, %Y %X"), module, msg), file=sys.stderr, flush=True) - if quit: - exit(1) - - -def print_warning(module, msg): - """Print a warning message to the standard error. - - Parameters - ---------- - module : str - Which module generated the message - msg : str - The message itself - """ - - print("[WARNING] ({}) {}: {}".format(time.strftime("%b %d, %Y %X"), module, msg), file=sys.stderr, flush=True) - - -def print_info(module, msg): - """Print an info message to the standard error. 
- - Parameters - ---------- - module : str - Which module generated the message - msg : str - The message itself - """ - - print("[INFO] ({}) {}: {}".format(time.strftime("%b %d, %Y %X"), module, msg), file=sys.stderr, flush=True) \ No newline at end of file diff --git a/test/test_bam_processor.py b/test/test_bam_processor.py index c4c0dc7..cf71312 100644 --- a/test/test_bam_processor.py +++ b/test/test_bam_processor.py @@ -1,46 +1,46 @@ import os -import pysam import unittest -from src.bam_processor import BamProcessor +import pysam + +from coptr.bam_processor import BamProcessor + def read_to_dict(read): read = read.strip("\n").split() read_dict = { - "name" : read[0], - "flag" : read[1], - "ref_name" : read[2], - "ref_pos" : read[3], - "map_quality" : read[4], - "cigar" : read[5], - "next_ref_name" : read[6], - "next_ref_pos" : read[7], - "length" : read[8], - "seq" : read[9], - "qual" : read[10], - "tags" : read[11:] + "name": read[0], + "flag": read[1], + "ref_name": read[2], + "ref_pos": read[3], + "map_quality": read[4], + "cigar": read[5], + "next_ref_name": read[6], + "next_ref_pos": read[7], + "length": read[8], + "seq": read[9], + "qual": read[10], + "tags": read[11:], } return read_dict class TestBamProcessor(unittest.TestCase): - def setUp(self): - header = { - "HD" : {"VN": "1.0", "SO": "unsorted"}, - "SQ" : - [ - {"SN": "ref1|seq1", "LN" : 1000000}, - {"SN": "ref1|seq2", "LN" : 1000000}, - {"SN": "ref2|seq1", "LN" : 1000000} - ] + header = { + "HD": {"VN": "1.0", "SO": "unsorted"}, + "SQ": [ + {"SN": "ref1|seq1", "LN": 1000000}, + {"SN": "ref1|seq2", "LN": 1000000}, + {"SN": "ref2|seq1", "LN": 1000000}, + ], } bam1_reads = [ "read1 0 ref1|seq1 24975 42 80M * 0 0 TGGGCCAGAAAAAATGACTTCTCCATCTCGCTGCCGGTAGACCGACTCTCTTTTCTGCTGGCGGTTGCCACGCTGAGCGG AAAAAF.A.FFAFFFFFAFFFFFFFFFFFFFF 0.9) - - class Simulator: - """Simulate sequencing reads using the density of reads along an E. coli genome. - """ + """Simulate sequencing reads using the density of reads along an E. coli genome.""" def __init__(self): f = open("test/e-coli-NZ_CP011495.1.pkl", "rb") read_density_map, ptr = pkl.load(f) f.close() - self.bin_edges = read_density_map[:,0] - self.probs = read_density_map[:,1] # left bin edges + self.bin_edges = read_density_map[:, 0] + self.probs = read_density_map[:, 1] # left bin edges self.min_log2_ptr = 0.05 self.max_log2_ptr = 1.25 - def get_genome_length(self): - """Return the number of bases in the genome. - """ - return 100*self.probs.size - + """Return the number of bases in the genome.""" + return 100 * self.probs.size - def simulate_reads(self, nreads, ptr = None, ori_pos = None): + def simulate_reads(self, nreads, ptr=None, ori_pos=None): """Simulate nreads along 100bp bins with specified ptr and origin position. If ptr or ori_pos are None, they are chosen randomly. 
@@ -98,12 +106,14 @@ def simulate_reads(self, nreads, ptr = None, ori_pos = None): The simulated replication terminus position as a fraction along the genome """ if ptr is None: - ptr = np.random.random()*(self.max_log2_ptr - self.min_log2_ptr) + self.min_log2_ptr - ptr = 2**ptr + ptr = ( + np.random.random() * (self.max_log2_ptr - self.min_log2_ptr) + + self.min_log2_ptr + ) + ptr = 2 ** ptr else: assert np.log2(ptr) > self.min_log2_ptr, "ptr < 2^0.5 may be unreliable" - if ori_pos is None: ori_pos = np.random.random() else: @@ -117,14 +127,13 @@ def simulate_reads(self, nreads, ptr = None, ori_pos = None): # convert counts to coordinates read_positions = [] - for i,c in enumerate(binned_counts): - size = binned_counts.size - m = (0.5*(2*i + 1)/size) + for i, c in enumerate(binned_counts): + size = binned_counts.size + m = 0.5 * (2 * i + 1) / size read_positions += [m for i in range(c)] - read_positions = self.get_genome_length()*np.array(read_positions) - - return read_positions, ptr, ori_pos, ter_pos # 100bp bins + read_positions = self.get_genome_length() * np.array(read_positions) + return read_positions, ptr, ori_pos, ter_pos # 100bp bins def adjust_read_probs(self, ptr, ori_pos, ter_pos): """Scale bin probabilities based on the PTR. @@ -147,16 +156,15 @@ def adjust_read_probs(self, ptr, ori_pos, ter_pos): # (bin_edges[i], bin_edges[i+1]), so let's # take the midpoint of each bin if i == self.bin_edges.size - 1: - m = 0.5*(self.bin_edges[i] + 1) - length = (1 - self.bin_edges[i]) + m = 0.5 * (self.bin_edges[i] + 1) + length = 1 - self.bin_edges[i] else: - m = 0.5*(self.bin_edges[i] + self.bin_edges[i+1]) - length = (self.bin_edges[i+1] - self.bin_edges[i]) + m = 0.5 * (self.bin_edges[i] + self.bin_edges[i + 1]) + length = self.bin_edges[i + 1] - self.bin_edges[i] x1 = np.min([ori_pos, ter_pos]) x2 = np.max([ori_pos, ter_pos]) - # since we normalize bins at the end, # we can compute c1 and c2 + a constant # in this case we can use c - log p(x_t) @@ -168,11 +176,11 @@ def adjust_read_probs(self, ptr, ori_pos, ter_pos): c2 = np.log2(ptr) if m <= x1: - adj_probs[i] = -alpha*(m - x1) + c1 + adj_probs[i] = -alpha * (m - x1) + c1 elif x1 < m and m < x2: - adj_probs[i] = alpha*(m - x1) + c1 + adj_probs[i] = alpha * (m - x1) + c1 else: - adj_probs[i] = -alpha*(m - x2) + c2 + adj_probs[i] = -alpha * (m - x2) + c2 adj_probs[i] += np.log2(length) # normalize @@ -183,8 +191,12 @@ def adjust_read_probs(self, ptr, ori_pos, ter_pos): # reweight and normalize new_probs = np.zeros(self.probs.size) - new_probs[self.probs != 0] = np.log(self.probs[self.probs != 0]) + adj_probs[self.probs != 0] - new_probs[self.probs != 0] -= scipy.special.logsumexp(new_probs[self.probs != 0]) + new_probs[self.probs != 0] = ( + np.log(self.probs[self.probs != 0]) + adj_probs[self.probs != 0] + ) + new_probs[self.probs != 0] -= scipy.special.logsumexp( + new_probs[self.probs != 0] + ) new_probs[self.probs != 0] = np.exp(new_probs[self.probs != 0]) - return new_probs \ No newline at end of file + return new_probs
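
A usage sketch (not itself part of this patch): since src/print.py and its print_info/print_warning/print_error helpers are removed in favor of module-level logging loggers, a calling script now has to configure logging itself to see the progress messages, and the modules are imported from the renamed coptr package. The module path coptr.coptr_ref, the genome id, the folder name, and the min_reads/min_cov values below are illustrative assumptions rather than values taken from this diff.

    import logging

    # Module path assumed from the src/ -> src/coptr/ renames in this patch;
    # adjust it to wherever estimate_ptrs_coptr_ref actually lives.
    from coptr.coptr_ref import estimate_ptrs_coptr_ref

    # Route the library's INFO messages (e.g. "Running <genome_id>.") to the console.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(name)s %(levelname)s: %(message)s",
    )

    # Hypothetical inputs: one reference genome id and the folder holding the
    # grouped coverage maps ("<genome_id>.cm.pkl" files) from an earlier step.
    estimates = estimate_ptrs_coptr_ref(
        ref_genome_ids=["example-genome"],
        grouped_coverage_map_folder="coverage-maps-grouped",
        min_reads=5000,
        min_cov=0.75,
    )

    # Each CoPTRRefEstimate now also carries the new ori_estimate_coord field.
    for genome_id, genome_estimates in estimates.items():
        for est in genome_estimates:
            print(genome_id, est.sample_id, est.estimate, est.ori_estimate_coord)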