From 0ed871020dc09b9824aa210ed82646d58ca7731c Mon Sep 17 00:00:00 2001 From: Andrew Robbins Date: Fri, 30 Aug 2024 19:22:42 -0400 Subject: [PATCH 1/2] generate tabix in working directory if not present --- ALLCools/count_matrix/dataset.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ALLCools/count_matrix/dataset.py b/ALLCools/count_matrix/dataset.py index 8106811..b006003 100644 --- a/ALLCools/count_matrix/dataset.py +++ b/ALLCools/count_matrix/dataset.py @@ -164,7 +164,14 @@ def _count_single_region_set(allc_table, region_config, obs_dim, region_dim): total_data = [] for sample, allc_path in allc_table.items(): - with pysam.TabixFile(allc_path) as allc: + try: + allc = pysam.TabixFile(allc_path) + except IOError: + local_tabix = "./" + pathlib.Path(allc_path).name + ".tbi" + local_compressed = pysam.tabix_index(filename=allc_path, index=local_tabix, + seq_col=0, start_col=1, end_col=1) + allc = pysam.TabixFile(filename=local_compressed, index=local_tabix) + with allc: region_ids = [] sample_data = [] region_chunks = pd.read_csv(region_config["regions"], index_col=0, chunksize=1000) From 498faeae011815484563acf23e5e4a5433363593 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 30 Aug 2024 23:26:45 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ALLCools/count_matrix/dataset.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ALLCools/count_matrix/dataset.py b/ALLCools/count_matrix/dataset.py index b006003..ad03caa 100644 --- a/ALLCools/count_matrix/dataset.py +++ b/ALLCools/count_matrix/dataset.py @@ -166,10 +166,11 @@ def _count_single_region_set(allc_table, region_config, obs_dim, region_dim): for sample, allc_path in allc_table.items(): try: allc = pysam.TabixFile(allc_path) - except IOError: + except OSError: local_tabix = "./" + pathlib.Path(allc_path).name + ".tbi" - local_compressed = pysam.tabix_index(filename=allc_path, index=local_tabix, - seq_col=0, start_col=1, end_col=1) + local_compressed = pysam.tabix_index( + filename=allc_path, index=local_tabix, seq_col=0, start_col=1, end_col=1 + ) allc = pysam.TabixFile(filename=local_compressed, index=local_tabix) with allc: region_ids = []