From ba17bc24e8553473d9fd30c4b9f8f2052381c52e Mon Sep 17 00:00:00 2001 From: rvanroey <80913877+rvanroey@users.noreply.github.com> Date: Sat, 22 May 2021 11:31:18 +0200 Subject: [PATCH] Fixed Alignment issues The reason that pinaplpy does not find any successful reads is because it prioritises the .sam files over .bam files. I've changed the code a bit to fix the issue temporarily (very sloppy code) and indexed the .bam file with Pysam. Currently, pinaplpy does not find any successful reads, however the bam files do look ok --- bin/GetReadCounts.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/bin/GetReadCounts.py b/bin/GetReadCounts.py index 3b52237..f6d28dc 100755 --- a/bin/GetReadCounts.py +++ b/bin/GetReadCounts.py @@ -105,17 +105,25 @@ def CountReads(sample): print('Loading alignment ...') # CLASSIFY ALIGNMENTS # os.chdir(AlnDir) - sam_file = ReadsFilename0 + '.sam' - sam_file_present = path.exists(sam_file) - bam_file = ReadsFilename0 + '.bam' - bam_file_present = path.exists(bam_file) - if sam_file_present: - sam_file = sam_file - elif bam_file_present: - os.system('samtools view -h '+bam_file+' > '+sam_file) - else: + #sam_file = ReadsFilename0 + '.sam' + #sam_file_present = path.exists(sam_file) + #bam_file = ReadsFilename0 + '.bam' + #bam_file_present = path.exists(bam_file) + #if sam_file_present: + # sam_file = sam_file + #elif bam_file_present: + # os.system('samtools view -h '+bam_file+' > '+sam_file) + #else: + # print('### ERROR: No alignment file present ###') + #bw2sam = pysam.AlignmentFile(sam_file,'rb') + if bam_file_present == False: print('### ERROR: No alignment file present ###') - bw2sam = pysam.AlignmentFile(sam_file,'rb') + BFS = str("bam_file_sorted") + pysam.sort("-o", BFS + ".bam", bam_file ) + pysam.index(BFS + ".bam") + bw2sam = pysam.AlignmentFile(BFS + ".bam",'rb') + + print('Applying matching threshold ...') print('Applying ambiguity threshold ...') NFail = 0; NUnique = 0; NTol = 0; NAmb = 0 @@ -313,4 +321,4 @@ def CountReads(sample): if __name__ == "__main__": input1 = sys.argv[1] - CountReads(input1) \ No newline at end of file + CountReads(input1)