Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions bamcleanheader.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def get_args():

# extract read group information from header of original bam
def get_clean_header(bam):
clean_header_list = list()
clean_header_list = []
for line in bam.text.split('\n'):
if len(line.rstrip()) == 0:
continue
Expand Down Expand Up @@ -101,17 +101,15 @@ def get_clean_header(bam):
# add read group info to header of new sam file
def bam_clean(bam, is_sam, header_only):
if is_sam:
in_bam = pysam.Samfile(bam, 'r', check_sq=False)
in_bam = pysam.AlignmentFile(bam, 'r', check_sq=False)
else:
in_bam = pysam.Samfile(bam, 'rb', check_sq=False)
in_bam = pysam.AlignmentFile(bam, 'rb', check_sq=False)

# out_bam = pysam.Samfile('-', 'w', template=in_bam)

print get_clean_header(in_bam)
print(get_clean_header(in_bam))

if not header_only:
for al in in_bam:
print al
print(al)

# # this code leads to piping errors
# if not header_only:
Expand Down Expand Up @@ -140,6 +138,6 @@ def main():
if __name__ == '__main__':
try:
sys.exit(main())
except IOError, e:
except IOError as e:
if e.errno != 32: # ignore SIGPIPE
raise
30 changes: 13 additions & 17 deletions bamfilterrg.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,31 @@
import sys
import argparse
from argparse import RawTextHelpFormatter
import string
from string import *

__author__ = "Colby Chiang ([email protected])"
__version__ = "$Revision: 0.0.1 $"
__date__ = "$Date: 2015-01-01 16:58 $"

def bamfilterrg(bamfile, readgroup, limit, is_sam, bam_out, uncompressed_out):
# set input file
if bamfile == None:
if bamfile is None:
if is_sam:
in_bam = pysam.Samfile("-", "r")
in_bam = pysam.AlignmentFile('-', 'r')
else:
in_bam = pysam.Samfile('-', 'rb')
in_bam = pysam.AlignmentFile('-', 'rb')
else:
if is_sam:
in_bam = pysam.Samfile(bamfile, 'r')
in_bam = pysam.AlignmentFile(bamfile, 'r')
else:
in_bam = pysam.Samfile(bamfile, "rb")
in_bam = pysam.AlignmentFile(bamfile, 'rb')

# set output file
if uncompressed_out:
out_bam = pysam.Samfile('-', 'wbu', template=in_bam)
out_bam = pysam.AlignmentFile('-', 'wbu', template=in_bam)
elif bam_out:
out_bam = pysam.Samfile('-', 'wb', template=in_bam)
out_bam = pysam.AlignmentFile('-', 'wb', template=in_bam)
else:
out_bam = pysam.Samfile('-', 'wh', template=in_bam)
out_bam = pysam.AlignmentFile('-', 'wh', template=in_bam)


# parse readgroup string
Expand All @@ -46,19 +44,17 @@ def bamfilterrg(bamfile, readgroup, limit, is_sam, bam_out, uncompressed_out):
counter = 0
for al in in_bam:
# must be in a user specified readgroup
if rg_list and al.opt('RG') not in rg_list:
if rg_list and al.get_tag('RG') not in rg_list:
continue

# write out alignment
out_bam.write(al)
counter += 1

# bail if reached limit
if (limit != None
and counter >= limit):
if limit is not None and counter >= limit:
break


# ============================================
# functions
# ============================================
Expand All @@ -67,7 +63,7 @@ def bamfilterrg(bamfile, readgroup, limit, is_sam, bam_out, uncompressed_out):
class Namegroup():
def __init__(self, al):
self.alignments = list()
self.name = al.qname
self.name = al.query_name
self.sa = 0
self.num_prim = 0
self.add_alignment(al)
Expand All @@ -77,8 +73,8 @@ def add_alignment(self, al):
if not al.is_secondary:
self.num_prim += 1
try:
self.sa += len(al.opt('SA').rstrip(';').split(';'))
# print self.sa
self.sa += len(al.get_tag('SA').rstrip(';').split(';'))
except KeyError:
pass

Expand Down Expand Up @@ -125,7 +121,7 @@ def main():
if __name__ == "__main__":
try:
sys.exit(main())
except IOError, e:
except IOError as e:
if e.errno != 32: # ignore SIGPIPE
raise

28 changes: 14 additions & 14 deletions bamfixflags.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,10 @@ def bamfixflags(bamfile,
lib_mean = mean(lib_hist)
lib_sd = stdev(lib_hist)

print 'p25', p25
print 'p75', p75
print 'mean', lib_mean
print 'sd', lib_sd
print('p25', p25)
print('p75', p75)
print('mean', lib_mean)
print('sd', lib_sd)

low = int(p25 - mapping_bound * (p75 - p25) + .499)
high = int(p75 + mapping_bound * (p75 - p25) + .499)
Expand Down Expand Up @@ -171,51 +171,51 @@ def bamfixflags(bamfile,
else:
out_bam = pysam.Samfile('-', 'wh', template=in_bam)

print proper
print(proper)
for al in in_bam:
# out_bam.write(al)
print al
print(al)

if al.is_supplementary:
pass

elif al.is_unmapped or al.mate_is_unmapped:
if al.is_proper_pair:
print 'mismarked proper (unmapped)'
print('mismarked proper (unmapped)')
al.is_proper_pair = False

elif al.reference_id != al.next_reference_id:
if al.is_proper_pair:
print 'mismarked proper (chrom)'
print('mismarked proper (chrom)')
al.is_proper_pair = False

elif (al.reference_start < al.next_reference_start
and (al.is_reverse or not al.mate_is_reverse)):
if al.is_proper_pair:
print 'mismarked proper (orient +)'
print('mismarked proper (orient +)')
al.is_proper_pair = False

elif (al.reference_start > al.next_reference_start
and (not al.is_reverse or al.mate_is_reverse)):
if al.is_proper_pair:
print 'mismarked proper (orient -)'
print('mismarked proper (orient -)')
al.is_proper_pair = False

# if al.supp
elif (al.template_length >= proper[al.opt('RG')][0]
and al.template_length <= proper[al.opt('RG')][1]):
if not al.is_proper_pair:
print 'mismarked improper (insert size)'
print('mismarked improper (insert size)')
al.is_proper_pair = True
else:
if al.is_proper_pair:
print 'mismarked proper (insert size)'
print('mismarked proper (insert size)')
al.is_proper_pair = False

# out_bam.write(al)
print al
# print proper[al.opt('RG')], al.template_length
# print al
print(al)
# # must be in a user specified readgroup
# if al.opt('RG') not in rg_list:
# continue
Expand Down Expand Up @@ -301,7 +301,7 @@ def main():
if __name__ == "__main__":
try:
sys.exit(main())
except IOError, e:
except IOError as e:
if e.errno != 32: # ignore SIGPIPE
raise

43 changes: 19 additions & 24 deletions bamgroupreads.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,31 @@
import sys
import argparse
from argparse import RawTextHelpFormatter
import string
from string import *

__author__ = "Colby Chiang ([email protected])"
__version__ = "$Revision: 0.0.1 $"
__date__ = "$Date: 2014-12-15 11:43 $"

def bamgroupreads(bamfile, readgroup, reset_dups, fix_flags, is_sam, bam_out, uncompressed_out):
# set input file
if bamfile == None:
if bamfile is None:
if is_sam:
in_bam = pysam.Samfile("-", "r")
in_bam = pysam.AlignmentFile("-", "r")
else:
in_bam = pysam.Samfile('-', 'rb')
in_bam = pysam.AlignmentFile('-', 'rb')
else:
if is_sam:
in_bam = pysam.Samfile(bamfile, 'r')
in_bam = pysam.AlignmentFile(bamfile, 'r')
else:
in_bam = pysam.Samfile(bamfile, "rb")
in_bam = pysam.AlignmentFile(bamfile, "rb")

# set output file
if uncompressed_out:
out_bam = pysam.Samfile('-', 'wbu', template=in_bam)
out_bam = pysam.AlignmentFile('-', 'wbu', template=in_bam)
elif bam_out:
out_bam = pysam.Samfile('-', 'wb', template=in_bam)
out_bam = pysam.AlignmentFile('-', 'wb', template=in_bam)
else:
out_bam = pysam.Samfile('-', 'wh', template=in_bam)

out_bam = pysam.AlignmentFile('-', 'wh', template=in_bam)

# parse readgroup string
try:
Expand All @@ -46,11 +43,11 @@ def bamgroupreads(bamfile, readgroup, reset_dups, fix_flags, is_sam, bam_out, un
d = {}
for al in in_bam:
# must be in a user specified readgroup
if rg_list and al.opt('RG') not in rg_list:
if rg_list and al.get_tag('RG') not in rg_list:
continue

# add read name to dictionary if not already there
key = al.qname
key = al.query_name
if key not in d:
d.setdefault(key,Namegroup(al))
# print matched read pairs
Expand All @@ -60,7 +57,7 @@ def bamgroupreads(bamfile, readgroup, reset_dups, fix_flags, is_sam, bam_out, un
for al in d[key].alignments:
if reset_dups:
# unset the duplicate flag
al.is_duplicate = 0
al.is_duplicate = False
if fix_flags:
# fix the secondary mate flag
proper_pair = False
Expand All @@ -74,8 +71,7 @@ def bamgroupreads(bamfile, readgroup, reset_dups, fix_flags, is_sam, bam_out, un
proper_pair = True
if flagcheck.is_duplicate:
duplicate = True
if (legacy and flagcheck.is_secondary
or not legacy and flagcheck.flag & 2048 == 2048):
if (legacy and flagcheck.is_secondary) or (not legacy and flagcheck.is_supplementary):
continue
if flagcheck.is_read1:
read1_unmapped = flagcheck.is_unmapped
Expand All @@ -102,22 +98,21 @@ def bamgroupreads(bamfile, readgroup, reset_dups, fix_flags, is_sam, bam_out, un
# ============================================

# class that holds reads from a sequence fragment
class Namegroup():
class Namegroup:
def __init__(self, al):
self.alignments = list()
self.name = al.qname
self.alignments = []
self.name = al.query_name
self.sa = 0
self.num_prim = 0
self.add_alignment(al)

def add_alignment(self, al):
self.alignments.append(al)
if not (legacy and al.is_secondary
or not legacy and al.flag & 2048 == 2048):
if not ((legacy and al.is_secondary) or (not legacy and al.is_supplementary)):
self.num_prim += 1
try:
self.sa += len(al.opt('SA').rstrip(';').split(';'))
# print self.sa
self.sa += len(al.get_tag('SA').rstrip(';').split(';'))
# print(self.sa)
except KeyError:
pass

Expand Down Expand Up @@ -169,7 +164,7 @@ def main():
if __name__ == "__main__":
try:
sys.exit(main())
except IOError, e:
except IOError as e:
if e.errno != 32: # ignore SIGPIPE
raise

12 changes: 6 additions & 6 deletions bamheadrg.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ def get_args():
# extract read group information from header of original bam
def extract_rg_info(donor, donor_is_sam, rgs_to_extract):
if donor_is_sam:
bam = pysam.Samfile(donor, 'r', check_sq=False)
bam = pysam.AlignmentFile(donor, 'r', check_sq=False)
else:
bam = pysam.Samfile(donor, 'rb', check_sq=False)
rg_out = list()
bam = pysam.AlignmentFile(donor, 'rb', check_sq=False)
rg_out = []
for line in bam.text.split('\n'):
if line[:3] == "@RG":
v = line.rstrip().split('\t')
Expand Down Expand Up @@ -71,9 +71,9 @@ def bamheadrg(recipient, rg_out):
if in_header:
if line[0] != '@':
for readgroup in rg_out:
print '@RG\t' + '\t'.join([':'.join((t,readgroup[t])) for t in readgroup])
print('@RG\t' + '\t'.join([':'.join((t,readgroup[t])) for t in readgroup]))
in_header = False
print line.rstrip()
print(line.rstrip())
return

# --------------------------------------
Expand Down Expand Up @@ -101,6 +101,6 @@ def main():
if __name__ == '__main__':
try:
sys.exit(main())
except IOError, e:
except IOError as e:
if e.errno != 32: # ignore SIGPIPE
raise
Loading