diff --git a/.azure-pipelines/test-template.yml b/.azure-pipelines/test-template.yml index 36e1f4fbb7..19ed2dbd66 100644 --- a/.azure-pipelines/test-template.yml +++ b/.azure-pipelines/test-template.yml @@ -1,15 +1,27 @@ -steps: -- bash: conda create -n foo -q --yes -c conda-forge -c bioconda python=$(python.version) numpy scipy matplotlib==3.1.1 nose flake8 plotly pysam pyBigWig py2bit deeptoolsintervals - displayName: Installing dependencies -- bash: | - source activate foo - python -m pip install . --no-deps --ignore-installed -vvv - displayName: Installing deeptools -- bash: | - source activate foo - flake8 . --exclude=.venv,.build,build --ignore=E501,F403,E402,F999,F405,E722,W504,W605 - displayName: flake8 -- bash: | - source activate foo - nosetests --with-doctest -sv deeptools - displayName: Test deepTools +trigger: + branches: + include: + - '*' +pr: + branches: + include: + - '*' +jobs: +- job: install_deeptools_run_tests + pool: + vmImage: 'ubuntu-latest' + steps: + - bash: conda create -n foo -q --yes -c conda-forge -c bioconda python=$(python.version) numpy scipy matplotlib==3.1.1 nose flake8 plotly pysam pyBigWig py2bit deeptoolsintervals + displayName: Installing dependencies + - bash: | + source activate foo + python -m pip install . --no-deps --ignore-installed -vvv + displayName: Installing deeptools + - bash: | + source activate foo + flake8 . --exclude=.venv,.build,build --ignore=E501,F403,E402,F999,F405,E722,W504,W605 + displayName: flake8 + - bash: | + source activate foo + nosetests --with-doctest -sv deeptools + displayName: Test deepTools diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cb0973ca9d..b8b360d9ff 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,5 @@ name: Test -on: [push] +on: [push, pull_request] jobs: build-linux: name: Test on Linux diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 345bbc79a3..8d50bfd764 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -19,6 +19,7 @@ def parse_arguments(args=None): argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=""" +(This version should work for stranded bigWig files.) This tool calculates scores per genome regions and prepares an intermediate file that can be used with ``plotHeatmap`` and ``plotProfiles``. Typically, the genome regions are genes, but any other regions defined in a BED file can be used. diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 6a95ba0052..8973e35720 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -10,6 +10,7 @@ from deeptools.utilities import toString, toBytes, smartLabels from deeptools.heatmapper_utilities import getProfileTicks +from deeptools.pyBigWigStranded import openBigWigGuessingStrandedness old_settings = np.seterr(all='ignore') @@ -364,8 +365,8 @@ def compute_sub_matrix_worker(self, chrom, start, end, score_file_list, paramete # read BAM or scores file score_file_handles = [] for sc_file in score_file_list: - score_file_handles.append(pyBigWig.open(sc_file)) - + score_file_handles.append( + openBigWigGuessingStrandedness(sc_file)) # determine the number of matrix columns based on the lengths # given by the user, times the number of score files matrix_cols = len(score_file_list) * \ @@ -531,7 +532,7 @@ def compute_sub_matrix_worker(self, chrom, start, end, score_file_list, paramete for sc_handler in score_file_handles: # We're only supporting bigWig files at this point cov = heatmapper.coverage_from_big_wig( - sc_handler, feature_chrom, zones, + sc_handler, feature_chrom, feature_strand, zones, parameters['bin size'], parameters['bin avg type'], parameters['missing data as zero'], @@ -652,7 +653,7 @@ def change_chrom_names(chrom): return chrom @staticmethod - def coverage_from_big_wig(bigwig, chrom, zones, binSize, avgType, nansAsZeros=False, verbose=True): + def coverage_from_big_wig(bigwig, chrom, strand, zones, binSize, avgType, nansAsZeros=False, verbose=True): """ uses pyBigWig @@ -716,7 +717,8 @@ def coverage_from_big_wig(bigwig, chrom, zones, binSize, avgType, nansAsZeros=Fa endIdx += end - start if start < end: # This won't be the case if we extend off the front of a chromosome, such as (-100, 0) - values_array[startIdx:endIdx] = bigwig.values(chrom, start, end) + # jtb: also adding in strand + values_array[startIdx:endIdx] = bigwig.values(chrom, start, end, strand) if end < region[1]: startIdx = endIdx endIdx += region[1] - end diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index ef4f4d0748..8eed12b8e1 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -914,3 +914,20 @@ def __call__(self, parser, args, values, option_string=None): raise argparse.ArgumentTypeError(msg) setattr(args, self.dest, values) return RequiredLength + + +def stranded_bigwig_file_pair(): + """Common arguments related to pairs of stranded bigWig files. + """ + parser = argparse.ArgumentParser(add_help=False) + group = parser.add_argument_group('Options for stranded pairs of bigWig files') + + group.add_argument('--strandedBigWigSuffix', + help='If specified, then files ending with the ' + 'first suffix will be treated as the + strand ' + 'of a pair of bigWig files, and the second ' + 'suffix will be used for the - strand.', + default='.fwd.bw,.rev.bw', + type=str, + required=False) + return parser diff --git a/deeptools/pyBigWigStranded.py b/deeptools/pyBigWigStranded.py new file mode 100644 index 0000000000..ee98e54cbd --- /dev/null +++ b/deeptools/pyBigWigStranded.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import re + +import numpy as np +import pyBigWig + +class pyBigWig1: + """Wrapper around pyBigWig to ignore strand. + + This modifies some pyBigWig methods to expect (and ignore) strand + information. This way, callers can get data either from + a pyBigWig object, or a strandedPyBigWig object, without + knowing whether the file is stranded. + + Possibly this module belongs in pyBigWig. + + Also, this module (currently) requires numpy support in pyBigWig. + """ + def __init__(self, bigWigFile): + """Constructor is a wrapper around pyBigWig.pyBigWig. + """ + self.bigWig = pyBigWig.open(bigWigFile) + + def values(self, chrom, start, end, strand='+'): + """Gets values at a region. + + This is like pyBigWig.values(), except that expects the strand argument + (which it ignores). + ??? possibly this should reverse values, if strand is '-' ? + """ + return self.bigWig.values(chrom, start, end) + + def chroms(self, chroms=None): + """This is just forwarded. + """ + if chroms: + return self.bigWig.chroms(chroms) + else: + return self.bigWig.chroms() + + +class strandedPyBigWig: + """Wrapper for accessing pairs of 'stranded' bigWig files. + + This has almost the same API as pyBigWig, except that + most methods include a 'strand' argument. When these are + called, this gets the result from the appropriate stranded file. + + Note that that these methods return the absolute value of the + contents of the bigWig. This is because the - strand file is + sometimes stored negated. It seems simpler to just always + return the absolute value, than to add a flag indicating + whether the - strand file is stored negated. + """ + + def __init__(self, plusStrandFile, + suffixPairs=['fwd.bw,rev.bw', 'plus.bw,minus.bw', 'pl.bw,mn.bw', 'p.bw,m.bw'], + minusStrandFile=None): + """Constructor opens a pair of bigWig files for reading. + + suffixPairs: these define the + and - filename pairing. + minusStrandFile: the minus strand file can also simply be given + explicitly (this overrides the name computed using suffixPairs) + """ + # if - strand file isn't given, attempt to compute it + if not minusStrandFile: + minusStrandFile = getMinusStrandFile(plusStrandFile, suffixPairs) + # if we still don't have a - strand filename, throw an error + if not minusStrandFile: + raise ValueError('couldn\'t compute - strand filename') + # open files + self.plusBigWig = pyBigWig1(plusStrandFile) + self.minusBigWig = pyBigWig1(minusStrandFile) + + def chroms(self, chroms=None): + """Gets chromosomes. + + This just uses the + strand file's chromosomes (without + checking that they agree with the - strand file's). + """ + if chroms: + return self.plusBigWig.chroms(chroms) + else: + return self.plusBigWig.chroms() + + def values(self, chrom, start, end, strand='+'): + """Gets values at a region specified by an object. + + This will get values from plusBigWig or minusBigWig, + depending on the strand. + Returns: absolute value of values at that region. + If strand is '-', these will be in descending coordinate + order; otherwise they'll be in ascending coordinate order. + """ + # get the relevant bigWig object + bigWig = self.minusBigWig if strand == '-' else self.plusBigWig + # forward this call to that object, taking absolute value + return np.abs(bigWig.values(chrom, start, end, strand)) + +def getMinusStrandFile(plusStrandFile, + suffixPairs=['fwd.bw,rev.bw', 'plus.bw,minus.bw', 'pl.bw,mn.bw', 'p.bw,m.bw']): + """Converts the + strand filename to the - strand filename. + + plusStrandFilename: name of the + strand file + suffixPairs: these define the + and - filename pairing. + For each A,B pair, if plusStrandFile ends with A, then minusStrandFile is + assumed to end with B. + Returns: name of the - strand file, or None if the + strand file's + name didn't match any of the known + strand file's suffixes. + """ + # loop through + suffix, until a matching one is found + for suffixPair in suffixPairs: + (plusSuffix, minusSuffix) = suffixPair.split(',') + # try replacing the suffix (at the end of the filename) + minusStrandFile = re.sub(re.escape(plusSuffix) + '$', minusSuffix, + plusStrandFile) + # if this is different, then the replacement happened + if minusStrandFile != plusStrandFile: + return minusStrandFile + # at this point, none of the suffixes matched + return None + +def openBigWigGuessingStrandedness(bigWigFile, + suffixPairs=['fwd.bw,rev.bw', 'plus.bw,minus.bw', 'pl.bw,mn.bw', 'p.bw,m.bw']): + """Opens a bigWig file/files, guessing whether they're stranded. + + bigWigFile: name of the file (or the + strand file) + suffixPairs: these define the and - filename pairing + (as elsehwere) + Returns: an object for reading from a bigWig file, either stranded + (if bigWigFile ends with one of the suffixes), or not + (if it doesn't). + """ + # see if bigWigFile "looks" stranded + minusStrandFile = getMinusStrandFile(bigWigFile, suffixPairs) + if minusStrandFile: + # if so, assume it's a stranded pair + return strandedPyBigWig(bigWigFile, minusStrandFile=minusStrandFile) + else: + # otherwise, assume it's unstranded + return pyBigWig1(bigWigFile, suffixPairs) diff --git a/galaxy/wrapper/multiBigwigSummary.xml b/galaxy/wrapper/multiBigwigSummary.xml index 6d929f04f2..40e1cb3b63 100644 --- a/galaxy/wrapper/multiBigwigSummary.xml +++ b/galaxy/wrapper/multiBigwigSummary.xml @@ -1,17 +1,16 @@ - + calculates average scores for a list of two or more bigwig files multiBigwigSummary deepTools_macros.xml - + - @@ -70,7 +70,6 @@ help="Correlation is computed for the number of reads that overlap such regions."/> - @@ -91,18 +90,20 @@ - - + +