Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 81 additions & 6 deletions distiller.nf
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ Channel.from([
Channel.from(
params.input.library_groups.collect{ k, v -> [k, v] }
).into{LIBRARY_GROUPS_FOR_COOLER_MERGE;
LIBRARY_GROUPS_FOR_STATS_MERGE}

LIBRARY_GROUPS_FOR_STATS_MERGE;
LIBRARY_GROUPS_FOR_STATS_FILTER}

// the Channel with the locations of the raw data (fastqs):
LIB_RUN_SOURCES = Channel.from(
Expand Down Expand Up @@ -518,7 +518,7 @@ process map_parse_sort_chunks {
LIB_RUN_CHUNK_PAIRSAMS
.map {library, run, chunk, pairsam, bam -> tuple(library, pairsam)}
.groupTuple()
.set {LIB_PAIRSAMS_TO_MERGE}
.set { LIB_PAIRSAMS_TO_MERGE }

process merge_dedup_splitbam {
tag "library:${library}"
Expand All @@ -534,7 +534,7 @@ process merge_dedup_splitbam {
"${library}.${ASSEMBLY_NAME}.dups.pairs.gz",
"${library}.${ASSEMBLY_NAME}.dups.bam",
"${library}.${ASSEMBLY_NAME}.unmapped.pairs.gz",
"${library}.${ASSEMBLY_NAME}.unmapped.bam" into LIB_PAIRS_BAMS
"${library}.${ASSEMBLY_NAME}.unmapped.bam" into LIB_PAIRS_BAMS, LIB_PAIRS_BAMS_TO_FILTER
set library, "${library}.${ASSEMBLY_NAME}.dedup.stats" into LIB_DEDUP_STATS

script:
Expand Down Expand Up @@ -598,10 +598,14 @@ process merge_dedup_splitbam {
LIB_PAIRS_BAMS
.map {v -> tuple(v[0], v[1])}
.set {LIB_PAIRS}

/* Read filters for creating coolers and writing stats */
FILTERS = Channel.from(
params.bin.filters.collect{ name, expr -> [name, expr] } )
FILTERS
FILTERS.into {FILTERS_FOR_COOLER; FILTERS_FOR_STATS}
FILTERS_FOR_COOLER
.combine(LIB_PAIRS)
//.combine(CHROM_SIZES_FOR_BINNING.first())
.set {LIB_FILTER_PAIRS}

/*
Expand All @@ -615,7 +619,7 @@ process bin_zoom_library_pairs{
input:
set val(filter_name), val(filter_expr), val(library), file(pairs_lib) from LIB_FILTER_PAIRS
file(chrom_sizes) from CHROM_SIZES_FOR_BINNING.first()

output:
set library, filter_name, "${library}.${ASSEMBLY_NAME}.${filter_name}.${MIN_RES}.cool",
"${library}.${ASSEMBLY_NAME}.${filter_name}.${MIN_RES}.mcool" into LIB_FILTER_COOLERS_ZOOMED
Expand Down Expand Up @@ -660,6 +664,7 @@ LIBRARY_GROUPS_FOR_COOLER_MERGE
process merge_zoom_library_group_coolers{
tag "library_group:${library_group} filter:${filter_name}"
publishDir path: getOutputDir('coolers_library_group'), mode: "copy"
storeDir getOutputDir('coolers_library_group')

input:
set val(library_group), val(filter_name), file(coolers) from LIBGROUP_FILTER_COOLERS_TO_MERGE
Expand Down Expand Up @@ -719,6 +724,7 @@ LIBRARY_GROUPS_FOR_STATS_MERGE
process merge_stats_libraries_into_groups {
tag "library_group:${library_group}"
publishDir path: getOutputDir('stats_library_group'), mode: "copy"
storeDir getOutputDir('stats_library_group')

input:
set val(library_group), file(stats) from LIBGROUP_STATS_TO_MERGE
Expand All @@ -736,3 +742,72 @@ process merge_stats_libraries_into_groups {
pairtools stats --merge ${stats} -o ${library_group}.${ASSEMBLY_NAME}.stats
"""
}



/* Filter pairs for stats and merge them (if requested): */

if (params.get('stats', [:]).get('use_filters', 'false').toBoolean()) {

// Per library, keep the library name plus tuple elements 1, 4 and 6 of the
// merge_dedup_splitbam output; that list is what gets passed to `pairtools merge`.
// NOTE(review): indices 1/4/6 are presumably the nodups, dups and unmapped
// pairs files -- confirm against the full output declaration of merge_dedup_splitbam.
LIB_PAIRS_FILTER = LIB_PAIRS_BAMS_TO_FILTER
    .map { rec -> tuple(rec[0], [rec[1], rec[4], rec[6]]) }

// Pair every (filter_name, filter_expr) entry with every library's file list.
FILTERS_FOR_STATS
    .combine(LIB_PAIRS_FILTER)
    .set { LIB_PAIRSAMS_TO_FILTER }

// Merge the pairs files of one library, optionally pass them through one named
// read filter, and write `pairtools stats` for the result.
// Runs once per (filter, library) combination emitted by LIB_PAIRSAMS_TO_FILTER.
//
// input:  filter_name, filter_expr, library, lib_pairs (files to merge)
// output: library, filter_name, "<library>.<assembly>.<filter_name>.stats"
process merge_dedup_filter {
    // The filter is part of the tag because the same library is processed once
    // per filter; without it the tasks cannot be told apart in the logs.
    tag "library:${library} filter:${filter_name}"
    storeDir getOutputDir('stats_library')

    input:
    set val(filter_name), val(filter_expr), val(library), file(lib_pairs) from LIB_PAIRSAMS_TO_FILTER

    output:
    set library, filter_name, "${library}.${ASSEMBLY_NAME}.${filter_name}.stats" into LIB_STATS_FILTERED

    script:
    // An empty filter expression means "no filtering": the select stage is dropped
    // and stats are computed on the merged pairs as they are.
    def filter_command = (filter_expr == '' ? '' : "| pairtools select '${filter_expr}'")
    def stats_command = "| pairtools stats --nproc-in ${task.cpus} --nproc-out ${task.cpus}"
    // \$TASK_TMP_DIR is escaped so that it is expanded by the shell, not by Groovy.
    def merge_command = (
        "pairtools merge ${lib_pairs} --nproc ${task.cpus} --tmpdir \$TASK_TMP_DIR "
    )

    """
    TASK_TMP_DIR=\$(mktemp -d -p ${task.distillerTmpDir} distiller.tmp.XXXXXXXXXX)
    ${merge_command} ${filter_command} ${stats_command} \
        --output ${library}.${ASSEMBLY_NAME}.${filter_name}.stats \
        | cat
    rm -rf \$TASK_TMP_DIR
    """
}

// Attach each per-library filtered stats file to every library group that lists
// that library, then collect the files per (library_group, filter_name).
LIBRARY_GROUPS_FOR_STATS_FILTER
    .combine(LIB_STATS_FILTERED)
    // row = [library_group, libraries, library, filter_name, stats_file];
    // keep only the rows whose library belongs to the group.
    .filter { row -> row[1].contains(row[2]) }
    .map { group, members, lib, filt, stats_file ->
        tuple(group, filt, stats_file)
    }
    .groupTuple(by: [0, 1])
    .set { LIBGROUP_STATS_TO_MERGE_FILTERED }


// Produce one filtered stats file per (library_group, filter_name) from the
// per-library filtered stats files.
//
// input:  library_group, filter_name, stats (one or more per-library stats files)
// output: library_group, filter_name, "<library_group>.<assembly>.<filter_name>.stats"
process merge_filter_stats_libraries_into_groups {
    // The filter is part of the tag because each group is processed once per
    // filter (same convention as merge_zoom_library_group_coolers).
    tag "library_group:${library_group} filter:${filter_name}"
    publishDir path: getOutputDir('stats_library_group'), mode: "copy"
    storeDir getOutputDir('stats_library_group')

    input:
    set val(library_group), val(filter_name), file(stats) from LIBGROUP_STATS_TO_MERGE_FILTERED

    output:
    set library_group, filter_name, "${library_group}.${ASSEMBLY_NAME}.${filter_name}.stats" into LIBGROUP_STATS_FILTERED

    script:
    // When isSingleFile(stats) holds there is nothing to merge: the group file
    // is just a link to the single input file.
    // NOTE(review): with storeDir the link itself is what gets stored -- confirm
    // that it still resolves outside the task work directory.
    if( isSingleFile(stats))
        """
        ln -s ${stats} ${library_group}.${ASSEMBLY_NAME}.${filter_name}.stats
        """
    else
        """
        pairtools stats --merge ${stats} -o ${library_group}.${ASSEMBLY_NAME}.${filter_name}.stats
        """
}
}
9 changes: 9 additions & 0 deletions project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,14 @@ bin:
no_filter: ''
mapq_30: '(mapq1>=30) and (mapq2>=30)'

# Control the stats output
stats:
# This option produces a separate stats file for each filter (see bin.filters above).
# The pairs are filtered first, and the stats are then calculated on the filtered pairs.
# This can make the reported numbers of total and unmapped reads look counterintuitive.
# Only the stats on uniquely mapped reads that passed the filter should be interpreted directly.
use_filters: False

########################################
# folder structure for storing results
########################################
Expand All @@ -215,4 +223,5 @@ output:
pairs_library: 'results/pairs_library'
coolers_library: 'results/coolers_library/'
coolers_library_group: 'results/coolers_library_group/'
stats_library: 'results/stats_library/'
stats_library_group: 'results/stats_library_group/'