Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- [#861](https://github.com/nf-core/mag/pull/861) - Added `--generate_bigmag_file` to execute the bigmag workflow that generates the file to be used as input for [BIgMAG](https://github.com/jeffe107/BIgMAG) (added by @jeffe107)
- [#718](https://github.com/nf-core/mag/pull/718) - Add support for independent long-read metagenomic assembly (requested by @ljmesi and many others, added by @muabnezor)
- [#718](https://github.com/nf-core/mag/pull/718) - Added metaMDBG and (meta)Flye as long read assemblers (added by @muabnezor)
- [#718](https://github.com/nf-core/mag/pull/718) - Added host removal for long reads using minimap2 as aligner (added by @muabnezor)
Expand Down
4 changes: 4 additions & 0 deletions CITATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@

> Orakov, A., Fullam, A., Coelho, A. P., Khedkar, S., Szklarczyk, D., Mende, D. R., Schmidt, T. S. B., and Bork, P.. 2021. “GUNC: Detection of Chimerism and Contamination in Prokaryotic Genomes.” Genome Biology 22 (1): 178. doi: 10.1186/s13059-021-02393-0.

- [MAGFlow/BIgMAG](https://doi.org/10.12688/f1000research.152290.2)

> Yepes-García, J., Falquet, L. (2024). Metagenome quality metrics and taxonomical annotation visualization through the integration of MAGFlow and BIgMAG. F1000Research 13:640. doi.org/10.12688/f1000research.152290.2

- [MaxBin2](https://doi.org/10.1093/bioinformatics/btv638)

> Yu-Wei, W., Simmons, B. A. & Singer, S. W. (2015) MaxBin 2.0: An Automated Binning Algorithm to Recover Genomes from Multiple Metagenomic Datasets. Bioinformatics 32 (4): 605–7. doi: 10.1093/bioinformatics/btv638.
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ Other code contributors include:
- [Nikolaos Vergoulidis](https://github.com/IceGreb)
- [Greg Fedewa](https://github.com/harper357)
- [Vini Salazar](https://github.com/vinisalazar)
- [Jeferyd Yepes](https://github.com/jeffe107)

Long read processing was inspired by [caspargross/HybridAssembly](https://github.com/caspargross/HybridAssembly) written by Caspar Gross [@caspargross](https://github.com/caspargross)

Expand Down
98 changes: 98 additions & 0 deletions bin/bigmag_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env python

## Originally written by Jeferyd Yepes and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

import pandas as pd
import re
import argparse
import sys
import warnings

def parse_args(args=None):
    """Parse command-line options for the BIgMAG summary script.

    All three summary files plus the bin QC tool name are expected; the
    output path is mandatory and is opened for writing by argparse itself.
    """
    arg_parser = argparse.ArgumentParser()

    # Input summary files produced by upstream pipeline steps.
    arg_parser.add_argument("-s", "--summary", metavar="FILE", help="Pipeline summary file.")
    arg_parser.add_argument("-g", "--gunc_summary", metavar="FILE", help="GUNC summary file.")
    arg_parser.add_argument("-a", "--alt_summary", metavar="FILE", help="BUSCO or CheckM2 summary file.")

    # Which bin QC tool produced the alternative summary (restricts parsing logic downstream).
    arg_parser.add_argument(
        "-t",
        "--binqc_tool",
        help="Bin QC tool used",
        choices=["busco", "checkm", "checkm2"],
    )

    # Destination for the merged table; argparse opens it in write mode.
    arg_parser.add_argument(
        "-o",
        "--out",
        required=True,
        metavar="FILE",
        type=argparse.FileType("w"),
        help="Output file containing final bigmag summary.",
    )

    return arg_parser.parse_args(args)


def main(args=None):
    """Build the BIgMAG input table by merging three per-bin summaries.

    Reads the pipeline bin summary, the GUNC summary and the BUSCO or
    CheckM(2) summary, joins them on the (extension-stripped) bin name,
    drops redundant identifier columns, derives a per-bin sample name and
    writes the merged table as TSV to ``--out``.
    """
    args = parse_args(args)

    # All three summaries are required for the merge below, so abort early
    # if any one of them is missing. (Bug fix: the original test used `and`,
    # which only aborted when *every* summary was missing and otherwise
    # crashed later in pd.read_csv(None).)
    if not args.summary or not args.gunc_summary or not args.alt_summary:
        sys.exit(
            "No summary specified! "
            "Please specify the pipeline summary, the GUNC summary and BUSCO or CheckM2 summary."
        )

    # Strips FASTA extensions and any trailing suffix (e.g. '.fa.gz').
    fasta_ext = r"\.(fa|fasta)(\..*)?$"

    # Pipeline bin summary: normalise bin names so they match the other tools.
    df_summary = pd.read_csv(args.summary, sep="\t", index_col=0)
    df_summary["bin"] = df_summary["bin"].astype(str).str.replace(fasta_ext, "", regex=True)
    df_summary = df_summary.sort_values(by="bin")

    # GUNC summary: keyed on the 'genome' column.
    df_gunc = pd.read_csv(args.gunc_summary, sep="\t")
    df_gunc["genome"] = df_gunc["genome"].astype(str)
    df_gunc = df_gunc.sort_values(by="genome")

    # BUSCO keys its rows on 'Name'; CheckM/CheckM2 on 'Input_file', which
    # still carries the FASTA extension and must be normalised as well.
    df_alt = pd.read_csv(args.alt_summary, sep="\t")
    if args.binqc_tool == "busco":
        alt_key = "Name"
        df_alt[alt_key] = df_alt[alt_key].astype(str)
    else:  # 'checkm' or 'checkm2' (argparse restricts the choices)
        alt_key = "Input_file"
        df_alt[alt_key] = df_alt[alt_key].astype(str).str.replace(fasta_ext, "", regex=True)
    df_alt = df_alt.sort_values(by=alt_key)

    # Left-join the tool tables onto the pipeline summary, one at a time.
    for df_extra, key in ((df_gunc, "genome"), (df_alt, alt_key)):
        df_summary = pd.merge(df_summary, df_extra, left_on="bin", right_on=key, how="left")

    df_summary.rename(columns={"bin": "Bin"}, inplace=True)

    # Drop join-key/identifier columns that merely duplicate 'Bin'.
    redundant = [c for c in ("Name", "genome", "Input_file", "Assembly", "Bin Id") if c in df_summary.columns]
    df_summary = df_summary.drop(columns=redundant)

    # Derive the sample name from the bin name, which appears to follow
    # '<assembler>-<binner>-<sample>[.suffix]' -- TODO confirm for all binners.
    df_summary["sample"] = None
    for idx, bin_name in df_summary["Bin"].items():
        match = re.search(r"^.*?-.*?-(.*)$", bin_name)
        if match:
            sample = match.group(1)
            # Remove MaxBin-style '.unbinned'/'.noclass' markers and numeric
            # bin suffixes so all bins of one sample collapse to one name.
            sample = re.sub(r"\.(unbinned|noclass)(\..*)?$", "", sample)
            sample = re.sub(r"\.\d+(\.[^.]+)?$", "", sample)
            df_summary.at[idx, "sample"] = sample

    df_summary.to_csv(args.out, sep="\t", index=True)

# Script entry point: exit with main()'s return value (None -> status 0).
if __name__ == "__main__":
    sys.exit(main())
16 changes: 16 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -895,4 +895,20 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}
withName: BIGMAG_SUMMARY {
publishDir = [
[
path: { "${params.outdir}/GenomeBinning/BIgMAG/" },
mode: params.publish_dir_mode,
pattern: '*.tsv',
]
]
}
withName: CONCAT_BIGMAG {
publishDir = [
path: { "${params.outdir}/GenomeBinning/QC" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}
}
13 changes: 13 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,19 @@ In cases where eukaryotic genomes are recovered in binning, [MetaEuk](https://gi

</details>

## Summary file to be used as input for BIgMAG

<details markdown="1">
<summary>Output files</summary>

- `GenomeBinning/BIgMAG/bigmag_summary.tsv`: Summary of bin sequencing depths together with GUNC, QUAST, GTDB-Tk, BUSCO and CheckM or CheckM2 results.

</details>

The output file in this directory is suitable to be used as input for the dashboard [BIgMAG](https://github.com/jeffe107/BIgMAG).

It is generated by a dedicated subworkflow that takes the file `bin_summary.tsv` as input. The subworkflow additionally executes CheckM2 if BUSCO is the selected quality-control tool, or BUSCO if CheckM or CheckM2 was specified by the user as the main tool. By default, the subworkflow will also execute GUNC.

## Ancient DNA

Optional, only running when parameter `-profile ancient_dna` is specified.
Expand Down
4 changes: 4 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -531,3 +531,7 @@ Up until version 4.0.0, this pipeline offered raw read taxonomic profiling using
This feature was removed in version 5.0.0 to strengthen the pipeline's focus on metagenome assembly and binning.

If you require taxonomic profiling of raw reads, we recommend using [nf-core/taxprofiler](https://nf-co.re/taxprofiler/), which is specifically designed for taxonomic profiling of raw reads and supports a wide range of tools for this purpose.

## BIgMAG compatibility

With the parameter `--generate_bigmag_file`, a subworkflow will be triggered to generate a single file that contains the output from all of the bin-quality tools. The file `bigmag_summary.tsv`, located at `GenomeBinning/BIgMAG` in the output directory, can be used directly as input for the application [BIgMAG](https://github.com/jeffe107/BIgMAG) once it has been installed in your local environment. This is the only file needed to run the BIgMAG dashboard.
42 changes: 42 additions & 0 deletions modules/local/bigmag_summary/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Merge the pipeline bin summary, GUNC summary and BUSCO/CheckM2 summary
// into a single TSV suitable as input for the BIgMAG dashboard.
process BIGMAG_SUMMARY {

    conda "conda-forge::pandas=1.4.3"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://depot.galaxyproject.org/singularity/pandas:1.4.3'
        : 'biocontainers/pandas:1.4.3'}"

    input:
    path summary       // pipeline bin summary (bin_summary.tsv)
    path gunc_sum      // GUNC summary file
    path alt_sum       // BUSCO or CheckM2 summary file
    val binqc_tool     // one of: busco / checkm / checkm2

    output:
    path "bigmag_summary.tsv", emit: bigmag_summary
    path "versions.yml"      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Bug fix: this declaration was duplicated, which is a Groovy
    // "variable already declared" compile error.
    def args = task.ext.args ?: ''
    // Only pass each flag when the corresponding input channel was non-empty.
    def summary      = summary.sort().size() > 0 ? "--summary ${summary}" : ""
    def gunc_summary = gunc_sum.sort().size() > 0 ? "--gunc_summary ${gunc_sum}" : ""
    def alt_summary  = alt_sum.sort().size() > 0 ? "--alt_summary ${alt_sum}" : ""
    """
    bigmag_summary.py \
        ${args} \
        ${summary} \
        ${gunc_summary} \
        ${alt_summary} \
        --binqc_tool ${binqc_tool} \
        --out bigmag_summary.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        python: \$(python --version 2>&1 | sed 's/Python //g')
        pandas: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('pandas').version)")
    END_VERSIONS
    """
}
55 changes: 55 additions & 0 deletions modules/local/concat_bigmag/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Concatenate per-bin QC tables with csvtk. Local alias of csvtk/concat so
// the BIgMAG subworkflow's output does not clash with the main BIN_QC output.
process CONCAT_BIGMAG {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/csvtk:0.31.0--h9ee0642_0' :
        'biocontainers/csvtk:0.31.0--h9ee0642_0' }"

    input:
    tuple val(meta), path(csv, name: 'inputs/csv*/*')
    val in_format    // input delimiter format, e.g. 'tsv' or 'csv'
    val out_format   // output delimiter format, e.g. 'tsv' or 'csv'

    output:
    tuple val(meta), path("${prefix}.${out_extension}"), emit: csv
    path "versions.yml"                                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // This process summarises the *alternative* QC tool (the one not already
    // run by the main bin QC step), hence the swapped prefix.
    prefix = params.binqc_tool == 'busco' ? 'checkm2_summary' : 'busco_summary'
    // Map the format names onto literal delimiter characters; anything other
    // than tsv/csv is passed through to csvtk unchanged.
    def delimiter     = in_format == "tsv" ? "\t" : (in_format == "csv" ? "," : in_format)
    def out_delimiter = out_format == "tsv" ? "\t" : (out_format == "csv" ? "," : out_format)
    out_extension = out_format == "tsv" ? 'tsv' : 'csv'
    """
    csvtk \\
        concat \\
        $args \\
        --num-cpus $task.cpus \\
        --delimiter "${delimiter}" \\
        --out-delimiter "${out_delimiter}" \\
        --out-file ${prefix}.${out_extension} \\
        $csv
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" ))
    END_VERSIONS
    """

    stub:
    prefix = params.binqc_tool == 'busco' ? 'checkm2_summary' : 'busco_summary'
    out_extension = out_format == "tsv" ? 'tsv' : 'csv'
    """
    touch ${prefix}.${out_extension}
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" ))
    END_VERSIONS
    """
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ params {
gunc_database_type = 'progenomes'
gunc_db = null
gunc_save_db = false
generate_bigmag_file = false

// Reproducibility options
megahit_fix_cpu_1 = false
Expand Down
5 changes: 5 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -966,6 +966,11 @@
"type": "boolean",
"description": "Save the used GUNC reference files downloaded when not using --gunc_db parameter.",
"help_text": "If specified, the corresponding DIAMOND file downloaded from the GUNC server will be stored in your output directory alongside your GUNC results."
},
"generate_bigmag_file": {
"type": "boolean",
"description": "Make the file bin_summary.tsv suitable for the application BIgMAG.",
"help_text": "If specified, BUSCO, CheckM2 and GUNC will be executed simultaneously."
}
}
},
Expand Down
Loading
Loading