diff --git a/workflows/virology/phi-toolkit/.dockstore.yml b/workflows/virology/phi-toolkit/.dockstore.yml new file mode 100644 index 0000000000..4f897a2052 --- /dev/null +++ b/workflows/virology/phi-toolkit/.dockstore.yml @@ -0,0 +1,19 @@ +version: 1.2 +workflows: +- name: main + subclass: Galaxy + publish: true + primaryDescriptorPath: /PHI-Toolkit.ga + testParameterFiles: + - /PHI-Toolkit-tests.yml + authors: + - name: Matthias Bernt + orcid: 0000-0003-3763-0797 + - name: "Felipe Borim Corr\xEAa" + orcid: 0000-0003-0031-1565 + - name: Joao Pedro Saraiva + orcid: 0000-0001-8251-1960 + - name: Rodolfo Brizola Toscan + orcid: 0000-0002-5652-3666 + - name: "Helmholtz-Zentrum f\xFCr Umweltforschung GmbH - UFZ" + url: https://www.ufz.de/ diff --git a/workflows/virology/phi-toolkit/CHANGELOG.md b/workflows/virology/phi-toolkit/CHANGELOG.md new file mode 100644 index 0000000000..49c8ea71c8 --- /dev/null +++ b/workflows/virology/phi-toolkit/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## [0.1] - 2025-07-09 + +Addition of the workflow to the iwc repository. 
diff --git a/workflows/virology/phi-toolkit/PHI-Toolkit-tests.yml b/workflows/virology/phi-toolkit/PHI-Toolkit-tests.yml new file mode 100644 index 0000000000..89f07981f1 --- /dev/null +++ b/workflows/virology/phi-toolkit/PHI-Toolkit-tests.yml @@ -0,0 +1,84 @@ +- doc: Test outline for PHI-Toolkit + job: + Host contigs: + class: Collection + collection_type: list + elements: + - class: File + identifier: NC_021184.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_021184&rettype=fasta&retmode=text + - class: File + identifier: NC_019936.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_019936&rettype=fasta&retmode=text + - class: File + identifier: NC_019904.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_019904&rettype=fasta&retmode=text + - class: File + identifier: NC_019897.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_019897&rettype=fasta&retmode=text + - class: File + identifier: NC_018515.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_018515&rettype=fasta&retmode=text + - class: File + identifier: NC_018068.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_018068&rettype=fasta&retmode=text + - class: File + identifier: NC_018014.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_018014&rettype=fasta&retmode=text + - class: File + identifier: NC_017095.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_017095&rettype=fasta&retmode=text + - class: File + identifier: NC_017033.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_017033&rettype=fasta&retmode=text + - class: File + identifier: NC_015761.fasta + location: 
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_015761&rettype=fasta&retmode=text + - class: File + identifier: NC_014364.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_014364&rettype=fasta&retmode=text + - class: File + identifier: NC_014363.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_014363&rettype=fasta&retmode=text + - class: File + identifier: NC_014212.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_014212&rettype=fasta&retmode=text + - class: File + identifier: NC_014211.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_014211&rettype=fasta&retmode=text + - class: File + identifier: NC_014168.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_014168&rettype=fasta&retmode=text + - class: File + identifier: NC_014008.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_014008&rettype=fasta&retmode=text + - class: File + identifier: NC_012982.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_012982&rettype=fasta&retmode=text + - class: File + identifier: NC_009012.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_009012&rettype=fasta&retmode=text + - class: File + identifier: NC_008261.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_008261&rettype=fasta&retmode=text + - class: File + identifier: NC_003450.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_003450&rettype=fasta&retmode=text + - class: File + identifier: NC_002737.fasta + location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_002737&rettype=fasta&retmode=text + - class: File + identifier: NC_000913.fasta + 
location: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_000913&rettype=fasta&retmode=text + GTDB-Tk database: Full Database - Release 220 (2024-10-19) + Agree with genomad license: true + outputs: + PHI toolkit report: + asserts: + - that: has_text + text: html + - that: has_size + size: 100 + - that: has_text + text: "Error: " + negate: true diff --git a/workflows/virology/phi-toolkit/PHI-Toolkit.ga b/workflows/virology/phi-toolkit/PHI-Toolkit.ga new file mode 100644 index 0000000000..2aec3be6dd --- /dev/null +++ b/workflows/virology/phi-toolkit/PHI-Toolkit.ga @@ -0,0 +1,1256 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "Phage Interaction Toolkit (PHI) performs an automated and comprehensive identification and profiling of phages within host genomes", + "comments": [ + { + "child_steps": [ + 4, + 5, + 6, + 7 + ], + "color": "orange", + "data": { + "title": "Host Analyses" + }, + "id": 0, + "position": [ + 290.82164319804593, + 0 + ], + "size": [ + 809, + 699 + ], + "type": "frame" + }, + { + "child_steps": [ + 11, + 12, + 13, + 14, + 16, + 17 + ], + "color": "green", + "data": { + "title": "Phage Analyses" + }, + "id": 1, + "position": [ + 816.0216431980459, + 746.1 + ], + "size": [ + 1098, + 673 + ], + "type": "frame" + } + ], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0000-0003-3763-0797", + "name": "Matthias Bernt" + }, + { + "class": "Person", + "identifier": "https://orcid.org/0000-0003-0031-1565", + "name": "Felipe Borim Corr\u00eaa" + }, + { + "class": "Person", + "identifier": "https://orcid.org/0000-0001-8251-1960", + "name": "Joao Pedro Saraiva" + }, + { + "class": "Person", + "identifier": "https://orcid.org/0000-0002-5652-3666", + "name": "Rodolfo Brizola Toscan" + }, + { + "class": "Organization", + "name": "Helmholtz-Zentrum f\u00fcr Umweltforschung GmbH - UFZ", + "url": "https://www.ufz.de/" + } + ], + "format-version": "0.1", + "license": "MIT", + "name": "PHI Toolkit", + "readme": "Profiling of 
bacteriophages is essential in expanding our knowledge of phage-host dynamics. However, current workflows focus solely on the identification and annotation of genes or prediction of phage hosts independently. Here we present the Phage Interaction Toolkit (PHI), which is designed to perform an automated and comprehensive identification and profiling of phages within host genomes. This workflow integrates state-of-the-art phage identification tools with host interaction analysis tools.\n\ngeNomad: Detection and annotation of mobile genetic elements including prophages.\nCheckM2: Assess bacterial genome quality (completeness and contamination).\nGTDB-Tk: Assignment of taxonomy to bacterial genomes.\nDefenseFinder: Scan genomes for known bacterial defense mechanisms such as restriction-modification and CRISPR.\nVIBRANT: Identification, annotation and curation of phage sequences in genomes. Also produces gene calls, annotations and quality assessment.\nCheckV: Estimation of genome completeness and contamination of putative viral contigs.\nPhaTYP: Phage lifestyle prediction (e.g., temperate vs. virulent) based on clustering of genomic signatures.\niPHoP: Prediction of phage-host interactions based on signals such as sequence similarity, CRISPR matches and tRNA matches. Identification of potential host taxa.\ndRep Compare: Dereplication and comparison of viral genomes. 
\nABRIcate: Screen of contigs for antimicrobial resistance or virulence genes.", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Input host contigs organized in a collection", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Input host contigs organized in a collection", + "name": "Host contigs" + } + ], + "label": "Host contigs", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 7.0274587653642016, + "top": 611.3096425492315 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": null, \"collection_type\": \"list\", \"fields\": null}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "b4019bea-b90b-4140-a1bb-af25832c0f8f", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "The GTDB-Tk database version to use for the workflow", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "The GTDB-Tk database version to use for the workflow", + "name": "GTDB-Tk database" + } + ], + "label": "GTDB-Tk database", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 4.755533854166629, + "top": 709.0070553989281 + }, + "tool_id": null, + "tool_state": "{\"multiple\": false, \"validators\": [], \"restrictOnConnections\": true, \"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "bce0b9ed-1816-48eb-a489-5fcada9baa8c", + "when": null, + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "3c24db5e-3427-475f-b226-39b7a3b0db47" + } + ] + }, + "2": { + "annotation": "geNomad is free to use for internal use, research 
& development, non-commercial use, purposes only.\n", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "geNomad is free to use for internal use, research & development, non-commercial use, purposes only.\n", + "name": "Agree with genomad license" + } + ], + "label": "Agree with genomad license", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 4.666646321614564, + "top": 804.9181678663759 + }, + "tool_id": null, + "tool_state": "{\"validators\": [], \"parameter_type\": \"boolean\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "9e1a00c1-bc45-424f-8df9-346d97ee11a1", + "when": null, + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "494d5bb6-011c-426b-b5ef-d9cbf78c58f8" + } + ] + }, + "3": { + "annotation": "Create number 2 to be used as a parameter", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/compose_text_param/compose_text_param/0.1.1", + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [], + "label": "Set number of genomes for filter", + "name": "Compose text parameter value", + "outputs": [ + { + "name": "out1", + "type": "expression.json" + } + ], + "position": { + "left": 0.0, + "top": 904.1848304640322 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/compose_text_param/compose_text_param/0.1.1", + "tool_shed_repository": { + "changeset_revision": "e188c9826e0f", + "name": "compose_text_param", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"components\": [{\"__index__\": 0, \"param_type\": {\"select_param_type\": \"text\", \"__current_case__\": 0, \"component_value\": \"2\"}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.1", + "type": "tool", + "uuid": "8d21c98a-e609-436e-aef3-de4d2df3fa1f", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "Assess bacterial genome 
quality, i.e. completeness and contamination", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/checkm2/checkm2/1.0.2+galaxy1", + "errors": null, + "id": 4, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "checkm2", + "outputs": [ + { + "name": "protein_files", + "type": "input" + }, + { + "name": "diamond_files", + "type": "input" + }, + { + "name": "quality", + "type": "tabular" + } + ], + "position": { + "left": 598.0932444912758, + "top": 58.059059940922 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/checkm2/checkm2/1.0.2+galaxy1", + "tool_shed_repository": { + "changeset_revision": "66acaec9f386", + "name": "checkm2", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"29f73c3e201611f0aa6f6c92cf0cf01e\", \"chromInfo\": \"/gpfs1/data/galaxy_server/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"database\": \"1.0.2\", \"genes\": false, \"input\": {\"__class__\": \"ConnectedValue\"}, \"model\": \"\", \"ttable\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2+galaxy1", + "type": "tool", + "uuid": "4de2140e-1069-4340-b641-cf75eeca19cc", + "when": null, + "workflow_outputs": [] + }, + "5": { + "annotation": "Scan genomes for known bacterial defense mechanisms such as restriction-modification and CRISPR", + "content_id": "toolshed.g2.bx.psu.edu/repos/rplanel/defense_finder/defense_finder/2.0.1+galaxy1", + "errors": null, + "id": 5, + "input_connections": { + "genome": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "DefenseFinder", + "outputs": [ + { + "name": "genes", + "type": "tabular" + }, + { + "name": "hmmer", + "type": "tabular" + }, + { + "name": "systems", + "type": "tabular" + }, + { + "name": "proteins", + "type": "fasta" + } + ], + "position": { + "left": 598.446693532469, + 
"top": 229.82678602993713 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/rplanel/defense_finder/defense_finder/2.0.1+galaxy1", + "tool_shed_repository": { + "changeset_revision": "71e221c4906c", + "name": "defense_finder", + "owner": "rplanel", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"antidefensefinder\": false, \"antidefensefinder_only\": false, \"coverage\": \"0.4\", \"db_type\": \"ordered_replicon\", \"genome\": {\"__class__\": \"ConnectedValue\"}, \"models\": \"defense-finder-models_2.0.2\", \"no_cut_ga\": false, \"preserve_raw\": false, \"__page__\": 0, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.0.1+galaxy1", + "type": "tool", + "uuid": "6f71ebbd-5c24-409d-a08a-c621cabe6983", + "when": null, + "workflow_outputs": [] + }, + "6": { + "annotation": "Assignment of taxonomy to bacterial genomes", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/gtdbtk_classify_wf/gtdbtk_classify_wf/2.4.1+galaxy0", + "errors": null, + "id": 6, + "input_connections": { + "gtdbtk_db": { + "id": 1, + "output_name": "output" + }, + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "GTDB-Tk Classify genomes", + "outputs": [ + { + "name": "output_align", + "type": "input" + }, + { + "name": "output_identfy", + "type": "input" + }, + { + "name": "output_classify", + "type": "input" + }, + { + "name": "output_summary", + "type": "input" + } + ], + "position": { + "left": 838.699194124325, + "top": 59.52376497465779 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/gtdbtk_classify_wf/gtdbtk_classify_wf/2.4.1+galaxy0", + "tool_shed_repository": { + "changeset_revision": "30a4bdf88e14", + "name": "gtdbtk_classify_wf", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced\": {\"min_perc_aa\": \"10\", \"force\": false, \"min_af\": \"0.65\", \"output_process_log\": false}, \"gtdbtk_db\": {\"__class__\": 
\"ConnectedValue\"}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.4.1+galaxy0", + "type": "tool", + "uuid": "649d0e6f-03c8-4bf1-b8dd-b73967e438fc", + "when": null, + "workflow_outputs": [] + }, + "7": { + "annotation": "Detection and annotation of mobile genetic elements including prophages", + "content_id": "toolshed.g2.bx.psu.edu/repos/ufz/genomad_end_to_end/genomad_end_to_end/1.11.1+galaxy0", + "errors": null, + "id": 7, + "input_connections": { + "INPUT": { + "id": 0, + "output_name": "output" + }, + "license": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "geNomad", + "outputs": [ + { + "name": "summary_plasmid_fna", + "type": "fasta" + }, + { + "name": "summary_plasmid_genes", + "type": "tabular" + }, + { + "name": "summary_plasmid_proteins", + "type": "fasta" + }, + { + "name": "summary_plasmid_summary", + "type": "tabular" + }, + { + "name": "summary_virus_fna", + "type": "fasta" + }, + { + "name": "summary_virus_genes", + "type": "tabular" + }, + { + "name": "summary_virus_proteins", + "type": "fasta" + }, + { + "name": "summary_virus_summary", + "type": "tabular" + } + ], + "position": { + "left": 320.5175608975258, + "top": 65.0287661583699 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/ufz/genomad_end_to_end/genomad_end_to_end/1.11.1+galaxy0", + "tool_shed_repository": { + "changeset_revision": "26f5822743aa", + "name": "genomad_end_to_end", + "owner": "ufz", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"DATABASE\": \"1.9\", \"INPUT\": {\"__class__\": \"ConnectedValue\"}, \"annotation\": {\"lenient_taxonomy\": false, \"full_ictv_lineage\": false, \"sensitivity\": \"4.2\", \"splits\": \"0\"}, \"basic\": {\"disable_find_proviruses\": true, \"disable_nn_classification\": true, \"enable_score_calibration\": false}, \"filter_cond\": {\"filtering_preset\": \"--conservative\", 
\"__current_case__\": 0}, \"license\": {\"__class__\": \"ConnectedValue\"}, \"provirus\": {\"skip_integrase_identification\": false, \"skip_trna_identification\": false}, \"score\": {\"composition\": \"auto\", \"force_auto\": false}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.11.1+galaxy0", + "type": "tool", + "uuid": "0ebcc645-737f-462f-a6e3-1ae0020a45c9", + "when": null, + "workflow_outputs": [] + }, + "8": { + "annotation": "Get the 1st dataset", + "content_id": "__EXTRACT_DATASET__", + "errors": null, + "id": 8, + "input_connections": { + "input": { + "id": 6, + "output_name": "output_summary" + } + }, + "inputs": [], + "label": null, + "name": "Extract dataset", + "outputs": [ + { + "name": "output", + "type": "data" + } + ], + "position": { + "left": 1139.4608083747391, + "top": 61.04291891641901 + }, + "post_job_actions": { + "ChangeDatatypeActionoutput": { + "action_arguments": { + "newtype": "tsv" + }, + "action_type": "ChangeDatatypeAction", + "output_name": "output" + } + }, + "tool_id": "__EXTRACT_DATASET__", + "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"which\": {\"which_dataset\": \"first\", \"__current_case__\": 0}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "43613527-5917-4327-b059-05044daf1dbf", + "when": null, + "workflow_outputs": [] + }, + "9": { + "annotation": "Remove empty sequences, i.e. 
samples where no phages were detected", + "content_id": "__FILTER_EMPTY_DATASETS__", + "errors": null, + "id": 9, + "input_connections": { + "input": { + "id": 7, + "output_name": "summary_virus_fna" + } + }, + "inputs": [], + "label": null, + "name": "Filter empty datasets", + "outputs": [ + { + "name": "output", + "type": "input" + } + ], + "position": { + "left": 553.6953845274754, + "top": 842.6358080791746 + }, + "post_job_actions": {}, + "tool_id": "__FILTER_EMPTY_DATASETS__", + "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "df8a3986-0154-495a-b945-7c470d032c74", + "when": null, + "workflow_outputs": [] + }, + "10": { + "annotation": "Remove FASTA files containing fewer than 2 sequences", + "id": 10, + "input_connections": { + "Collection of FASTA datasets": { + "id": 7, + "input_subworkflow_step_id": 1, + "output_name": "summary_virus_fna" + }, + "Number of sequences": { + "id": 3, + "input_subworkflow_step_id": 0, + "output_name": "out1" + } + }, + "inputs": [], + "label": null, + "name": "Filter FASTA by number of sequences", + "outputs": [], + "position": { + "left": 351.65095093372537, + "top": 1018.258000461987 + }, + "subworkflow": { + "a_galaxy_workflow": "true", + "annotation": "", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0000-0003-3763-0797", + "name": "Matthias Bernt" + } + ], + "format-version": "0.1", + "license": "MIT", + "name": "Filter FASTA by number of sequences", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Minimum number of sequences", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + 
"description": "Minimum number of sequences", + "name": "Number of sequences" + } + ], + "label": "Number of sequences", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 0.0, + "top": 0.0 + }, + "tool_id": null, + "tool_state": "{\"validators\": [], \"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "5647a2b2-e9c7-422f-8922-383430aa3982", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "Collection of FASTA datasets", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "Collection of FASTA datasets", + "name": "Collection of FASTA datasets" + } + ], + "label": "Collection of FASTA datasets", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 6.903015792653667, + "top": 194.76141058812067 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": null, \"collection_type\": \"list\", \"fields\": null}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "a68dc166-0d7d-41b4-b74d-dd6605ac9776", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.5+galaxy2", + "errors": null, + "id": 2, + "input_connections": { + "infile": { + "id": 1, + "output_name": "output" + }, + "variables_0|value": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Text reformatting", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 287.7728443960977, + "top": 14.960955148694826 + }, + "post_job_actions": { + "DeleteIntermediatesActionoutfile": { + "action_arguments": {}, + "action_type": "DeleteIntermediatesAction", + "output_name": "outfile" + }, + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + 
"output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.5+galaxy2", + "tool_shed_repository": { + "changeset_revision": "c41d78ae5fee", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"code\": \"BEGIN {\\n count = 0\\n buffering = 1\\n}\\n{\\n if (buffering) {\\n buf[++bufline] = $0\\n } else {\\n print\\n }\\n\\n if ($0 ~ /^>/) {\\n count++\\n if (count == VAR1) {\\n # Print buffered lines\\n for (i = 1; i <= bufline; i++) print buf[i]\\n buffering = 0\\n delete buf\\n }\\n }\\n}\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"variables\": [{\"__index__\": 0, \"value\": {\"__class__\": \"ConnectedValue\"}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.5+galaxy2", + "type": "tool", + "uuid": "93064a0b-9fc8-45c5-81de-4a49d8c0dbf4", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "__FILTER_EMPTY_DATASETS__", + "errors": null, + "id": 3, + "input_connections": { + "input": { + "id": 2, + "output_name": "outfile" + } + }, + "inputs": [], + "label": null, + "name": "Filter empty datasets", + "outputs": [ + { + "name": "output", + "type": "input" + } + ], + "position": { + "left": 520.0311081979519, + "top": 217.04037930788962 + }, + "post_job_actions": { + "DeleteIntermediatesActionoutput": { + "action_arguments": {}, + "action_type": "DeleteIntermediatesAction", + "output_name": "output" + }, + "HideDatasetActionoutput": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output" + } + }, + "tool_id": "__FILTER_EMPTY_DATASETS__", + "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "6aee9217-c24b-4c1f-bf37-165aa17c349e", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "", + 
"content_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2", + "errors": null, + "id": 4, + "input_connections": { + "input_collection": { + "id": 3, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Extract element identifiers", + "outputs": [ + { + "name": "output", + "type": "txt" + } + ], + "position": { + "left": 806.1282025704038, + "top": 193.67056945375833 + }, + "post_job_actions": { + "DeleteIntermediatesActionoutput": { + "action_arguments": {}, + "action_type": "DeleteIntermediatesAction", + "output_name": "output" + }, + "HideDatasetActionoutput": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2", + "tool_shed_repository": { + "changeset_revision": "d3c07d270a50", + "name": "collection_element_identifiers", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"input_collection\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.0.2", + "type": "tool", + "uuid": "199d4a54-3237-47bd-b3d3-cd15a941a1bb", + "when": null, + "workflow_outputs": [] + }, + "5": { + "annotation": "", + "content_id": "__FILTER_FROM_FILE__", + "errors": null, + "id": 5, + "input_connections": { + "how|filter_source": { + "id": 4, + "output_name": "output" + }, + "input": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Filter collection", + "name": "how" + } + ], + "label": null, + "name": "Filter collection", + "outputs": [ + { + "name": "output_filtered", + "type": "input" + }, + { + "name": "output_discarded", + "type": "input" + } + ], + "position": { + "left": 1100.4078905199567, + "top": 21.087434497079062 + }, + "post_job_actions": { + 
"DeleteIntermediatesActionoutput_filtered": { + "action_arguments": {}, + "action_type": "DeleteIntermediatesAction", + "output_name": "output_filtered" + }, + "HideDatasetActionoutput_discarded": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_discarded" + }, + "RenameDatasetActionoutput_filtered": { + "action_arguments": { + "newname": "Filtered FASTA datasets" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_filtered" + } + }, + "tool_id": "__FILTER_FROM_FILE__", + "tool_state": "{\"how\": {\"how_filter\": \"remove_if_absent\", \"__current_case__\": 0, \"filter_source\": {\"__class__\": \"ConnectedValue\"}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "5df035a6-2921-44de-aa92-21fceb9931f3", + "when": null, + "workflow_outputs": [ + { + "label": "Filtered FASTA datasets", + "output_name": "output_filtered", + "uuid": "804f2146-8c7d-423d-84db-a51c5877a77d" + } + ] + } + }, + "tags": [], + "uuid": "51875bf7-4156-4933-99f0-25d65892e5e2" + }, + "tool_id": null, + "type": "subworkflow", + "uuid": "cc95a5d6-657f-44a0-89c7-fd28f26cbdba", + "when": null, + "workflow_outputs": [] + }, + "11": { + "annotation": "Estimation of genome completeness, contamination of putative viral contigs", + "content_id": "toolshed.g2.bx.psu.edu/repos/ufz/checkv_end_to_end/checkv_end_to_end/1.0.3+galaxy0", + "errors": null, + "id": 11, + "input_connections": { + "input": { + "id": 9, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "CheckV end to end", + "outputs": [ + { + "name": "quality_summary", + "type": "tabular" + }, + { + "name": "complete_genomes", + "type": "tabular" + }, + { + "name": "proviruses", + "type": "fasta" + }, + { + "name": "viruses", + "type": "fasta" + } + ], + "position": { + "left": 846.7563029351766, + "top": 786.0702681961383 + }, + "post_job_actions": {}, + 
"tool_id": "toolshed.g2.bx.psu.edu/repos/ufz/checkv_end_to_end/checkv_end_to_end/1.0.3+galaxy0", + "tool_shed_repository": { + "changeset_revision": "e523d50513a4", + "name": "checkv_end_to_end", + "owner": "ufz", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"29f73c3e201611f0aa6f6c92cf0cf01e\", \"chromInfo\": \"/gpfs1/data/galaxy_server/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"optional_outputs\": null, \"reference\": \"1.5\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.3+galaxy0", + "type": "tool", + "uuid": "7992edfd-89f9-4ceb-8fbf-d2b477a9b2f6", + "when": null, + "workflow_outputs": [] + }, + "12": { + "annotation": "Prediction of phage-host interactions based on signals such as sequence similarity, CRISPR matches and tRNA matches. Identification of potential host taxa", + "content_id": "toolshed.g2.bx.psu.edu/repos/ufz/iphop_predict/iphop_predict/1.3.3+galaxy0", + "errors": null, + "id": 12, + "input_connections": { + "fa_file": { + "id": 9, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "iPHoP predict", + "outputs": [ + { + "name": "detailed_per_tool", + "type": "csv" + }, + { + "name": "host_genome", + "type": "csv" + }, + { + "name": "host_genus", + "type": "csv" + } + ], + "position": { + "left": 1402.8197413075973, + "top": 925.1837443139221 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/ufz/iphop_predict/iphop_predict/1.3.3+galaxy0", + "tool_shed_repository": { + "changeset_revision": "d357350b6da0", + "name": "iphop_predict", + "owner": "ufz", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"29f73c3e201611f0aa6f6c92cf0cf01e\", \"chromInfo\": \"/gpfs1/data/galaxy_server/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"db_dir\": \"iPHoP_db_Aug23_rw\", 
\"fa_file\": {\"__class__\": \"ConnectedValue\"}, \"min_score\": \"90\", \"no_qc\": false, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.3.3+galaxy0", + "type": "tool", + "uuid": "86c93eaf-0ca9-4bdb-931d-9f426301c6d9", + "when": null, + "workflow_outputs": [] + }, + "13": { + "annotation": "Screen of contigs for antimicrobial resistance or virulence genes", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/abricate/abricate/1.0.1", + "errors": null, + "id": 13, + "input_connections": { + "file_input": { + "id": 9, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "ABRicate", + "outputs": [ + { + "name": "report", + "type": "tabular" + } + ], + "position": { + "left": 1408.7191471285869, + "top": 1157.8636521814192 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/abricate/abricate/1.0.1", + "tool_shed_repository": { + "changeset_revision": "3f3e247c053d", + "name": "abricate", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adv\": {\"db\": \"vfdb\", \"no_header\": false, \"min_dna_id\": \"80.0\", \"min_cov\": \"80.0\"}, \"file_input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.1", + "type": "tool", + "uuid": "f80b89b4-27c1-4c8c-a955-1ec139cd4461", + "when": null, + "workflow_outputs": [] + }, + "14": { + "annotation": "Identification, annotation and curation of phage sequences in genomes. 
Also produces gene calls, annotations and quality assessment", + "content_id": "toolshed.g2.bx.psu.edu/repos/ufz/vibrant/vibrant/1.2.1+galaxy2", + "errors": null, + "id": 14, + "input_connections": { + "input": { + "id": 9, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "VIBRANT", + "outputs": [ + { + "name": "figures", + "type": "input" + }, + { + "name": "phages_combined_fna", + "type": "fasta" + }, + { + "name": "phages_combined_gbk", + "type": "genbank" + }, + { + "name": "phages_combined_txt", + "type": "txt" + }, + { + "name": "AMG_individuals", + "type": "tabular" + } + ], + "position": { + "left": 1681.0607839606766, + "top": 1074.3096425492315 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/ufz/vibrant/vibrant/1.2.1+galaxy2", + "tool_shed_repository": { + "changeset_revision": "e087d0bccff4", + "name": "vibrant", + "owner": "ufz", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"database\": \"1.2.1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"l\": \"1000\", \"o\": \"4\", \"outputs\": [\"figures\", \"phages_combined_fna\", \"phages_combined_gbk\", \"phages_combined_txt\", \"AMG_individuals\"], \"protein\": false, \"virome\": false, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.2.1+galaxy2", + "type": "tool", + "uuid": "ef992853-6df2-429a-a02d-0af8bb968f4f", + "when": null, + "workflow_outputs": [] + }, + "15": { + "annotation": "Split multi-sequence FASTA file into a collection of single-sequence FASTA files", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/split_file_to_collection/split_file_to_collection/0.5.2", + "errors": null, + "id": 15, + "input_connections": { + "split_parms|input": { + "id": 10, + "output_name": "Filtered FASTA datasets" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Split file", + "name": "split_parms" + } + ], + "label": null, + "name": "Split file", + "outputs": [ + { + "name": 
"list_output_fasta", + "type": "input" + } + ], + "position": { + "left": 589.2065336160172, + "top": 1082.3469083395908 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/split_file_to_collection/split_file_to_collection/0.5.2", + "tool_shed_repository": { + "changeset_revision": "2dae863c8f42", + "name": "split_file_to_collection", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"split_parms\": {\"select_ftype\": \"fasta\", \"__current_case__\": 3, \"input\": {\"__class__\": \"ConnectedValue\"}, \"select_mode\": {\"mode\": \"chunk\", \"__current_case__\": 0, \"chunksize\": \"1\"}, \"newfilenames\": \"sequence\", \"select_allocate\": {\"allocate\": \"batch\", \"__current_case__\": 1}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.5.2", + "type": "tool", + "uuid": "7f0aafe6-c875-4408-8a59-b0f6b84ff916", + "when": null, + "workflow_outputs": [] + }, + "16": { + "annotation": "Compare: Dereplication and comparison of viral genomes", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/drep_compare/drep_compare/3.6.2+galaxy1", + "errors": null, + "id": 16, + "input_connections": { + "genomes": { + "id": 15, + "output_name": "list_output_fasta" + } + }, + "inputs": [], + "label": null, + "name": "dRep compare", + "outputs": [ + { + "name": "log", + "type": "txt" + }, + { + "name": "warnings", + "type": "txt" + }, + { + "name": "Primary_clustering_dendrogram", + "type": "pdf" + }, + { + "name": "Clustering_scatterplots", + "type": "pdf" + }, + { + "name": "Cdb", + "type": "csv" + } + ], + "position": { + "left": 836.1287307341365, + "top": 1075.401993674464 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/drep_compare/drep_compare/3.6.2+galaxy1", + "tool_shed_repository": { + "changeset_revision": "f0334664a47b", + "name": "drep_compare", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"comp_clust\": 
{\"steps\": {\"select\": \"default\", \"__current_case__\": 0, \"MASH_sketch\": \"1000\", \"P_ani\": \"0.9\", \"multiround_primary_clustering\": false, \"primary_chunksize\": \"5000\", \"clustering\": {\"S_algorithm\": \"ANImf\", \"__current_case__\": 1, \"n_PRESET\": \"normal\", \"coverage_method\": \"larger\"}, \"S_ani\": \"0.95\", \"cov_thresh\": \"0.1\"}, \"clusterAlg\": \"average\", \"run_tertiary_clustering\": false}, \"genomes\": {\"__class__\": \"ConnectedValue\"}, \"select_outputs\": [\"log\", \"warnings\", \"Primary_clustering_dendrogram\", \"Clustering_scatterplots\", \"Cdb\"], \"warning\": {\"warn_dist\": \"0.25\", \"warn_sim\": \"0.98\", \"warn_aln\": \"0.25\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.6.2+galaxy1", + "type": "tool", + "uuid": "0c554f0a-22bb-4dbc-abd1-6524c36229a3", + "when": null, + "workflow_outputs": [] + }, + "17": { + "annotation": "Restructure collection", + "content_id": "__APPLY_RULES__", + "errors": null, + "id": 17, + "input_connections": { + "input": { + "id": 16, + "output_name": "Cdb" + } + }, + "inputs": [], + "label": null, + "name": "Apply rules", + "outputs": [ + { + "name": "output", + "type": "input" + } + ], + "position": { + "left": 1067.420641230793, + "top": 1288.6919555524382 + }, + "post_job_actions": {}, + "tool_id": "__APPLY_RULES__", + "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"rules\": {\"mapping\": [{\"columns\": [0], \"editing\": false, \"type\": \"list_identifiers\"}], \"rules\": [{\"error\": null, \"type\": \"add_column_metadata\", \"value\": \"identifier0\", \"warn\": null}]}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.0", + "type": "tool", + "uuid": "a54b5b42-4595-4173-be69-8ab3bc1c367a", + "when": null, + "workflow_outputs": [] + }, + "18": { + "annotation": "Create PHI report", + "content_id": "toolshed.g2.bx.psu.edu/repos/ufz/phi_toolkit_report/phi_toolkit_report/0.2.0+galaxy0", + "errors": null, + "id": 
18, + "input_connections": { + "abricate": { + "id": 13, + "output_name": "report" + }, + "checkm": { + "id": 4, + "output_name": "quality" + }, + "checkv": { + "id": 11, + "output_name": "quality_summary" + }, + "defense_finder": { + "id": 5, + "output_name": "systems" + }, + "drep_compare": { + "id": 17, + "output_name": "output" + }, + "drep_compare_clustering_dendrogram": { + "id": 16, + "output_name": "Primary_clustering_dendrogram" + }, + "genomad": { + "id": 7, + "output_name": "summary_virus_summary" + }, + "genomad_annotations": { + "id": 7, + "output_name": "summary_virus_genes" + }, + "genomad_phages": { + "id": 7, + "output_name": "summary_virus_fna" + }, + "genomes": { + "id": 0, + "output_name": "output" + }, + "gtdb_summary": { + "id": 8, + "output_name": "output" + }, + "iphop": { + "id": 12, + "output_name": "host_genome" + }, + "vibrant": { + "id": 14, + "output_name": "AMG_individuals" + } + }, + "inputs": [], + "label": null, + "name": "PHI toolkit report", + "outputs": [ + { + "name": "report", + "type": "html" + } + ], + "position": { + "left": 1598.1942068122391, + "top": 92.34290670938776 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/ufz/phi_toolkit_report/phi_toolkit_report/0.2.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3a7f73d638ba", + "name": "phi_toolkit_report", + "owner": "ufz", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"abricate\": {\"__class__\": \"ConnectedValue\"}, \"checkm\": {\"__class__\": \"ConnectedValue\"}, \"checkv\": {\"__class__\": \"ConnectedValue\"}, \"defense_finder\": {\"__class__\": \"ConnectedValue\"}, \"drep_compare\": {\"__class__\": \"ConnectedValue\"}, \"drep_compare_clustering_dendrogram\": {\"__class__\": \"ConnectedValue\"}, \"genomad\": {\"__class__\": \"ConnectedValue\"}, \"genomad_annotations\": {\"__class__\": \"ConnectedValue\"}, \"genomad_phages\": {\"__class__\": \"ConnectedValue\"}, \"genomes\": {\"__class__\": \"ConnectedValue\"}, 
\"gtdb_summary\": {\"__class__\": \"ConnectedValue\"}, \"iphop\": {\"__class__\": \"ConnectedValue\"}, \"vibrant\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.2.0+galaxy0", + "type": "tool", + "uuid": "d70fa775-c065-4a1d-8c1b-2019eff74436", + "when": null, + "workflow_outputs": [ + { + "label": "PHI toolkit report", + "output_name": "report", + "uuid": "e2a1e5b1-3cf1-4d57-bda5-e1092f00b45e" + } + ] + } + }, + "tags": [], + "uuid": "b9c6b899-e37e-4806-85f6-c655b712d479", + "version": 3 +} diff --git a/workflows/virology/phi-toolkit/README.md b/workflows/virology/phi-toolkit/README.md new file mode 100644 index 0000000000..e5c381798d --- /dev/null +++ b/workflows/virology/phi-toolkit/README.md @@ -0,0 +1,21 @@ +# PHI Toolkit: Phage Interaction Toolkit + +Profiling of bacteriophages is essential in expanding our knowledge of phage-host dynamics. +The Phage Interaction Toolkit (PHI) is designed to perform an automated and comprehensive identification and profiling of phages within host genomes. +This workflow integrates state-of-the-art phage identification tools with host interaction analysis tools. +The workflow creates a summary of the outputs of these tools allowing for a convenient analysis of host phage interactions: + +Tools analysing host: + +Genomad: Detection and annotation of mobile genetic elements including prophages. +CheckM2: Assess bacterial genome quality (completeness and contamination). +GTDB-tk: Assignment of taxonomy to bacterial genomes. +DefenseFinder: Scan genomes for known bacterial defense mechanisms such as restriction-modification and CRISPR. + +Tools analysing phages that were identified by Genomad: + +Vibrant: Identification, annotation and curation of phage sequences in genomes. Also produces gene calls, annotations and quality assessment. +CheckV: Estimation of genome completeness, contamination of putative viral contigs. 
+iPHoP: Prediction of phage-host interactions based on signals such as sequence similarity, CRISPR matches and tRNA matches. Identification of potential host taxa. +dRep Compare: Dereplication and comparison of viral genomes. +ABRicate: Screen of contigs for antimicrobial resistance or virulence genes. \ No newline at end of file