sequana
diff --git a/‎.github/workflows/apptainer.yml‎
Lines changed: 60 additions & 20 deletions b/‎.github/workflows/apptainer.yml‎
Lines changed: 60 additions & 20 deletions
diff --git a/‎.github/workflows/main.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/main.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/pypi.yml‎
Lines changed: 4 additions & 4 deletions b/‎.github/workflows/pypi.yml‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎README.rst‎
Lines changed: 12 additions & 8 deletions b/‎README.rst‎
Lines changed: 12 additions & 8 deletions
diff --git a/‎environment.yml‎
Lines changed: 5 additions & 2 deletions b/‎environment.yml‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 3 additions & 3 deletions b/‎pyproject.toml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎sequana_pipelines/variant_calling/config.yaml‎
Lines changed: 20 additions & 13 deletions b/‎sequana_pipelines/variant_calling/config.yaml‎
Lines changed: 20 additions & 13 deletions
diff --git a/‎sequana_pipelines/variant_calling/main.py‎
Lines changed: 4 additions & 8 deletions b/‎sequana_pipelines/variant_calling/main.py‎
Lines changed: 4 additions & 8 deletions
@@ -6,53 +6,93 @@ on:
       - main
       - dev
   pull_request:
-    branches-ignore: []
+  workflow_dispatch:
   schedule:
-    - cron: '0 0 2 * *'
+    - cron: '0 0 20 * *'
 
 jobs:
   build-linux:
     runs-on: ubuntu-latest
+
     strategy:
-      max-parallel: 5
       matrix:
-        python: [3.8, '3.10']
+        python: ['3.10', '3.11']
       fail-fast: false
-
+      max-parallel: 5
 
     steps:
 
-    - name: precleanup
+    # Clean up unnecessary preinstalled packages to free disk space
+    - name: Pre-cleanup
       run: |
         sudo rm -rf /usr/share/dotnet
         sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-    - name: install graphviz
+
+    # Cache APT .deb packages
+    - name: Cache APT archives
+      uses: actions/cache@v3
+      with:
+        path: /var/cache/apt/archives
+        key: ${{ runner.os }}-apt-cache-v1
+
+    # Cache Apptainer installation
+    - name: Cache Apptainer install
+      id: cache-apptainer
+      uses: actions/cache@v3
+      with:
+        path: |
+          /usr/bin/apptainer
+          /usr/lib/apptainer
+          /etc/apptainer
+        key: ${{ runner.os }}-apptainer-v1
+
+    # Install Apptainer only if not cached
+    - name: Install Apptainer
+      if: steps.cache-apptainer.outputs.cache-hit != 'true'
       run: |
-        sudo apt update
-        sudo apt-get install -y graphviz software-properties-common
+        sudo apt-get update
+        sudo apt-get install -y software-properties-common
         sudo add-apt-repository -y ppa:apptainer/ppa
-        sudo apt update
-        sudo apt install -y apptainer
+        sudo apt-get update
+        sudo apt-get install -y apptainer
+
+    # Cache Apptainer image cache (~/.apptainer/cache)
+    - name: Cache Apptainer images
+      uses: actions/cache@v3
+      with:
+        path: ~/.apptainer/cache
+        key: ${{ runner.os }}-apptainer-images-v1
 
-    - name: checkout git repo
-      uses: actions/checkout@v3
+    # Checkout repository
+    - name: Checkout repo
+      uses: actions/checkout@v4
 
-    - name: Set up Python 3.X
-      uses: actions/setup-python@v3
+    # 🐍 Set up Python
+    - name: Set up Python ${{ matrix.python }}
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python }}
 
+    #  Install dependencies
     - name: Install dependencies
       run: |
+        python -m pip install --upgrade pip
         pip install .[testing]
 
-    - name: install package itself
+    # Install package and pinned dependency (example: pulp)
+    - name: Install package itself
       run: |
-         pip install .
-         pip install "pulp==2.7.0" --no-deps
+        pip install .
+        pip install "pulp==2.7.0" --no-deps
 
-    - name: testing
+    # Run tests using Apptainer
+    - name: Run Apptainer tests
       run: |
-        sequana_variant_calling --input-directory test/data/ --use-apptainer  --annotation-file test/data/JB409847.gbk --reference-file test/data/JB409847.fasta && cd variant_calling && sh variant_calling.sh
+        sequana_variant_calling \
+          --input-directory test/data/ \
+          --apptainer-prefix ~/.apptainer/cache \
+          --annotation-file test/data/JB409847.gbk \
+          --reference-file test/data/JB409847.fasta
 
+        cd variant_calling && bash variant_calling.sh
 
@@ -17,7 +17,7 @@ jobs:
     strategy:
       max-parallel: 5
       matrix:
-        python: [ 3.9, '3.10', '3.11']
+        python: [ '3.10', '3.11']
       fail-fast: false
 
 
 
@@ -11,14 +11,14 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
     - uses: actions/checkout@main
-    - name: Set up Python 3.8
+    - name: Set up Python 3.9
       uses: actions/setup-python@v2
       with:
-        python-version: 3.8
+        python-version: 3.9
 
-    - name: Install package 
+    - name: Install package
       run: |
-          pip install build poetry
+          pip install build "poetry>=2"
 
     - name: Build source tarball
       run: |
 
@@ -9,9 +9,9 @@
 .. image:: https://github.com/sequana/variant_calling/actions/workflows/main.yml/badge.svg
    :target: https://github.com/sequana/variant_calling/actions
 
-.. image:: https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C3.10-blue.svg
+.. image:: https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C3.12-blue.svg
     :target: https://pypi.python.org/pypi/sequana
-    :alt: Python 3.8 | 3.9 | 3.10
+    :alt: Python 3.10 | 3.11 | 3.12
 
 This is the **variant_calling** pipeline from the `Sequana <https://sequana.readthedocs.org>`_ project
 
@@ -152,7 +152,7 @@ and the reference genome with its annnotation::
 
 
 Initiate the pipeline::
- 
+
     sequana_variant_calling --input-directory . --reference-file ecoli.fa --aligner-choice bwa_split \
         --do-coverage --annotation-file ecoli.gff  \
         --use-apptainer --apptainer-prefix ~/.sequana/apptainers \
@@ -164,9 +164,7 @@ Explication:
 - we use the reference genome ecoli.fa (--reference-file) and its annotation for SNPeff (--annotation-file)
 - we use the sequana_coverage tool (True by default) to get coverage plots.
 - we use --input-directory to indicate where to find the input files
-- This data set is paired. In NGS, it is common to have _R1_ and _R2_ tags to differentiate the 2 files. Here the tag
-are _1 and _2. In sequana we define the a wildcard for the read tag. So here we tell the software that thex ecpted tag
-follow this pattern: "_[12]." and everything is then automatic.
+- This data set is paired. In NGS, it is common to have _R1_ and _R2_ tags to differentiate the 2 files. Here the tags are `_1` and `_2`. In sequana we define a wildcard for the read tag. So here we tell the software that the expected tags follow this pattern: "_[12]." and everything is then automatic.
 
 Then follow the instructions (prepare and execute the pipeline).
 
@@ -175,11 +173,11 @@ You should end up with a summary.hml report.
 
 You can browse the different samples (only one in this example) and get a table with variant calls:
 
-    https://raw.githubusercontent.com/sequana/variant_calling/refs/heads/main/doc/table.png
+.. image:: https://raw.githubusercontent.com/sequana/variant_calling/refs/heads/main/doc/table.png
 
 If you enable the coverage analysis (not recommended for eukaryotes), you should see this kind of plot:
 
-    https://raw.githubusercontent.com/sequana/variant_calling/refs/heads/main/doc/coverage.png
+.. image:: https://raw.githubusercontent.com/sequana/variant_calling/refs/heads/main/doc/coverage.png
 
 
 
@@ -191,6 +189,12 @@ Changelog
 ========= ======================================================================
 Version   Description
 ========= ======================================================================
+1.4.0     * handles long reads data. Use sequana html_report to create the VCF
+            html reports instead of wrapper. More dynamic. Updated some 
+            containers, in particular for sequana_coverage.
+          * Fixed regression in bwa mapping
+          * Fixed ordering of contigs on genomecov that was not sorted in the 
+            same way as samtools in some cases. 
 1.3.0     * Updated version to use latest damona containers and latest 
             sequana version 0.19.1. added plot in HTML report with distribution
             of variants. added tutorial. added bwa_split and freebaye split to 
 
@@ -6,16 +6,19 @@ channels:
   - defaults
 
 dependencies:
-  - freebayes>1,<1.3
+  - freebayes>1.3
   - bwa
+  - bcftools
   - 'snpeff==5.1d'
   - sambamba
+  - fastp
+  - fastqc
   - picard>2.26
   - samtools>=1.15
   - bamtools
   - minimap2
   - pip
   - pip:
-    - sequana
+    - "sequana>=0.19.4"
 
 
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [project]
 name = "sequana-variant-calling"
-version = "1.3.0"
+version = "1.4.0"
 description = "A multi-sample variant calling pipeline"
 authors = [{name="Sequana Team"}]
 license = "BSD-3"
@@ -31,8 +31,8 @@ classifiers = [
 
 requires-python = ">=3.9,<4.0"
 dependencies = [
-    "sequana >=0.19.0",
-    "sequana_pipetools >=0.16.0",
+    "sequana >=0.19.4",
+    "sequana_pipetools >=1.3.0",
     "click-completion >=0.5.2",
     "pytest (>=8.3.4,<9.0.0)"
 ]
 
@@ -32,15 +32,15 @@ general:
 
 apptainers:
     #bwa: https://zenodo.org/record/7970243/files/bwa_0.7.17.img
-    bwa: "https://zenodo.org/record/14945560/files/sequana_tools_0.19.1.img"
+    bwa: https://zenodo.org/record/17535070/files/sequana_tools_0.19.3.img
     samtools: https://zenodo.org/record/7437898/files/samtools_1.16.1.img
     seqkit: https://zenodo.org/record/7821924/files/seqkit_2.4.0.img
-    sequana_coverage: https://zenodo.org/record/14945560/files/sequana_tools_0.19.1.img
-    sequana_tools: "https://zenodo.org/record/14945560/files/sequana_tools_0.19.1.img"
-    graphviz: "https://zenodo.org/record/7928262/files/graphviz_7.0.5.img"
-    minimap2: "https://zenodo.org/record/5799482/files/minimap2_2.23.0.img"
-    multiqc: "https://zenodo.org/record/10205070/files/multiqc_1.16.0.img"
-    freebayes: "https://zenodo.org/record/14930911/files/freebayes_1.3.9.img"
+    sequana_coverage: https://zenodo.org/record/17535070/files/sequana_tools_0.19.3.img
+    sequana_tools: https://zenodo.org/record/17535070/files/sequana_tools_0.19.3.img
+    graphviz: https://zenodo.org/record/7928262/files/graphviz_7.0.5.img
+    minimap2: https://zenodo.org/record/17535070/files/sequana_tools_0.19.3.img
+    multiqc: https://zenodo.org/record/17100751/files/multiqc_1.27.0-zenodo1.img
+    freebayes: https://zenodo.org/record/14930911/files/freebayes_1.3.9.img
     fastqc: https://zenodo.org/record/7015004/files/fastqc_0.11.9-py3.img
     fastp: https://zenodo.org/record/7319782/files/fastp_0.23.2.img
 
@@ -107,7 +107,7 @@ bwa_index:
 
 
 bwa_split:
-    nreads: 100000
+    nreads: 1000000
     index_algorithm: is
     options: -T 30 -M
     threads: 4
@@ -151,11 +151,13 @@ snpeff:
 # :Parameters:
 #
 # - ploidy: set the ploidy of your samples.
-# - options: any options recognised by freebayes.
-#
+# - options: any options recognised by freebayes. One useful option is
+#  --min-alternate-fraction, which lowers the minimal frequency (e.g. to 1%),
+#  since the default is 5%.
+# 
 freebayes:
     ploidy: 1
-    chunksize: 100000
+    chunksize: 1000000
     options: --legacy-gls
     resources:
         mem: 8G
@@ -187,6 +189,8 @@ sambamba_markdup:
     remove_duplicates: false
     tmp_directory: ./tmp/
     options:
+    resources:
+        mem: 8G
 
 ##############################################################################
 # Filter reads with a mapping score lower than an integer 
@@ -200,6 +204,8 @@ sambamba_filter:
     do: true
     threshold: 30
     options:
+    resources:
+        mem: 8G
 
 ##############################################################################
 # Sequana coverage - Analyse the coverage of the mapping 
@@ -287,8 +293,8 @@ joint_freebayes_vcf_filter:
 # or -n 5 (minimum number of Ns required to discard a read)
 fastp:
     do: true
-    options: ' --cut_tail '
-    minimum_length: 20
+    options: '--cut_tail'
+    min_length_required: 20
     adapters: ''
     quality: 15
     threads: 4
@@ -305,6 +311,7 @@ fastp:
 # - options: string with any valid FastQC options
 #
 fastqc:
+    do: true
     options: --nogroup
     threads: 4
     resources:
 
@@ -69,14 +69,6 @@
     default=None,
     help="The annotation for snpeff. This is optional but highly recommended to obtain meaningful HTML report.",
 )
-@click.option(
-    "--do-coverage",
-    "do_coverage",
-    is_flag=True,
-    default=False,
-    show_default=True,
-    help="perform the coverage analysis using sequana_coverage.",
-)
 @click.option(
     "--nanopore",
     is_flag=True,
@@ -152,9 +144,13 @@ def fill_reference_file():
     if options["nanopore"]:
         cfg.general.aligner_choice = "minimap2"
         cfg.minimap2.options = "-x map-ont"
+        cfg.input_readtag = ""
+        cfg.fastqc.do = False
     elif options["pacbio"]:
         cfg.general.aligner_choice = "minimap2"
         cfg.minimap2.options = "-x map-pb"
+        cfg.input_readtag = ""
+        cfg.fastqc.do = False
     else:
         cfg.general.aligner_choice = options.aligner