bigbio · ypriverol · Jan 22, 2026 · Jan 22, 2026 · Jan 22, 2026 · Jan 22, 2026
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -305,4 +305,27 @@ jobs:
         name: Upload results
         with:
           name: results_big_dia
-          path: ./results_big_dia
+          path: ./results_big_dia
+
+  test_single_cell_diann:
+    needs: setup
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: Install package
+        run: pip install .
+      - name: Test single cell dataset
+        run: |
+          wget -nv https://ftp.pride.ebi.ac.uk/pride/data/archive/2024/08/PXD053464/03_SingleCell_Searches.zip
+          unzip -d ./PXD053464 03_SingleCell_Searches.zip
+          multiqc --diann-plugin ./PXD053464 -o ./results_single_cell_diann
+      - uses: actions/upload-artifact@v4
+        if: always()
+        name: Upload results
+        with:
+          name: results_single_cell_diann
+          path: ./results_single_cell_diann
diff --git a/docs/README.md b/docs/README.md
@@ -242,6 +242,7 @@ You can find example reports on the [docs page](https://bigbio.github.io/pmultiq
 | TMT | Tandem mass tag | [TMT Example](https://pmultiqc.quantms.org/TMT_PXD007683/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/TMT_PXD007683_disable_hoverinfo/multiqc_report.html)) | [TMT_PXD007683.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/example-projects/TMT_PXD007683.zip) |
 | quantms DIA | Data-independent acquisition | [quantms DIA Example](https://pmultiqc.quantms.org/dia/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/dia_disable_hoverinfo/multiqc_report.html)) | [dia.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/dia/dia.zip) |
 | DIA-NN | Data-independent acquisition | [DIA-NN Example](https://pmultiqc.quantms.org/DIANN/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/DIANN_disable_hoverinfo/multiqc_report.html)) | [PXD063291.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/example-projects/PXD063291.zip) |
+| Single-cell (DIA-NN) | Single-cell dataset | [Single-cell Example](https://pmultiqc.quantms.org/PXD053464/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/PXD053464_disable_hoverinfo/multiqc_report.html)) | [PXD053464 folder](https://ftp.pride.ebi.ac.uk/pride/data/archive/2024/08/PXD053464/) |
 | MaxQuant | MaxQuant results | [MaxQuant Example](https://pmultiqc.quantms.org/PXD003133/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/PXD003133_disable_hoverinfo/multiqc_report.html)) | [txt_20min.zip](https://ftp.pride.ebi.ac.uk/pride/data/archive/2015/11/PXD003133/txt_20min.zip) |
 | MaxQuant DIA | MaxQuant DIA results | [MaxQuant DIA Example](https://pmultiqc.quantms.org/MaxDIA/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/MaxDIA_disable_hoverinfo/multiqc_report.html)) | [MaxDIA_txt.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/maxquant/MaxDIA_txt.zip) |
 | ProteoBench | ProteoBench results | [ProteoBench Example](https://pmultiqc.quantms.org/ProteoBench/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/ProteoBench_disable_hoverinfo/multiqc_report.html)) | [ProteoBench data](https://proteobench.cubimed.rub.de/datasets/d01e87b997b84c985868204b1ed26749902fd7f9/d01e87b997b84c985868204b1ed26749902fd7f9_data.zip) |

diff --git a/docs/config.json b/docs/config.json
@@ -195,6 +195,22 @@
             ],
             "path": "docs/PXD062383_disable_hoverinfo",
             "file_type": ["dia", "disable_hoverinfo"]
+        },
+        {
+            "accession": "PXD053464",
+            "urls": [
+                "https://ftp.pride.ebi.ac.uk/pride/data/archive/2024/08/PXD053464/03_SingleCell_Searches.zip"
+            ],
+            "path": "docs/PXD053464",
+            "file_type": ["diann", ""]
+        },
+        {
+            "accession": "PXD053464_disable_hoverinfo",
+            "urls": [
+                "https://ftp.pride.ebi.ac.uk/pride/data/archive/2024/08/PXD053464/03_SingleCell_Searches.zip"
+            ],
+            "path": "docs/PXD053464_disable_hoverinfo",
+            "file_type": ["diann", "disable_hoverinfo"]
         }
     ]
 }
diff --git a/pmultiqc/modules/common/dia_utils.py b/pmultiqc/modules/common/dia_utils.py
@@ -155,17 +155,13 @@ def _draw_heatmap(sub_section, report_data, heatmap_color_list):
 
 def _process_diann_statistics(report_data):
     """Process DIA-NN statistics and create peptide plot."""
-    # Extract sequence information
-    report_data["sequence"] = report_data[
-        "Modified.Sequence"
-    ].astype("string").str.replace(r"\(.*?\)", "", regex=True)
 
     total_protein_quantified = len(set(report_data["Protein.Group"]))
-    total_peptide_count = len(set(report_data["sequence"]))
+    total_peptide_count = len(set(report_data["Modified.Sequence"]))
 
     # Create peptide plot
     log.info("Processing DIA pep_plot.")
-    protein_pep_map = report_data.groupby("Protein.Group")["sequence"].agg(list).to_dict()
+    protein_pep_map = report_data.groupby("Protein.Group")["Modified.Sequence"].agg(list).to_dict()
     pep_plot = Histogram("number of peptides per proteins", plot_category="frequency")
     for _, peps in protein_pep_map.items():
         number = len(set(peps))
@@ -230,9 +226,13 @@ def _process_run_data(df, ms_with_psm, quantms_modified, sdrf_file_df):
 
     log.info("Processing DIA mod_plot_dict.")
 
-    report_data = df[
-        ["Run", "Modified.Sequence", "Modifications", "Protein.Group", "sequence"]
-    ].copy()
+    required_cols = ["Run", "Modified.Sequence", "Modifications", "Protein.Group"]
+    report_data = df[required_cols].copy()
+    if "Proteotypic" in df.columns:
+        report_data["Proteotypic"] = df["Proteotypic"]
+    else:
+        log.warning("Missing Proteotypic column; treating all peptides as proteotypic.")
+        report_data["Proteotypic"] = 1
 
     mod_plot_by_run = dict()
     modified_cats = list()
@@ -277,25 +277,23 @@ def _calculate_run_statistics(group):
     """Calculate statistics for a specific run."""
 
     peptides = set(group["Modified.Sequence"])
+    unique_peptides = set(
+        group.loc[group["Proteotypic"] == 1, "Modified.Sequence"]
+    )
     modified_pep = list(
         filter(lambda x: re.match(r".*?\(.*?\).*?", x) is not None, peptides)
     )
 
-    group_peptides = group.groupby("sequence")["Protein.Group"].apply(list).to_dict()
-    unique_peptides = [
-        pep for pep, prots in group_peptides.items() if len(set(prots)) == 1
-    ]
-
     stat_run = {
         "protein_num": len(set(group["Protein.Group"])),
-        "peptide_num": len(set(group["sequence"])),
+        "peptide_num": len(peptides),
         "unique_peptide_num": len(unique_peptides),
         "modified_peptide_num": len(modified_pep)
     }
 
     data_per_run = {
         "proteins": set(group["Protein.Group"]),
-        "peptides": set(group["sequence"]),
+        "peptides": peptides,
         "unique_peptides": unique_peptides,
         "modified_peps": modified_pep
     }