bigbio · ypriverol · Jan 18, 2026 · Jan 18, 2026 · Jan 18, 2026 · coderabbitai
diff --git a/pmultiqc/modules/common/dia_utils.py b/pmultiqc/modules/common/dia_utils.py
@@ -69,6 +69,9 @@ def parse_diann_report(
     # Handle files without PSM
     ms_without_psm = _handle_files_without_psm(ms_paths, ms_with_psm, cal_num_table_data)
 
+    # Peptide Length Distribution
+    peptide_length = _get_peptide_length(report_data)
+
     return (
         total_protein_quantified,
         total_peptide_count,
@@ -77,7 +80,8 @@ def parse_diann_report(
         ms_with_psm,
         cal_num_table_data,
         quantms_modified,
-        ms_without_psm
+        ms_without_psm,
+        peptide_length
     )
 
 
@@ -316,6 +320,22 @@ def _handle_files_without_psm(ms_paths, ms_with_psm, cal_num_table_data):
     return ms_without_psm
 
 
+def _get_peptide_length(df):
+    """Return {run: {peptide length: count}}, or None if a required column is missing."""
+    if "Run" not in df.columns or "Stripped.Sequence" not in df.columns:
+        return None
+
+    # Drop missing sequences first: len() on a float NaN raises TypeError.
+    df_sub = df[["Run", "Stripped.Sequence"]].dropna(subset=["Stripped.Sequence"]).copy()
+    df_sub["length"] = df_sub["Stripped.Sequence"].map(len)
+
+    plot_data = {}
+    for run, group in df_sub.groupby("Run"):
+        plot_data[run] = group["length"].value_counts().sort_index().to_dict()
+
+    return plot_data
+
+
 ## Removed draw_dia_heatmap wrapper; call cal_dia_heatmap and dia_plots.draw_heatmap directly.
 
 

diff --git a/pmultiqc/modules/common/ms/mztab.py b/pmultiqc/modules/common/ms/mztab.py
@@ -135,6 +135,8 @@ def parse(self, **_kwargs) -> None:
 
         self.total_protein_quantified = len(prot.index)
 
+        psm["pep_length"] = psm["sequence"].apply(lambda x: len(x))
+
         self.mztab_data = mztab_data
         self.pep_table = pep_table
         self.psm = psm

diff --git a/pmultiqc/modules/common/plots/id.py b/pmultiqc/modules/common/plots/id.py
@@ -1133,6 +1133,39 @@ def draw_peptide_intensity(sub_section, plot_data):
             """,
     )
 
+
+# Peptide Length Distribution
+def draw_peptide_length_distribution(sub_section, plot_data):
+    """Plot plot_data with linegraph.plot and pass the result to add_sub_section."""
+    # Line-graph settings; plot_data itself is handed to linegraph.plot unchanged.
+    pconfig = {
+        "id": "peptide_length_distribution",
+        "cpswitch": False,
+        "cpswitch_c_active": False,
+        "title": "Peptide Length Distribution",
+        "tt_decimals": 2,
+        "xlab": "Peptide Length",
+        "save_data_file": False,
+        "showlegend": True,
+    }
+    line_html = plot_html_check(
+        linegraph.plot(plot_data, pconfig=pconfig)
+    )
+    add_sub_section(
+        sub_section=sub_section,
+        plot=line_html,
+        order=8,
+        description="Peptide length distribution per Run.",
+        helptext="""
+            Peptide length distribution.<br>
+            FragPipe: psm.tsv ('Peptide Length': number of residues in the peptide sequence).<br>
+            MaxQuant: evidence.txt ('Length': the length of the sequence stored in the column 'Sequence').<br>
+            DIA-NN: report.tsv (the length of the 'Stripped.Sequence').<br>
+            quantms: *.mzTab (the length of sequence).
+            """,
+    )
+
+
 def draw_long_trends(sub_sections, long_trends_data):
 
     plot_ac_datetime = long_trends_data["time"]

diff --git a/pmultiqc/modules/diann/diann.py b/pmultiqc/modules/diann/diann.py
@@ -20,7 +20,8 @@
     draw_identi_num,
     draw_num_pep_per_protein,
     draw_identification,
-    draw_long_trends
+    draw_long_trends,
+    draw_peptide_length_distribution
 )
 from pmultiqc.modules.common.plots.ms import (
     draw_peak_intensity_distribution,
@@ -154,7 +155,8 @@ def draw_plots(self):
             self.ms_with_psm,
             self.cal_num_table_data,
             self.quantms_modified,
-            self.ms_without_psm
+            self.ms_without_psm,
+            self.peptide_length
         ) = parse_diann_report(
             sub_sections=self.sub_sections,
             diann_report_path=self.diann_report_path,
@@ -213,6 +215,12 @@ def draw_plots(self):
                 long_trends_data=self.long_trends
             )
 
+        if self.peptide_length:
+            draw_peptide_length_distribution(
+                sub_section=self.sub_sections["identification"],
+                plot_data=self.peptide_length
+            )
+
         if self.enable_sdrf:
             ms_io.del_openms_convert_tsv()
 

diff --git a/pmultiqc/modules/fragpipe/fragpipe.py b/pmultiqc/modules/fragpipe/fragpipe.py
@@ -16,7 +16,6 @@
     combined_protein_reader,
     get_protein_intensity_distribution,
     combined_peptide_reader,
-    get_mbr_stats,
     combined_ion_reader,
     get_msms_counts_per_peak,
     cal_peptide_id_gain
@@ -40,7 +39,8 @@
     draw_top_n_contaminants,
     draw_potential_contaminants,
     draw_modifications,
-    draw_oversampling
+    draw_oversampling,
+    draw_peptide_length_distribution
 )
 from pmultiqc.modules.core.section_groups import (
     add_group_modules,
@@ -95,6 +95,7 @@ def __init__(self, find_log_files_func, sub_sections, heatmap_colors):
         self.contam_df = []
         self.mods = []
         self.hm_data = []
+        self.peptide_length = []
 
         # Ion-level intensity data from ion.tsv
         self.ion_intensity_data = None
@@ -144,7 +145,8 @@ def get_data(self):
                 self.hyperscores,
                 self.contam_df,
                 self.mods,
-                self.hm_data
+                self.hm_data,
+                self.peptide_length
             ) = self.parse_psm(
                 fragpipe_files=self.fragpipe_files
             )
@@ -365,6 +367,13 @@ def draw_plots(self):
                 missed_cleavages=mc_plot_data
             )
 
+        # Peptide Length Distribution
+        if self.peptide_length:
+            self.draw_peptide_length(
+                sub_section=self.sub_sections["identification"],
+                peptide_length=self.peptide_length
+            )
+
         # IDs over RT
         if self.retentions:
             self.draw_ids_over_rt(
@@ -435,6 +444,7 @@ def parse_psm(fragpipe_files):
         contam_df = []
         mods = []
         hm_data = []
+        peptide_length = []
 
         for psm in fragpipe_files.get("psm", []):
 
@@ -504,6 +514,10 @@ def parse_psm(fragpipe_files):
             ):
                 hm_data.append(psm_cont_df[hm_requires].copy())
 
+            # Peptide Length
+            if "Peptide Length" in psm_df.columns:
+                peptide_length.append(psm_df[["Run", "Peptide Length"]].copy())
+
             # Contaminants
             if _has_valid_contaminant(psm_cont_df):
                 log.info(f"{psm} contains contaminants.")
@@ -531,7 +545,8 @@ def parse_psm(fragpipe_files):
             hyperscores,
             contam_df,
             mods,
-            hm_data
+            hm_data,
+            peptide_length
         )
 
     # Delta Mass
@@ -875,6 +890,33 @@ def draw_fragpipe_heatmap(sub_section, hm: list, hm_color: list, missed_cleavage
         )
 
 
+    # Peptide Length Distribution
+    @staticmethod
+    def draw_peptide_length(sub_section, peptide_length: list):
+        """Merge per-file peptide-length frames and plot length counts per run.
+
+        peptide_length holds DataFrames with "Run" and "Peptide Length" columns;
+        nothing is drawn (a warning is logged) when it is empty or has no rows.
+        """
+        if not peptide_length:
+            log.warning("No peptide_length data; skipping peptide_length.")
+            return
+
+        merged = pd.concat(peptide_length, ignore_index=True)
+        if merged.empty:
+            log.warning("Peptide Length DataFrame is empty; skipping Peptide Length.")
+            return
+
+        log.info(f"Number of Peptide Length rows in DataFrame: {len(merged)}")
+
+        # One {length: count} mapping per run, with lengths in ascending order.
+        length_counts = {
+            run_name: run_rows["Peptide Length"].value_counts().sort_index().to_dict()
+            for run_name, run_rows in merged.groupby("Run")
+        }
+        draw_peptide_length_distribution(sub_section=sub_section, plot_data=length_counts)
+
+
     # IDs over RT
     @staticmethod
     def draw_ids_over_rt(sub_section, retentions: list):

diff --git a/pmultiqc/modules/fragpipe/fragpipe_io.py b/pmultiqc/modules/fragpipe/fragpipe_io.py
@@ -14,7 +14,7 @@
     "psm": [
         "Spectrum", "Peptide", "Modified Peptide", "Charge", "Retention", "Intensity",
         "Delta Mass", "Number of Missed Cleavages", "Is Unique", "Protein", "Hyperscore",
-        "Assigned Modifications"
+        "Assigned Modifications", "Peptide Length"
     ],
     "ion": [
         "Peptide Sequence", "Modified Sequence", "Charge", "Protein", "Intensity"

diff --git a/pmultiqc/modules/maxquant/maxquant.py b/pmultiqc/modules/maxquant/maxquant.py
@@ -194,6 +194,7 @@ def _process_evidence_file(self):
             "maxquant_delta_mass_da": None,
             "peptides_quant_table": None,
             "protein_quant_table": None,
+            "peptide_length": None,
         }
 
         if "evidence" not in self.maxquant_paths.keys():
@@ -371,6 +372,14 @@ def _draw_quantification_plots(self):
             error_name="draw_peptide_table"
         )
 
+        # Peptide Length Distribution
+        self._safe_draw_if_exists(
+            id_plots.draw_peptide_length_distribution,
+            self.sub_sections["identification"],
+            self.mq_results["get_evidence_dicts"].get("peptide_length"),
+            error_name="draw_peptide_length_distribution"
+        )
+
         self._safe_draw_if_exists(
             maxquant_plots.draw_protein_table,
             self.sub_sections["quantification"],

diff --git a/pmultiqc/modules/maxquant/maxquant_utils.py b/pmultiqc/modules/maxquant/maxquant_utils.py
@@ -550,6 +550,9 @@ def get_evidence(file_path):
     # Peptides Quantification Table / Protein Quantification Table
     peptides_quant_table, protein_quant_table = evidence_peptides_table(evidence_df)
 
+    # Peptide Length Distribution
+    peptide_length = evidence_peptide_length(evidence_df)
+
     result = {
         "top_contaminants": top_cont_dict,
         "peptide_intensity": peptide_intensity_dict,
@@ -569,6 +572,7 @@ def get_evidence(file_path):
         "maxquant_delta_mass_da": maxquant_delta_mass_da,
         "peptides_quant_table": peptides_quant_table,
         "protein_quant_table": protein_quant_table,
+        "peptide_length": peptide_length,
     }
 
     logger.info("Completed processing evidence data")
@@ -1228,6 +1232,22 @@ def evidence_peptides_table(evidence_data):
     return peptides_result_dict, protein_result_dict
 
 
+# Peptide length distribution per raw file (MaxQuant evidence table)
+def evidence_peptide_length(df):
+    """Return {raw file: {peptide length: count}}, or None if a needed column is missing."""
+    # "raw file" is checked as well: it is the groupby key below, and grouping on a
+    # missing column raises KeyError instead of skipping the plot.
+    required = ("raw file", "length", "sequence")
+    if any(column not in df.columns for column in required):
+        return None
+
+    plot_data = {}
+    for run, group in df.groupby("raw file"):
+        plot_data[run] = group["length"].value_counts().sort_index().to_dict()
+
+    return plot_data
+
+
 # 4.msms.txt
 def get_msms(file_path: Union[Path, str], evidence_df: pd.DataFrame = None):
     """

diff --git a/pmultiqc/modules/quantms/quantms.py b/pmultiqc/modules/quantms/quantms.py
@@ -65,7 +65,8 @@
     draw_summary_protein_ident_table,
     draw_identi_num,
     draw_peptide_intensity,
-    draw_long_trends
+    draw_long_trends,
+    draw_peptide_length_distribution
 )
 from pmultiqc.modules.common.plots.general import (
     draw_heatmap,
@@ -176,6 +177,7 @@ def __init__(self, find_log_files_func, sub_sections, heatmap_colors):
         self.sample_df = pd.DataFrame()
         self.file_df = pd.DataFrame()
         self.long_trends = {}
+        self.peptide_length = {}
 
     def get_data(self):
 
@@ -322,7 +324,8 @@ def draw_plots(self):
                 self.ms_with_psm,
                 self.cal_num_table_data,
                 self.quantms_modified,
-                self.ms_without_psm
+                self.ms_without_psm,
+                self.peptide_length
             ) = parse_diann_report(
                 sub_sections=self.sub_sections,
                 diann_report_path=self.diann_report_path,
@@ -476,6 +479,13 @@ def draw_plots(self):
                 long_trends_data=self.long_trends
             )
 
+        # Peptide Length Distribution
+        if self.peptide_length:
+            draw_peptide_length_distribution(
+                sub_section=self.sub_sections["identification"],
+                plot_data=self.peptide_length
+            )
+
         if self.quantms_pep_intensity:
             draw_peptide_intensity(
                 sub_section=self.sub_sections["quantification"],
@@ -1384,8 +1394,9 @@ def get_unimod_modification(modifis):
         mod_plot_by_run = dict()
         modified_cats = list()
 
-        data_per_run = dict()
-        num_table_at_run = dict()
+        data_per_run = {}
+        num_table_at_run = {}
+        peptide_length = {}
 
         if config.kwargs["remove_decoy"]:
             psm = psm[psm["opt_global_cv_MS:1002217_decoy_peptide"] == 0].copy()
@@ -1455,8 +1466,13 @@ def get_unimod_modification(modifis):
 
             ml_spec_ident_final[m] = len(set(self.identified_spectrum[m]))
 
+            stats_dict = group["pep_length"].value_counts().sort_index().to_dict()
+            peptide_length[m] = stats_dict
+
         num_table_at_sample = cal_num_table_at_sample(self.file_df, data_per_run)
 
+        self.peptide_length = peptide_length
+
         self.cal_num_table_data = {
             "sdrf_samples": num_table_at_sample,
             "ms_runs": num_table_at_run