Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -305,4 +305,27 @@ jobs:
name: Upload results
with:
name: results_big_dia
path: ./results_big_dia
path: ./results_big_dia

test_single_cell_diann:
needs: setup
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install package
run: pip install .
- name: Test single cell dataset
run: |
wget -nv https://ftp.pride.ebi.ac.uk/pride/data/archive/2024/08/PXD053464/03_SingleCell_Searches.zip
unzip -d ./PXD053464 03_SingleCell_Searches.zip
multiqc --diann-plugin ./PXD053464 -o ./results_single_cell_diann
- uses: actions/upload-artifact@v4
if: always()
name: Upload results
with:
name: results_single_cell_diann
path: ./results_single_cell_diann
1 change: 1 addition & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ You can find example reports on the [docs page](https://bigbio.github.io/pmultiq
| TMT | Tandem mass tag | [TMT Example](https://pmultiqc.quantms.org/TMT_PXD007683/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/TMT_PXD007683_disable_hoverinfo/multiqc_report.html)) | [TMT_PXD007683.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/example-projects/TMT_PXD007683.zip) |
| quantms DIA | Data-independent acquisition | [quantms DIA Example](https://pmultiqc.quantms.org/dia/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/dia_disable_hoverinfo/multiqc_report.html)) | [dia.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/dia/dia.zip) |
| DIA-NN | Data-independent acquisition | [DIA-NN Example](https://pmultiqc.quantms.org/DIANN/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/DIANN_disable_hoverinfo/multiqc_report.html)) | [PXD063291.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/example-projects/PXD063291.zip) |
| Single-cell (DIA-NN) | Single-cell dataset | [Single-cell Example](https://pmultiqc.quantms.org/PXD053464/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/PXD053464_disable_hoverinfo/multiqc_report.html)) | [PXD053464 folder](https://ftp.pride.ebi.ac.uk/pride/data/archive/2024/08/PXD053464/) |
| MaxQuant | MaxQuant results | [MaxQuant Example](https://pmultiqc.quantms.org/PXD003133/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/PXD003133_disable_hoverinfo/multiqc_report.html)) | [txt_20min.zip](https://ftp.pride.ebi.ac.uk/pride/data/archive/2015/11/PXD003133/txt_20min.zip) |
| MaxQuant DIA | MaxQuant DIA results | [MaxQuant DIA Example](https://pmultiqc.quantms.org/MaxDIA/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/MaxDIA_disable_hoverinfo/multiqc_report.html)) | [MaxDIA_txt.zip](https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/pmultiqc/maxquant/MaxDIA_txt.zip) |
| ProteoBench | ProteoBench results | [ProteoBench Example](https://pmultiqc.quantms.org/ProteoBench/multiqc_report.html) ([disable_hoverinfo](https://pmultiqc.quantms.org/ProteoBench_disable_hoverinfo/multiqc_report.html)) | [ProteoBench data](https://proteobench.cubimed.rub.de/datasets/d01e87b997b84c985868204b1ed26749902fd7f9/d01e87b997b84c985868204b1ed26749902fd7f9_data.zip) |
Expand Down
16 changes: 16 additions & 0 deletions docs/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,22 @@
],
"path": "docs/PXD062383_disable_hoverinfo",
"file_type": ["dia", "disable_hoverinfo"]
},
{
"accession": "PXD053464",
"urls": [
"https://ftp.pride.ebi.ac.uk/pride/data/archive/2024/08/PXD053464/03_SingleCell_Searches.zip"
],
"path": "docs/PXD053464",
"file_type": ["diann", ""]
},
{
"accession": "PXD053464_disable_hoverinfo",
"urls": [
"https://ftp.pride.ebi.ac.uk/pride/data/archive/2024/08/PXD053464/03_SingleCell_Searches.zip"
],
"path": "docs/PXD053464_disable_hoverinfo",
"file_type": ["diann", "disable_hoverinfo"]
}
]
}
30 changes: 14 additions & 16 deletions pmultiqc/modules/common/dia_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,17 +155,13 @@ def _draw_heatmap(sub_section, report_data, heatmap_color_list):

def _process_diann_statistics(report_data):
"""Process DIA-NN statistics and create peptide plot."""
# Extract sequence information
report_data["sequence"] = report_data[
"Modified.Sequence"
].astype("string").str.replace(r"\(.*?\)", "", regex=True)

total_protein_quantified = len(set(report_data["Protein.Group"]))
total_peptide_count = len(set(report_data["sequence"]))
total_peptide_count = len(set(report_data["Modified.Sequence"]))

# Create peptide plot
log.info("Processing DIA pep_plot.")
protein_pep_map = report_data.groupby("Protein.Group")["sequence"].agg(list).to_dict()
protein_pep_map = report_data.groupby("Protein.Group")["Modified.Sequence"].agg(list).to_dict()
pep_plot = Histogram("number of peptides per proteins", plot_category="frequency")
for _, peps in protein_pep_map.items():
number = len(set(peps))
Expand Down Expand Up @@ -230,9 +226,13 @@ def _process_run_data(df, ms_with_psm, quantms_modified, sdrf_file_df):

log.info("Processing DIA mod_plot_dict.")

report_data = df[
["Run", "Modified.Sequence", "Modifications", "Protein.Group", "sequence"]
].copy()
required_cols = ["Run", "Modified.Sequence", "Modifications", "Protein.Group"]
report_data = df[required_cols].copy()
if "Proteotypic" in df.columns:
report_data["Proteotypic"] = df["Proteotypic"]
else:
log.warning("Missing Proteotypic column; treating all peptides as proteotypic.")
report_data["Proteotypic"] = 1

mod_plot_by_run = dict()
modified_cats = list()
Expand Down Expand Up @@ -277,25 +277,23 @@ def _calculate_run_statistics(group):
"""Calculate statistics for a specific run."""

peptides = set(group["Modified.Sequence"])
unique_peptides = set(
group.loc[group["Proteotypic"] == 1, "Modified.Sequence"]
)
modified_pep = list(
filter(lambda x: re.match(r".*?\(.*?\).*?", x) is not None, peptides)
)

group_peptides = group.groupby("sequence")["Protein.Group"].apply(list).to_dict()
unique_peptides = [
pep for pep, prots in group_peptides.items() if len(set(prots)) == 1
]

stat_run = {
"protein_num": len(set(group["Protein.Group"])),
"peptide_num": len(set(group["sequence"])),
"peptide_num": len(peptides),
"unique_peptide_num": len(unique_peptides),
"modified_peptide_num": len(modified_pep)
}

data_per_run = {
"proteins": set(group["Protein.Group"]),
"peptides": set(group["sequence"]),
"peptides": peptides,
"unique_peptides": unique_peptides,
"modified_peps": modified_pep
}
Expand Down