Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions pmultiqc_service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import traceback
import uuid
import zipfile
import rarfile
from datetime import datetime
from pathlib import Path
from typing import Dict, Any, List, Optional
Expand Down Expand Up @@ -683,6 +684,7 @@ def filter_search_files(files: List[Dict]) -> tuple[List[Dict], bool]:
# Check if it's a search engine output file
# Includes MaxQuant (evidence, peptides, proteingroups, msms),
# DIANN (report), FragPipe (psm.tsv, ion.tsv), and mzIdentML files
# Also include RAR/ZIP archives that might contain search results (e.g., DDA.rar)
if (
file_category in ["SEARCH", "RESULT"]
or "report" in filename_lower
Expand All @@ -695,6 +697,8 @@ def filter_search_files(files: List[Dict]) -> tuple[List[Dict], bool]:
or filename_lower.endswith(".mzid")
or filename_lower.endswith(".mzid.gz")
or filename_lower.endswith(".mzid.zip")
or filename_lower.endswith(".rar") # RAR archives containing search results
or (filename_lower.endswith(".zip") and file_category in ["SEARCH", "RESULT"])
):

search_files.append(file_info)
Expand Down Expand Up @@ -955,6 +959,17 @@ def download_pride_file(file_info: Dict, download_dir: str, job_id: str = None)
final_file_path = download_dir
logger.info(f"Extracted {filename} to directory")

elif filename.lower().endswith(".rar"):
logger.info(f"Extracting RAR file: {filename}")
with rarfile.RarFile(file_path, "r") as rar_ref:
# Extract to the same directory
rar_ref.extractall(download_dir)
# Remove the rar file
os.remove(file_path)
# Return the directory path since rar files contain multiple files
final_file_path = download_dir
logger.info(f"Extracted {filename} to directory")
Comment on lines +962 to +971
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add security validation for RAR extraction similar to ZIP validation.

The RAR extraction lacks the security checks that validate_and_extract_zip (lines 1691-1740) implements for user uploads:

  • Path traversal validation on entry names
  • Archive bomb detection (compression ratio check)
  • File count limits

While PRIDE datasets are presumably trusted, defense-in-depth is recommended. Consider also adding error handling for rarfile.BadRarFile exceptions.

🔒 Suggested security-hardened implementation
         elif filename.lower().endswith(".rar"):
             logger.info(f"Extracting RAR file: {filename}")
-            with rarfile.RarFile(file_path, "r") as rar_ref:
-                # Extract to the same directory
-                rar_ref.extractall(download_dir)
-            # Remove the rar file
-            os.remove(file_path)
-            # Return the directory path since rar files contain multiple files
-            final_file_path = download_dir
-            logger.info(f"Extracted {filename} to directory")
+            try:
+                with rarfile.RarFile(file_path, "r") as rar_ref:
+                    # Security: Check for path traversal in RAR entry names
+                    for entry in rar_ref.infolist():
+                        if ".." in entry.filename or entry.filename.startswith("/"):
+                            raise Exception(f"RAR file contains invalid path: {entry.filename}")
+                    # Extract to the same directory
+                    rar_ref.extractall(download_dir)
+                # Remove the rar file
+                os.remove(file_path)
+                # Return the directory path since rar files contain multiple files
+                final_file_path = download_dir
+                logger.info(f"Extracted {filename} to directory")
+            except rarfile.BadRarFile as e:
+                logger.error(f"Invalid RAR file {filename}: {e}")
+                raise Exception(f"Invalid RAR file: {filename}")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
elif filename.lower().endswith(".rar"):
logger.info(f"Extracting RAR file: {filename}")
with rarfile.RarFile(file_path, "r") as rar_ref:
# Extract to the same directory
rar_ref.extractall(download_dir)
# Remove the rar file
os.remove(file_path)
# Return the directory path since rar files contain multiple files
final_file_path = download_dir
logger.info(f"Extracted {filename} to directory")
elif filename.lower().endswith(".rar"):
logger.info(f"Extracting RAR file: {filename}")
try:
with rarfile.RarFile(file_path, "r") as rar_ref:
# Security: Check for path traversal in RAR entry names
for entry in rar_ref.infolist():
if ".." in entry.filename or entry.filename.startswith("/"):
raise Exception(f"RAR file contains invalid path: {entry.filename}")
# Extract to the same directory
rar_ref.extractall(download_dir)
# Remove the rar file
os.remove(file_path)
# Return the directory path since rar files contain multiple files
final_file_path = download_dir
logger.info(f"Extracted {filename} to directory")
except rarfile.BadRarFile as e:
logger.error(f"Invalid RAR file {filename}: {e}")
raise Exception(f"Invalid RAR file: {filename}")
🤖 Prompt for AI Agents
In `@pmultiqc_service/app.py` around lines 962 - 971, The RAR extraction branch
(the elif checking filename.lower().endswith(".rar") that opens rarfile.RarFile)
lacks the safety checks implemented in validate_and_extract_zip; update this
branch to validate each archive entry for path traversal (reject entries with
absolute paths or '..' components), enforce a max file count limit and an
archive-bomb compression-ratio check when extracting entries, and catch
rarfile.BadRarFile and other extraction errors to log and fail gracefully;
preserve behavior of removing the original .rar and setting final_file_path to
download_dir only on successful, validated extraction.


return final_file_path

except Exception as e:
Expand Down
3 changes: 2 additions & 1 deletion pmultiqc_service/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ lxml
numpy
pyarrow
scikit-learn
tuspyserver>=4.2.0
tuspyserver>=4.2.0
rarfile
Comment on lines +15 to +16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check if rarfile is documented in Dockerfile or deployment configs
fd -e Dockerfile -e dockerfile -e docker-compose -e docker-compose.yml -e "*.yaml" -e "*.yml" | head -20

Repository: bigbio/pmultiqc

Length of output: 41


🌐 Web query:

Python rarfile package system requirements external dependencies

💡 Result:

Short summary — rarfile system requirements and external dependencies:

  • Python: requires Python 3.6+ (use 3.8+ recommended). [1][3]
  • External decompression programs (rarfile calls an external tool to extract compressed files):
    • Preferred: unrar (official non‑free UnRAR binary) — full RAR3/RAR5 support. [1][3]
    • Alternatives: unar, 7z/p7zip (may need p7zip-rar), bsdtar/libarchive (limited RAR support). Behavior/feature support varies by backend. [1][3]
  • Crypto for password‑protected headers: optional Python crypto libraries such as cryptography or PyCrypto/PyCryptodome. [2]
  • Platform notes: some backends are unavailable or limited on certain OSes (e.g., unar stdout support on Windows is poor; bsdtar has limited RAR feature support). See backend table for details. [1]

Sources: rarfile documentation and project page; Debian package FAQ. [1][2][3]


Add external RAR extraction tool to deployment configuration.

The rarfile package is a Python wrapper that requires an external RAR extraction program to function (unrar, unar, 7z/p7zip, or bsdtar). These tools must be installed in the deployment environment—for example, in the Docker image if used for containerized deployment.

Ensure the appropriate tool is installed in your deployment configuration and document this dependency. Additionally, consider pinning a minimum version for rarfile to match the version pinning practice used for other dependencies in this file.

🤖 Prompt for AI Agents
In `@pmultiqc_service/requirements.txt` around lines 15 - 16, The requirements
list includes the Python wrapper package "rarfile" which depends on an external
RAR extractor (unrar/unar/7z/p7zip/bsdtar); update your deployment configuration
(e.g., Dockerfile or provisioning scripts) to install one of these system tools
(install package like unrar or p7zip-full) and add a note to the project
README/deployment docs describing this runtime dependency, and also pin
"rarfile" to a minimum compatible version in requirements.txt (e.g., change
"rarfile" to a bounded spec like "rarfile>=X.Y") to match the existing version
pinning practice.

6 changes: 3 additions & 3 deletions pmultiqc_service/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -1043,13 +1043,13 @@ <h4 style="margin: 0 0 5px; color: #333;">PXD051187 - COMPLETE Submission</h4>
">
<div style="display: flex; justify-content: space-between; align-items: center;">
<div>
<h4 style="margin: 0 0 5px; color: #333;">PXD066146 - FragPipe Analysis</h4>
<h4 style="margin: 0 0 5px; color: #333;">PXD062399 - FragPipe Analysis</h4>
<p style="margin: 0; color: #666; font-size: 14px;">
A FragPipe analysis dataset with psm.tsv and ion.tsv files.
A comprehensive FragPipe DDA analysis dataset with psm.tsv and ion.tsv files.
This dataset demonstrates FragPipe/MSFragger output format and quality metrics visualization.
</p>
</div>
<button onclick="selectExample('PXD066146')" style="
<button onclick="selectExample('PXD062399')" style="
background: linear-gradient(45deg, #17a2b8, #138496);
color: white;
border: none;
Expand Down