From 5598668a94cc3ae418e2d3afabc144b3446adb44 Mon Sep 17 00:00:00 2001
From: selvaebi
Date: Wed, 14 Jan 2026 22:32:48 +0000
Subject: [PATCH 01/24] fix: tuspyserver clean up

---
 pmultiqc_service/app.py               | 113 ++++++++++++-
 pmultiqc_service/requirements.txt     |   5 +-
 pmultiqc_service/templates/index.html | 222 ++++++++++++++------------
 3 files changed, 236 insertions(+), 104 deletions(-)

diff --git a/pmultiqc_service/app.py b/pmultiqc_service/app.py
index 1b538db4..0a863b24 100644
--- a/pmultiqc_service/app.py
+++ b/pmultiqc_service/app.py
@@ -17,6 +17,7 @@
 import uuid
 import zipfile
 from datetime import datetime
+from pathlib import Path
 from typing import Dict, Any, List, Optional
 
 import redis
@@ -29,6 +30,7 @@
 from fastapi.responses import JSONResponse, FileResponse, HTMLResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
+from tuspyserver import create_tus_router
 
 # Configuration
 # Use environment variables with fallback to current working directory subdirectories
@@ -64,7 +66,7 @@ def get_pride_button_visible():
 os.makedirs(HTML_REPORTS_FOLDER, exist_ok=True)
 
 # Initialize Jinja2 templates
-templates = Jinja2Templates(directory="templates")
+templates = Jinja2Templates(directory=str(Path(__file__).parent / "templates"))
 
 # Allowed file extensions
 ALLOWED_EXTENSIONS = {"zip"}
@@ -373,12 +375,115 @@ def cleanup_old_jobs(days_to_keep: int = 30):
     CORSMiddleware,
     allow_origins=ALLOWED_ORIGINS,
     allow_credentials=True,
-    allow_methods=["GET", "POST", "OPTIONS"],  # Restrict to only needed methods
-    allow_headers=["Content-Type", "Authorization"],  # Restrict to only needed headers
+    allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"],  # TUS needs PATCH and HEAD
+    allow_headers=["*"],  # TUS needs custom headers (Upload-*, Tus-*)
+    expose_headers=["*"],  # TUS needs to expose custom headers
 )
 
+
+# TUS Upload Protocol Implementation (using tuspyserver)
+# Callback function for when TUS upload completes
+def handle_upload_complete(file_path: str, metadata: dict):
+    """
+    Called by tuspyserver when upload completes.
+
+    Args:
+        file_path: Path to the uploaded file
+        metadata: Upload metadata (filename, filetype, etc.)
+    """
+    try:
+        # Extract metadata
+        filename = metadata.get("filename", "upload.zip")
+        filetype = metadata.get("filetype", "application/zip")
+
+        logger.info(f"TUS upload complete: file={file_path}, filename={filename}")
+
+        # Validate filename
+        if not filename.lower().endswith(".zip"):
+            logger.error(f"Invalid file type: {filename}")
+            # Clean up uploaded file
+            if os.path.exists(file_path):
+                os.remove(file_path)
+            raise ValueError(f"Only ZIP files are allowed. Received: {filename}")
+
+        # Get file size
+        file_size = os.path.getsize(file_path)
+
+        # Generate job ID
+        job_id = str(uuid.uuid4())
+
+        # Create job directories
+        job_upload_dir = os.path.join(UPLOAD_FOLDER, job_id)
+        output_dir = os.path.join(OUTPUT_FOLDER, job_id)
+        os.makedirs(job_upload_dir, exist_ok=True)
+        os.makedirs(output_dir, exist_ok=True)
+
+        # Move uploaded file to job directory
+        final_zip_path = os.path.join(job_upload_dir, filename)
+        shutil.move(file_path, final_zip_path)
+
+        logger.info(f"Moved upload to {final_zip_path}, job_id={job_id}")
+
+        # Initialize job in database
+        initial_job_data = {
+            "job_id": job_id,
+            "status": "extracting",
+            "progress": 25,
+            "started_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            "filename": filename,
+            "file_size": file_size,
+        }
+        save_job_to_db(job_id, initial_job_data)
+
+        # Extract and process
+        extract_path = os.path.join(job_upload_dir, "extracted")
+        os.makedirs(extract_path, exist_ok=True)
+
+        validate_and_extract_zip(final_zip_path, extract_path, file_size)
+
+        # Detect input type
+        input_type, quantms_config = detect_input_type(extract_path)
+        logger.info(f"Detected input type: {input_type}")
+
+        if input_type == "unknown":
+            update_job_progress(
+                job_id,
+                "failed",
+                error="Could not detect input type",
+                finished_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            )
+        else:
+            # Start async processing
+            update_job_progress(job_id, "queued", 50, input_type=input_type)
+            thread = threading.Thread(
+                target=process_job_async,
+                args=(job_id, extract_path, output_dir, input_type, quantms_config),
+            )
+            thread.daemon = True
+            thread.start()
+
+            logger.info(f"Job {job_id} processing started")
+
+    except Exception as e:
+        logger.error(f"Error handling upload completion: {e}")
+        logger.error(traceback.format_exc())
+        raise
+
+
+# Mount TUS upload router
+# This handles resumable file uploads via TUS protocol
tus_router = create_tus_router(
+    prefix="/files",                            # Endpoint: /files
+    files_dir=UPLOAD_FOLDER,                    # Where to store uploads
+    max_size=MAX_FILE_SIZE,                     # 10GB default
+    on_upload_complete=handle_upload_complete,  # Callback function
+    days_to_keep=7,                             # Auto-cleanup after 7 days
+)
+app.include_router(tus_router)
+logger.info(f"TUS upload router mounted at /files with max size {MAX_FILE_SIZE / (1024**3):.1f} GB")
+
 # Mount static files
-app.mount("/static", StaticFiles(directory="templates"), name="static")
+app.mount("/static", StaticFiles(directory=str(Path(__file__).parent / "templates")), name="static")
 
 
 # Configure OpenAPI for subpath deployment
diff --git a/pmultiqc_service/requirements.txt b/pmultiqc_service/requirements.txt
index 4dd82c8d..202cf86e 100644
--- a/pmultiqc_service/requirements.txt
+++ b/pmultiqc_service/requirements.txt
@@ -1,4 +1,4 @@
-fastapi>=0.104.0,<0.105.0
+fastapi>=0.110.0,<0.111.0
 uvicorn[standard]>=0.24.0
 python-multipart>=0.0.6
 jinja2>=3.0.0
@@ -11,4 +11,5 @@ sdrf-pipelines
 lxml
 numpy
 pyarrow
-scikit-learn
\ No newline at end of file
+scikit-learn
+tuspyserver>=4.2.0
\ No newline at end of file
diff --git a/pmultiqc_service/templates/index.html b/pmultiqc_service/templates/index.html
index f1ba4f0b..dbfb348a 100644
--- a/pmultiqc_service/templates/index.html
+++ b/pmultiqc_service/templates/index.html
@@ -6,6 +6,8 @@
     pmultiqc - Proteomics MultiQC Analysis
+
+
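
Note (illustrative, not part of the patch): a minimal client-side sketch of how the new
resumable upload endpoint could be exercised from Python, assuming the third-party tuspy
package (pip install tuspy). The /files prefix and the "filename"/"filetype" metadata keys
come from the tuspyserver router and handle_upload_complete callback above; the host URL,
chunk size, and results.zip file name are assumptions.

    # tus_upload_sketch.py -- hypothetical helper, not shipped with this change
    from tusclient import client

    # Assumed deployment URL; the router above is mounted at the /files prefix.
    tus = client.TusClient("http://localhost:8000/files")

    # handle_upload_complete() reads metadata["filename"] and rejects non-.zip names.
    uploader = tus.uploader(
        "results.zip",
        metadata={"filename": "results.zip", "filetype": "application/zip"},
        chunk_size=5 * 1024 * 1024,  # upload in 5 MB TUS PATCH requests
    )
    uploader.upload()            # sends chunks until the whole file is on the server
    print("upload URL:", uploader.url)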