KatherLab
diff --git a/‎README.md‎
Lines changed: 2 additions & 1 deletion b/‎README.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎getting-started.md‎
Lines changed: 42 additions & 0 deletions b/‎getting-started.md‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎mcp/server.py‎
Lines changed: 1 addition & 1 deletion b/‎mcp/server.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 5 additions & 4 deletions b/‎pyproject.toml‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎src/stamp/__main__.py‎
Lines changed: 22 additions & 10 deletions b/‎src/stamp/__main__.py‎
Lines changed: 22 additions & 10 deletions
diff --git a/‎src/stamp/config.yaml‎
Lines changed: 34 additions & 7 deletions b/‎src/stamp/config.yaml‎
Lines changed: 34 additions & 7 deletions
diff --git a/‎src/stamp/encoding/__init__.py‎
Lines changed: 2 additions & 2 deletions b/‎src/stamp/encoding/__init__.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/stamp/encoding/config.py‎
Lines changed: 1 addition & 1 deletion b/‎src/stamp/encoding/config.py‎
Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,8 @@ STAMP is an **end‑to‑end, weakly‑supervised deep‑learning pipeline** tha
 * 🎓 **Beginner‑friendly & expert‑ready**: Zero‑code CLI and YAML config for routine use; optional code‑level customization for advanced research.  
 * 🧩 **Model‑rich**: Out‑of‑the‑box support for **+20 foundation models** at [tile level](getting-started.md#feature-extraction) (e.g., *Virchow‑v2*, *UNI‑v2*) and [slide level](getting-started.md#slide-level-encoding) (e.g., *TITAN*, *COBRA*).  
 * 🔬 **Weakly‑supervised**: End‑to‑end MIL with Transformer aggregation for training, cross‑validation and external deployment; no pixel‑level labels required.  
-* 📊 **Stats & results**: Built‑in metrics (AUROC/AUPRC \+ 95% CI) and patient‑level predictions, ready for analysis and reporting.  
+* 🧮 **Multi-task learning**: Unified framework for **classification**, **regression**, and **Cox-based survival analysis**.
+* 📊 **Stats & results**: Built‑in metrics and patient‑level predictions, ready for analysis and reporting.  
 * 🖼️ **Explainable**: Generates heatmaps and top‑tile exports out‑of‑the‑box for transparent model auditing and publication‑ready figures.  
 * 🤝 **Collaborative by design**: Clinicians drive hypothesis & interpretation while engineers handle compute; STAMP’s modular CLI mirrors real‑world workflows and tracks every step for full reproducibility.  
 * 📑 **Peer‑reviewed**: Protocol published in [*Nature Protocols*](https://www.nature.com/articles/s41596-024-01047-2) and validated across multiple tumor types and centers.  
 
@@ -471,3 +471,45 @@ heatmaps:
   ```
 
 
+## Advanced configuration
+
+Advanced experiment settings can be specified under the `advanced_config` section in your configuration file.
+This section lets you control global training parameters, model type, and the target task (classification, regression, or survival).
+
+```yaml
+# stamp-test-experiment/config.yaml
+
+advanced_config:
+  seed: 42
+  task: "classification" # or regression/survival
+  max_epochs: 32
+  patience: 16
+  batch_size: 64
+  # Only for tile-level training. A smaller bag size reduces memory
+  # consumption but may affect model performance. The default value
+  # works fine for most cases.
+  bag_size: 512
+  #num_workers: 16 # Default chosen by cpu cores
+  # One Cycle Learning Rate Scheduler parameters. Check docs for more info.
+  # Determines the initial learning rate via initial_lr = max_lr/div_factor
+  max_lr: 1e-4
+  div_factor: 25. 
+  # Select a model regardless of task
+  # Available models are: vit, trans_mil, mlp
+  model_name: "vit"
+
+  model_params:
+    vit: # Vision Transformer
+      dim_model: 512
+      dim_feedforward: 512
+      n_heads: 8
+      n_layers: 2
+      dropout: 0.25
+      use_alibi: false
+```
+
+STAMP automatically adapts its **model architecture**, **loss function**, and **evaluation metrics** based on the task specified in the configuration file.
+ 
+**Regression** tasks only require `ground_truth_label`.  
+**Survival analysis** tasks require `time_label` (follow-up time) and `status_label` (event indicator).  
+These requirements apply consistently across cross-validation, training, deployment, and statistics.
@@ -1,10 +1,10 @@
 import asyncio
 import logging
 import os
-from pathlib import Path
 import platform
 import subprocess
 import tempfile
+from pathlib import Path
 from typing import Annotated
 
 import torch
 
@@ -1,6 +1,6 @@
 [project]
 name = "stamp"
-version = "2.3.0"
+version = "2.4.0"
 authors = [
     { name = "Omar El Nahhas", email = "omar.el_nahhas@tu-dresden.de" },
     { name = "Marko van Treeck", email = "markovantreeck@gmail.com" },
@@ -9,7 +9,8 @@ authors = [
     { name = "Laura Žigutytė", email = "laura.zigutyte@tu-dresden.de" },
     { name = "Cornelius Kummer", email = "cornelius.kummer@tu-dresden.de" },
     { name = "Juan Pablo Ricapito", email = "juan_pablo.ricapito@tu-dresden.de" },
-    { name = "Fabian Wolf", email = "fabian.wolf2@tu-dresden.de" }
+    { name = "Fabian Wolf", email = "fabian.wolf2@tu-dresden.de" },
+    { name = "Minh Duc Nguyen", email = "minh_duc.nguyen1@tu-dresden.de" }
 ]
 description = "A protocol for Solid Tumor Associative Modeling in Pathology"
 readme = "README.md"
@@ -45,7 +46,8 @@ dependencies = [
     "torchvision>=0.22.1",
     "tqdm>=4.67.1",
     "timm>=1.0.19",
-    "transformers>=4.55.0"
+    "transformers>=4.55.0",
+    "lifelines>=0.28.0",
 ]
 
 [project.optional-dependencies]
@@ -84,7 +86,6 @@ gigapath = [
     "monai",
     "scikit-image",
     "webdataset",
-    "lifelines",
     "scikit-survival>=0.24.1",
     "fairscale",
     "wandb",
 
@@ -53,7 +53,7 @@ def _run_cli(args: argparse.Namespace) -> None:
     # use default advanced config in case none is provided
     if config.advanced_config is None:
         config.advanced_config = AdvancedConfig(
-            model_params=ModelParams(vit=VitModelParams(), mlp=MlpModelParams())
+            model_params=ModelParams(vit=VitModelParams(), mlp=MlpModelParams()),
         )
 
     # Set global random seed
@@ -65,7 +65,7 @@ def _run_cli(args: argparse.Namespace) -> None:
             raise RuntimeError("this case should be handled above")
 
         case "config":
-            print(yaml.dump(config.model_dump(mode="json")))
+            print(yaml.dump(config.model_dump(mode="json", exclude_none=True)))
 
         case "preprocess":
             from stamp.preprocessing import extract_
@@ -76,7 +76,7 @@ def _run_cli(args: argparse.Namespace) -> None:
             _add_file_handle_(_logger, output_dir=config.preprocessing.output_dir)
             _logger.info(
                 "using the following configuration:\n"
-                f"{yaml.dump(config.preprocessing.model_dump(mode='json'))}"
+                f"{yaml.dump(config.preprocessing.model_dump(mode='json', exclude_none=True))}"
             )
             extract_(
                 output_dir=config.preprocessing.output_dir,
@@ -104,7 +104,7 @@ def _run_cli(args: argparse.Namespace) -> None:
             _add_file_handle_(_logger, output_dir=config.slide_encoding.output_dir)
             _logger.info(
                 "using the following configuration:\n"
-                f"{yaml.dump(config.slide_encoding.model_dump(mode='json'))}"
+                f"{yaml.dump(config.slide_encoding.model_dump(mode='json', exclude_none=True))}"
             )
             init_slide_encoder_(
                 encoder=config.slide_encoding.encoder,
@@ -124,7 +124,7 @@ def _run_cli(args: argparse.Namespace) -> None:
             _add_file_handle_(_logger, output_dir=config.patient_encoding.output_dir)
             _logger.info(
                 "using the following configuration:\n"
-                f"{yaml.dump(config.patient_encoding.model_dump(mode='json'))}"
+                f"{yaml.dump(config.patient_encoding.model_dump(mode='json', exclude_none=True))}"
             )
             init_patient_encoder_(
                 encoder=config.patient_encoding.encoder,
@@ -147,9 +147,12 @@ def _run_cli(args: argparse.Namespace) -> None:
             _add_file_handle_(_logger, output_dir=config.training.output_dir)
             _logger.info(
                 "using the following configuration:\n"
-                f"{yaml.dump(config.training.model_dump(mode='json'))}"
+                f"{yaml.dump(config.training.model_dump(mode='json', exclude_none=True))}"
             )
 
+            if config.training.task is None:
+                raise ValueError("task must be set in training configuration")
+
             train_categorical_model_(
                 config=config.training, advanced=config.advanced_config
             )
@@ -163,19 +166,21 @@ def _run_cli(args: argparse.Namespace) -> None:
             _add_file_handle_(_logger, output_dir=config.deployment.output_dir)
             _logger.info(
                 "using the following configuration:\n"
-                f"{yaml.dump(config.deployment.model_dump(mode='json'))}"
+                f"{yaml.dump(config.deployment.model_dump(mode='json', exclude_none=True))}"
             )
             deploy_categorical_model_(
                 output_dir=config.deployment.output_dir,
                 checkpoint_paths=config.deployment.checkpoint_paths,
                 clini_table=config.deployment.clini_table,
                 slide_table=config.deployment.slide_table,
                 feature_dir=config.deployment.feature_dir,
-                ground_truth_label=config.deployment.ground_truth_label,
                 patient_label=config.deployment.patient_label,
                 filename_label=config.deployment.filename_label,
                 num_workers=config.deployment.num_workers,
                 accelerator=config.deployment.accelerator,
+                ground_truth_label=config.deployment.ground_truth_label,
+                time_label=config.deployment.time_label,
+                status_label=config.deployment.status_label,
             )
 
         case "crossval":
@@ -184,10 +189,13 @@ def _run_cli(args: argparse.Namespace) -> None:
             if config.crossval is None:
                 raise ValueError("no crossval configuration supplied")
 
+            if config.crossval.task is None:
+                raise ValueError("task must be set in crossval configuration")
+
             _add_file_handle_(_logger, output_dir=config.crossval.output_dir)
             _logger.info(
                 "using the following configuration:\n"
-                f"{yaml.dump(config.crossval.model_dump(mode='json'))}"
+                f"{yaml.dump(config.crossval.model_dump(mode='json', exclude_none=True))}"
             )
 
             categorical_crossval_(
@@ -204,13 +212,17 @@ def _run_cli(args: argparse.Namespace) -> None:
             _add_file_handle_(_logger, output_dir=config.statistics.output_dir)
             _logger.info(
                 "using the following configuration:\n"
-                f"{yaml.dump(config.statistics.model_dump(mode='json'))}"
+                f"{yaml.dump(config.statistics.model_dump(mode='json', exclude_none=True))}"
             )
+
             compute_stats_(
+                task=config.statistics.task,
                 output_dir=config.statistics.output_dir,
                 pred_csvs=config.statistics.pred_csvs,
                 ground_truth_label=config.statistics.ground_truth_label,
                 true_class=config.statistics.true_class,
+                time_label=config.statistics.time_label,
+                status_label=config.statistics.status_label,
             )
 
         case "heatmaps":
 
@@ -68,9 +68,16 @@ crossval:
   # are ignored. NOTE: Don't forget to add the .h5 file extension.
   slide_table: "/path/of/slide.csv"
 
+  # Task to infer (classification, regression, survival)
+  task: "classification"
+
   # Name of the column from the clini table to train on.
   ground_truth_label: "KRAS"
 
+  # For survival: names of the status and follow-up days columns in the clini table
+  # status_label: "event"
+  # time_label: "time"
+
   # Optional settings:
   patient_label: "PATIENT"
   filename_label: "FILENAME"
@@ -118,9 +125,16 @@ training:
   # are ignored. NOTE: Don't forget to add the .h5 file extension.
   slide_table: "/path/of/slide.csv"
 
+  # Task to infer (classification, regression, survival)
+  task: "classification"
+
   # Name of the column from the clini table to train on.
   ground_truth_label: "KRAS"
 
+  # For survival: names of the status and follow-up days columns in the clini table
+  # status_label: "event"
+  # time_label: "time"
+
   # Optional settings:
 
   # The categories occurring in the target label column of the clini table.
@@ -156,9 +170,16 @@ deployment:
   # paths are ignored. NOTE: Don't forget to add the .h5 file extension.
   slide_table: "/path/of/slide.csv"
 
+  # Task to infer (classification, regression, survival)
+  task: "classification"
+
   # Name of the column from the clini table to compare predictions to.
   ground_truth_label: "KRAS"
 
+  # For survival: names of the status and follow-up days columns in the clini table
+  # status_label: "event"
+  # time_label: "time"
+
   patient_label: "PATIENT"
   filename_label: "FILENAME"
 
@@ -174,13 +195,20 @@ deployment:
 statistics:
   output_dir: "/path/to/save/files/to"
 
+  # Task to infer (classification, regression, survival)
+  task: "classification"
+
   # Name of the target label.
   ground_truth_label: "KRAS"
 
   # A lot of the statistics are computed "one-vs-all", i.e. there needs to be
   # a positive class to calculate the statistics for.
   true_class: "mutated"
 
+  # For survival: names of the status and follow-up days columns in the clini table
+  # status_label: "event"
+  # time_label: "time"
+
   # The patient predictions to generate the statistics from.
   # For a single deployment, it could look like this:
   pred_csvs:
@@ -277,8 +305,7 @@ patient_encoding:
 
 
 advanced_config:
-  # Optional random seed
-  # seed: 42
+  seed: 42
   max_epochs: 32
   patience: 16
   batch_size: 64
@@ -291,12 +318,10 @@ advanced_config:
   # Determines the initial learning rate via initial_lr = max_lr/div_factor
   max_lr: 1e-4
   div_factor: 25. 
-  # Select a model. Not working yet, added for future support.
-  # Now it uses a ViT for tile features and a MLP for patient features.
-  #model_name: "vit"
+  # Select a model regardless of task
+  model_name: "vit" # or mlp, trans_mil
 
   model_params:
-    # Tile-level training models:
     vit: # Vision Transformer
       dim_model: 512
       dim_feedforward: 512
@@ -306,7 +331,9 @@ advanced_config:
       # Experimental feature: Use ALiBi positional embedding
       use_alibi: false
 
-    # Patient-level training models:
+    trans_mil: # https://arxiv.org/abs/2106.00908
+      dim_hidden: 512
+
     mlp: # Multilayer Perceptron
       dim_hidden: 512
       num_layers: 2
 
@@ -54,7 +54,7 @@ def init_slide_encoder_(
 
             selected_encoder: Encoder = Gigapath()
 
-        case EncoderName.CHIEF:
+        case EncoderName.CHIEF_CTRANSPATH:
             from stamp.encoding.encoder.chief import CHIEF
 
             selected_encoder: Encoder = CHIEF()
@@ -140,7 +140,7 @@ def init_patient_encoder_(
 
             selected_encoder: Encoder = Gigapath()
 
-        case EncoderName.CHIEF:
+        case EncoderName.CHIEF_CTRANSPATH:
             from stamp.encoding.encoder.chief import CHIEF
 
             selected_encoder: Encoder = CHIEF()
 
@@ -9,7 +9,7 @@
 class EncoderName(StrEnum):
     COBRA = "cobra"
     EAGLE = "eagle"
-    CHIEF = "chief"
+    CHIEF_CTRANSPATH = "chief"
     TITAN = "titan"
     GIGAPATH = "gigapath"
     MADELEINE = "madeleine"