From 39133e344f0a0b39d277527531061a6208ef4f92 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Wed, 3 Sep 2025 14:45:42 +0000
Subject: [PATCH 1/6] only init processor if dataset provided

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/entrypoints/train.py |  2 +-
 src/llmcompressor/entrypoints/utils.py | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/llmcompressor/entrypoints/train.py b/src/llmcompressor/entrypoints/train.py
index 0bfb26e53..f543b619e 100644
--- a/src/llmcompressor/entrypoints/train.py
+++ b/src/llmcompressor/entrypoints/train.py
@@ -63,7 +63,7 @@ def train(**kwargs) -> PreTrainedModel:
         include_training_args=True, **kwargs
     )

-    pre_process(model_args)
+    pre_process(model_args, dataset_args)
     dispatch_for_generation(model_args.model)  # train is dispatched same as generation

     processed_dataset = get_processed_dataset(
diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py
index 95ec832fb..2d5ae4573 100644
--- a/src/llmcompressor/entrypoints/utils.py
+++ b/src/llmcompressor/entrypoints/utils.py
@@ -15,7 +15,12 @@
 )
 from transformers.utils.quantization_config import CompressedTensorsConfig

-from llmcompressor.args import ModelArguments, RecipeArguments, TrainingArguments
+from llmcompressor.args import (
+    ModelArguments,
+    RecipeArguments,
+    TrainingArguments,
+    DatasetArguments,
+)
 from llmcompressor.core import reset_session
 from llmcompressor.pytorch.model_load.helpers import parse_dtype
 from llmcompressor.transformers.sparsification.compressed_tensors_utils import (
@@ -30,7 +35,7 @@
 from llmcompressor.utils.fsdp.helpers import is_fsdp_model


-def pre_process(model_args: "ModelArguments"):
+def pre_process(model_args: ModelArguments, dataset_args: DatasetArguments):
     """
     Prepares the model and tokenizer/processor for calibration.
     - Initializes the model if it's specified as a path or string.
@@ -54,8 +59,11 @@ def pre_process(model_args: "ModelArguments"):
         model_args.model = model
         model_args.distill_teacher = distill_teacher

-    # Initialize processor
-    if isinstance(model_args.processor, (str, type(None))):
+    # Initialize processor if dataset provided
+    if (
+        isinstance(model_args.processor, (str, type(None)))
+        and dataset_args.dataset is not None
+    ):
         model_args.processor = initialize_processor_from_path(
             model_args, model_args.model
         )

From fe1ee28f1c4da118b384a24bb053372e1ef3c9cc Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Wed, 3 Sep 2025 16:36:57 +0000
Subject: [PATCH 2/6] bugfix

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/entrypoints/oneshot.py | 2 +-
 src/llmcompressor/entrypoints/utils.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index 07884fa8c..2b4391f10 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -125,7 +125,7 @@ def __init__(
         self.output_dir = output_dir

         # initialize the model and processor
-        pre_process(model_args)
+        pre_process(model_args, dataset_args)

         # Set instance attributes
         self.model = self.model_args.model
diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py
index 2d5ae4573..aa82f960a 100644
--- a/src/llmcompressor/entrypoints/utils.py
+++ b/src/llmcompressor/entrypoints/utils.py
@@ -16,10 +16,10 @@
 from transformers.utils.quantization_config import CompressedTensorsConfig

 from llmcompressor.args import (
+    DatasetArguments,
     ModelArguments,
     RecipeArguments,
     TrainingArguments,
-    DatasetArguments,
 )
 from llmcompressor.core import reset_session
 from llmcompressor.pytorch.model_load.helpers import parse_dtype
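The net effect of PATCH 1/6 and 2/6 is that `pre_process` now receives `dataset_args` and only auto-initializes the tokenizer/processor when a dataset was actually supplied, so data-free flows no longer fail just because a processor cannot be resolved from the model path. A minimal sketch of that gating logic, using simplified stand-in classes rather than the real llmcompressor argument dataclasses:

```python
# Illustrative sketch only -- ModelArgs/DatasetArgs/init_processor_from_path are
# stand-ins, not the llmcompressor classes; the condition mirrors the patch above.
from dataclasses import dataclass
from typing import Optional


@dataclass
class ModelArgs:
    model: str
    processor: Optional[object] = None


@dataclass
class DatasetArgs:
    dataset: Optional[str] = None


def init_processor_from_path(model_args: ModelArgs, model: str) -> str:
    # stand-in for initialize_processor_from_path (e.g. a from_pretrained call)
    return f"processor-for-{model}"


def pre_process_sketch(model_args: ModelArgs, dataset_args: DatasetArgs) -> ModelArgs:
    # processor is only resolved when it is a path/None AND a dataset is present
    if (
        isinstance(model_args.processor, (str, type(None)))
        and dataset_args.dataset is not None
    ):
        model_args.processor = init_processor_from_path(model_args, model_args.model)
    return model_args


print(pre_process_sketch(ModelArgs("m"), DatasetArgs()).processor)         # None (data-free)
print(pre_process_sketch(ModelArgs("m"), DatasetArgs("calib")).processor)  # processor-for-m
```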
From 3e3ef71a0146d612b602b0db55e860796bea315a Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Thu, 4 Sep 2025 16:50:25 +0000
Subject: [PATCH 3/6] post-deep dive revisions

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/args/dataset_arguments.py |  3 ++
 src/llmcompressor/entrypoints/oneshot.py    |  2 +-
 src/llmcompressor/entrypoints/train.py      |  4 +--
 src/llmcompressor/entrypoints/utils.py      | 33 ++++++++++++++++-----
 4 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/src/llmcompressor/args/dataset_arguments.py b/src/llmcompressor/args/dataset_arguments.py
index 7f74e1f32..738ea37f9 100644
--- a/src/llmcompressor/args/dataset_arguments.py
+++ b/src/llmcompressor/args/dataset_arguments.py
@@ -217,3 +217,6 @@ class DatasetArguments(CustomDatasetArguments):
             "Default is set to True."
         },
     )
+
+    def is_dataset_required(self) -> bool:
+        return self.dataset is not None or self.dataset_path is not None
diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index 2b4391f10..befec1cea 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -125,7 +125,7 @@ def __init__(
         self.output_dir = output_dir

         # initialize the model and processor
-        pre_process(model_args, dataset_args)
+        pre_process(model_args, dataset_args, output_dir)

         # Set instance attributes
         self.model = self.model_args.model
diff --git a/src/llmcompressor/entrypoints/train.py b/src/llmcompressor/entrypoints/train.py
index f543b619e..b04c49d8f 100644
--- a/src/llmcompressor/entrypoints/train.py
+++ b/src/llmcompressor/entrypoints/train.py
@@ -59,11 +59,11 @@ def train(**kwargs) -> PreTrainedModel:
     ```
     """
-    model_args, dataset_args, recipe_args, training_args, _ = parse_args(
+    model_args, dataset_args, recipe_args, training_args, output_dir = parse_args(
         include_training_args=True, **kwargs
     )

-    pre_process(model_args, dataset_args)
+    pre_process(model_args, dataset_args, output_dir)
     dispatch_for_generation(model_args.model)  # train is dispatched same as generation

     processed_dataset = get_processed_dataset(
diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py
index aa82f960a..53ed0b9ae 100644
--- a/src/llmcompressor/entrypoints/utils.py
+++ b/src/llmcompressor/entrypoints/utils.py
@@ -35,7 +35,11 @@
 from llmcompressor.utils.fsdp.helpers import is_fsdp_model


-def pre_process(model_args: ModelArguments, dataset_args: DatasetArguments):
+def pre_process(
+    model_args: ModelArguments,
+    dataset_args: DatasetArguments,
+    output_dir: Optional[str],
+):
     """
     Prepares the model and tokenizer/processor for calibration.
     - Initializes the model if it's specified as a path or string.
@@ -60,13 +64,26 @@ def pre_process(model_args: ModelArguments, dataset_args: DatasetArguments):
         model_args.distill_teacher = distill_teacher

     # Initialize processor if dataset provided
-    if (
-        isinstance(model_args.processor, (str, type(None)))
-        and dataset_args.dataset is not None
-    ):
-        model_args.processor = initialize_processor_from_path(
-            model_args, model_args.model
-        )
+    if isinstance(model_args.processor, (str, type(None))):
+        try:
+            model_args.processor = initialize_processor_from_path(
+                model_args, model_args.model
+            )
+        except Exception as e:
+            if dataset_args.is_dataset_required():
+                raise RuntimeError(
+                    "An error occurred when attempting to initialize "
+                    "model processor, which is required when a dataset "
+                    "is provided. To resolve, create and pass in a "
+                    f"processor directly to `oneshot`/`train`."
+                ) from e
+            elif output_dir:
+                logger.warning(
+                    "Model processor could not be auto-initialized and "
+                    "will not be saved along with the model. To resolve, "
+                    "create and pass in a processor directly to "
+                    "`oneshot`/`train`.\nInitialization Error: {e}"
+                )

     # untie tie_word_embeddings weights
     if not model_args.tie_word_embeddings:

From 088398172b2b093e9fd1d0539f4e0a28e791be39 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Thu, 4 Sep 2025 16:58:32 +0000
Subject: [PATCH 4/6] stylefix

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/entrypoints/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py
index 53ed0b9ae..6ddb8b6fa 100644
--- a/src/llmcompressor/entrypoints/utils.py
+++ b/src/llmcompressor/entrypoints/utils.py
@@ -82,7 +82,7 @@ def pre_process(
                     "Model processor could not be auto-initialized and "
                     "will not be saved along with the model. To resolve, "
                     "create and pass in a processor directly to "
-                    "`oneshot`/`train`.\nInitialization Error: {e}"
+                    f"`oneshot`/`train`.\nInitialization Error: {e}"
                 )

     # untie tie_word_embeddings weights

From 588f04f1fa47bfdcbcdd74d9a3d1ff801be29e70 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Thu, 4 Sep 2025 17:35:55 +0000
Subject: [PATCH 5/6] stylefix

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/entrypoints/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py
index 6ddb8b6fa..0be3fba4e 100644
--- a/src/llmcompressor/entrypoints/utils.py
+++ b/src/llmcompressor/entrypoints/utils.py
@@ -75,7 +75,7 @@ def pre_process(
                     "An error occurred when attempting to initialize "
                     "model processor, which is required when a dataset "
                     "is provided. To resolve, create and pass in a "
-                    f"processor directly to `oneshot`/`train`."
+                    "processor directly to `oneshot`/`train`."
                 ) from e
             elif output_dir:
                 logger.warning(

From f44cdc1ad43ad42be9b9171f972e34c4e53c90a2 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Thu, 4 Sep 2025 19:08:51 +0000
Subject: [PATCH 6/6] rename helper

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/args/dataset_arguments.py | 2 +-
 src/llmcompressor/entrypoints/utils.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llmcompressor/args/dataset_arguments.py b/src/llmcompressor/args/dataset_arguments.py
index 738ea37f9..67ccda6b7 100644
--- a/src/llmcompressor/args/dataset_arguments.py
+++ b/src/llmcompressor/args/dataset_arguments.py
@@ -218,5 +218,5 @@ class DatasetArguments(CustomDatasetArguments):
         },
     )

-    def is_dataset_required(self) -> bool:
+    def is_dataset_provided(self) -> bool:
         return self.dataset is not None or self.dataset_path is not None
diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py
index 0be3fba4e..c0731b805 100644
--- a/src/llmcompressor/entrypoints/utils.py
+++ b/src/llmcompressor/entrypoints/utils.py
@@ -70,7 +70,7 @@ def pre_process(
                 model_args, model_args.model
             )
         except Exception as e:
-            if dataset_args.is_dataset_required():
+            if dataset_args.is_dataset_provided():
                 raise RuntimeError(
                     "An error occurred when attempting to initialize "
                     "model processor, which is required when a dataset "
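After PATCH 3/6 through 6/6, processor initialization is always attempted, but a failure is only fatal when `DatasetArguments.is_dataset_provided()` returns True; if only an `output_dir` is set, a warning is logged and the model is saved without a processor, and otherwise the failure is ignored. A rough restatement of that policy, with hypothetical names (`resolve_processor`, `init_fn`, `dataset_provided`) rather than the library API:

```python
# Sketch of the final error-handling policy (simplified, not the llmcompressor code):
#   init failure + dataset provided -> hard error (calibration needs a processor)
#   init failure + output_dir set   -> warning (model saved without a processor)
#   otherwise                       -> failure is ignored
import logging
from typing import Callable, Optional

logger = logging.getLogger(__name__)


def resolve_processor(
    init_fn: Callable[[], object],  # may raise, e.g. a from_pretrained call
    dataset_provided: bool,         # mirrors DatasetArguments.is_dataset_provided()
    output_dir: Optional[str],
) -> Optional[object]:
    try:
        return init_fn()
    except Exception as e:
        if dataset_provided:
            raise RuntimeError(
                "Processor is required when a dataset is provided; "
                "create and pass one in directly."
            ) from e
        if output_dir:
            logger.warning(
                "Processor could not be auto-initialized and will not be "
                f"saved along with the model. Initialization error: {e}"
            )
        return None


# No dataset and no output_dir: the failure is swallowed and None is returned
assert resolve_processor(lambda: 1 / 0, dataset_provided=False, output_dir=None) is None
```

This keeps data-free quantization paths usable for models whose processor cannot be auto-loaded, while still failing fast when calibration data genuinely requires one.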