From 7e13be2a7617b2beb24575e339fd11f598657c21 Mon Sep 17 00:00:00 2001 From: Sarat Kannan <48374050+sparklerz@users.noreply.github.com> Date: Tue, 10 Jun 2025 13:01:12 +0530 Subject: [PATCH] Switch dataloader to JSONL prompt/response data files --- llm-foundry-finetune/configs/finetune_mpt7b.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llm-foundry-finetune/configs/finetune_mpt7b.yaml b/llm-foundry-finetune/configs/finetune_mpt7b.yaml index 6744158..543f982 100644 --- a/llm-foundry-finetune/configs/finetune_mpt7b.yaml +++ b/llm-foundry-finetune/configs/finetune_mpt7b.yaml @@ -32,13 +32,13 @@ tokenizer: train_loader: name: finetuning dataset: - hf_name: text + hf_name: json split: train - # Use Hugging Face's text loader which expects plain-text files. - # Point to the split's .txt file generated by ``prepare_dolly.py``. + # Use the JSONL files generated by ``prepare_dolly.py`` which contain + # ``prompt`` and ``response`` keys. data_files: - train: data/dolly_15k_txt/train/train.txt - decoder_only_format: true + train: data/dolly_15k_txt/train.jsonl + decoder_only_format: false shuffle: true max_seq_len: 1024 drop_last: false @@ -52,11 +52,11 @@ train_loader: eval_loader: name: finetuning dataset: - hf_name: text + hf_name: json split: validation data_files: - validation: data/dolly_15k_txt/validation/validation.txt - decoder_only_format: true + validation: data/dolly_15k_txt/validation.jsonl + decoder_only_format: false shuffle: false max_seq_len: 1024 drop_last: false