From 7e13be2a7617b2beb24575e339fd11f598657c21 Mon Sep 17 00:00:00 2001 From: Sarat Kannan <48374050+sparklerz@users.noreply.github.com> Date: Tue, 10 Jun 2025 13:01:12 +0530 Subject: [PATCH] Switch dataloader to JSONL prompt/response data files --- llm-foundry-finetune/configs/finetune_mpt7b.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llm-foundry-finetune/configs/finetune_mpt7b.yaml b/llm-foundry-finetune/configs/finetune_mpt7b.yaml index 6744158..543f982 100644 --- a/llm-foundry-finetune/configs/finetune_mpt7b.yaml +++ b/llm-foundry-finetune/configs/finetune_mpt7b.yaml @@ -32,13 +32,13 @@ tokenizer: train_loader: name: finetuning dataset: - hf_name: text + hf_name: json split: train - # Use Hugging Face's text loader which expects plain-text files. - # Point to the split's .txt file generated by ``prepare_dolly.py``. + # Use the JSONL files generated by ``prepare_dolly.py`` which contain + # ``prompt`` and ``response`` keys. data_files: - train: data/dolly_15k_txt/train/train.txt - decoder_only_format: true + train: data/dolly_15k_txt/train.jsonl + decoder_only_format: false shuffle: true max_seq_len: 1024 drop_last: false @@ -52,11 +52,11 @@ train_loader: eval_loader: name: finetuning dataset: - hf_name: text + hf_name: json split: validation data_files: - validation: data/dolly_15k_txt/validation/validation.txt - decoder_only_format: true + validation: data/dolly_15k_txt/validation.jsonl + decoder_only_format: false shuffle: false max_seq_len: 1024 drop_last: false