Skip to content

Commit a2a0bff

Browse files
committed
add chat template to tokenizer
1 parent 685696f commit a2a0bff

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

mftcoder_accelerate/src/pefts/mft_accelerate.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -543,7 +543,7 @@ def main():
543543
model.parameters(),
544544
weight_decay=args.weight_decay,
545545
lr=args.learning_rate,
546-
betas=(0.9, 0.95),
546+
betas=(0.9, 0.999),
547547
)
548548
# for group in optimizer.param_groups:
549549
# group.setdefault("initial_lr", group["lr"])

mftcoder_accelerate/src/tokenizer/tokenizer.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from typing import List, Union
99
from utils.common_utils import print_rank_0
1010
from transformers import AutoTokenizer
11+
from tokenizer.chat_template import MFTCoder_template
1112

1213

1314
def build_tokenizer(args):
@@ -20,6 +21,11 @@ def build_tokenizer(args):
2021
tokenizer = AutoTokenizer.from_pretrained(args.pretrained_model_path, trust_remote_code=True)
2122
tokenizer.eod_id = tokenizer.convert_tokens_to_ids(args.eos_token)
2223
tokenizer.pad_id = tokenizer.convert_tokens_to_ids(args.pad_token)
24+
try:
25+
tokenizer.eos_token = args.eos_token
26+
tokenizer.pad_token = args.pad_token
27+
except:
28+
print(f"[WARNING]Cannot set tokenizer.eos_token")
2329
print_rank_0(f"Tokenizer: {type(tokenizer)}")
2430
print_rank_0(f"Length of tokenizer: {len(tokenizer)}")
2531
print_rank_0(f"build_tokenizer PAD id: {tokenizer.pad_id}, EOD id: {tokenizer.eod_id}")

0 commit comments

Comments
 (0)