# Load the "mosaicml/mpt-1b-redpajama-200b" checkpoint, requesting the
# "torch" attention implementation. trust_remote_code=True is passed through
# to the loader (presumably so the checkpoint's custom model code may run --
# confirm against the model card).
model = MosaicGPT.from_pretrained(
    "mosaicml/mpt-1b-redpajama-200b",
    attn_impl="torch",
    trust_remote_code=True,
)
# Construct the SFTTrainer from the model, tokenizer, training arguments and
# the already-split datasets; all of these names are defined outside this
# snippet.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_train_data["train"],
    eval_dataset=tokenized_val_data["validation"],
    dataset_text_field="text",
    args=training_args,
    # The one setting that matters most for this run (per the original
    # author's note): the NEFTune noise alpha.
    neftune_noise_alpha=5,
)
and potentially others.