From 49b49c632c1ed9fe749351ec60d73aa379d0e4b8 Mon Sep 17 00:00:00 2001
From: Anjila
Date: Fri, 31 Oct 2025 17:06:31 -0400
Subject: [PATCH] fix: qwen2 teacher/student model vocab size mismatch

---
 minillm/finetune.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/minillm/finetune.py b/minillm/finetune.py
index 56ff7584..ecf71a7f 100644
--- a/minillm/finetune.py
+++ b/minillm/finetune.py
@@ -159,6 +159,10 @@ def get_distil_loss(args, tokenizer, model, teacher_model, model_batch, no_model
         teacher_model.eval()
         teacher_outputs = teacher_model(**model_batch, use_cache=False)
         teacher_logits = teacher_outputs.logits
+        if args.model_type == 'qwen2':
+            # Under ZeRO the model is wrapped, so read the vocab size from model.module.config; otherwise read it directly from model.config
+            student_vocab_size = model.module.config.vocab_size if hasattr(model, "module") else model.config.vocab_size
+            teacher_logits = teacher_logits[:, :, :student_vocab_size]
     if args.model_parallel:
         distil_losses = mpu.parallel_soft_cross_entropy_loss(logits.float(), teacher_logits.float())
         distil_losses = distil_losses.view(-1)
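
Note for reviewers: below is a minimal, self-contained sketch of the truncation trick, separate from the patch itself. The function name distil_loss_with_vocab_truncation and the toy tensors are illustrative only, not part of this change. The sketch assumes, as the patch does, that teacher and student share the same tokenizer and that the teacher's extra logit columns are trailing padding slots (e.g. larger Qwen2 checkpoints pad the embedding to 152064 rows while the smaller ones use 151936), so slicing the last dimension is safe.

import torch
import torch.nn.functional as F

def distil_loss_with_vocab_truncation(student_logits, teacher_logits):
    # Both tensors are [batch, seq, vocab]; the teacher's vocab dim may be
    # larger because its checkpoint pads the embedding matrix.
    student_vocab_size = student_logits.size(-1)
    # Drop the teacher's padded logit slots so both distributions are
    # defined over the same token ids.
    teacher_logits = teacher_logits[:, :, :student_vocab_size]
    # Soft cross-entropy (forward KL up to a constant):
    # -sum_v p_teacher(v) * log p_student(v), averaged over positions.
    teacher_probs = F.softmax(teacher_logits.float(), dim=-1)
    student_logprobs = F.log_softmax(student_logits.float(), dim=-1)
    return -(teacher_probs * student_logprobs).sum(dim=-1).mean()

# Toy shapes only, chosen to mirror the Qwen2 mismatch.
student = torch.randn(2, 8, 151936)
teacher = torch.randn(2, 8, 152064)
loss = distil_loss_with_vocab_truncation(student, teacher)

One design consequence worth noting: because the softmax is taken after truncation, the teacher's probability mass is renormalized over the student's vocabulary rather than leaving a hole where the padded token slots were.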