From 49b49c632c1ed9fe749351ec60d73aa379d0e4b8 Mon Sep 17 00:00:00 2001
From: Anjila
Date: Fri, 31 Oct 2025 17:06:31 -0400
Subject: [PATCH] fix: qwen2 teacher/student model vocab size mismatch

---
 minillm/finetune.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/minillm/finetune.py b/minillm/finetune.py
index 56ff7584..ecf71a7f 100644
--- a/minillm/finetune.py
+++ b/minillm/finetune.py
@@ -159,6 +159,10 @@ def get_distil_loss(args, tokenizer, model, teacher_model, model_batch, no_model
         teacher_model.eval()
         teacher_outputs = teacher_model(**model_batch, use_cache=False)
         teacher_logits = teacher_outputs.logits
+        if args.model_type == 'qwen2':
+            # Under ZeRO the model is wrapped, so read the vocab size from model.module.config; otherwise read it directly from model.config
+            student_vocab_size = model.module.config.vocab_size if hasattr(model, "module") else model.config.vocab_size
+            teacher_logits = teacher_logits[:, :, :student_vocab_size]
     if args.model_parallel:
         distil_losses = mpu.parallel_soft_cross_entropy_loss(logits.float(), teacher_logits.float())
         distil_losses = distil_losses.view(-1)
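
Note for reviewers: below is a minimal, self-contained sketch of the truncation trick, separate from the patch itself. The function name distil_loss_with_vocab_truncation and the toy tensors are illustrative only, not part of this change. The sketch assumes, as the patch does, that teacher and student share the same tokenizer and that the teacher's extra logit columns are trailing padding slots (e.g. larger Qwen2 checkpoints pad the embedding to 152064 rows while the smaller ones use 151936), so slicing the last dimension is safe.

import torch
import torch.nn.functional as F

def distil_loss_with_vocab_truncation(student_logits, teacher_logits):
    # Both tensors are [batch, seq, vocab]; the teacher's vocab dim may be
    # larger because its checkpoint pads the embedding matrix.
    student_vocab_size = student_logits.size(-1)
    # Drop the teacher's padded logit slots so both distributions are
    # defined over the same token ids.
    teacher_logits = teacher_logits[:, :, :student_vocab_size]
    # Soft cross-entropy (forward KL up to a constant):
    # -sum_v p_teacher(v) * log p_student(v), averaged over positions.
    teacher_probs = F.softmax(teacher_logits.float(), dim=-1)
    student_logprobs = F.log_softmax(student_logits.float(), dim=-1)
    return -(teacher_probs * student_logprobs).sum(dim=-1).mean()

# Toy shapes only, chosen to mirror the Qwen2 mismatch.
student = torch.randn(2, 8, 151936)
teacher = torch.randn(2, 8, 152064)
loss = distil_loss_with_vocab_truncation(student, teacher)

One design consequence worth noting: because the softmax is taken after truncation, the teacher's probability mass is renormalized over the student's vocabulary rather than leaving a hole where the padded token slots were.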