From 68b44bc46e18181a470c7a902e699ab6a7f964b5 Mon Sep 17 00:00:00 2001
From: itz-sidd
Date: Thu, 19 Feb 2026 23:15:13 +0530
Subject: [PATCH 1/2] fix: push NLI model and input tensors to device in
 AnswerPredictor

---
 backend/Generator/main.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/backend/Generator/main.py b/backend/Generator/main.py
index 04aed79f..ed4ebbdb 100644
--- a/backend/Generator/main.py
+++ b/backend/Generator/main.py
@@ -23,6 +23,9 @@ import fitz
 import mammoth
 
 
+
+
+
 class MCQGenerator:
 
     def __init__(self):
@@ -251,6 +254,9 @@ def __init__(self):
         self.nli_tokenizer = AutoTokenizer.from_pretrained(self.nli_model_name)
         self.nli_model = AutoModelForSequenceClassification.from_pretrained(self.nli_model_name)
 
+        # Explicitly push the NLI model to the detected hardware (GPU or CPU)
+        self.nli_model.to(self.device)
+
         self.set_seed(42)
 
     def set_seed(self, seed):
@@ -296,6 +302,10 @@ def predict_boolean_answer(self, payload):
         for question in input_questions:
             hypothesis = question
             inputs = self.nli_tokenizer.encode_plus(input_text, hypothesis, return_tensors="pt")
+
+            # Push the input tensors to the same device as the model
+            inputs = {key: value.to(self.device) for key, value in inputs.items()}
+
             outputs = self.nli_model(**inputs)
             logits = outputs.logits
             probabilities = torch.softmax(logits, dim=1)

From 13cad6e3e02a3e9dd05876bc4bb2e8a83d863b20 Mon Sep 17 00:00:00 2001
From: itz-sidd
Date: Sun, 1 Mar 2026 23:24:06 +0530
Subject: [PATCH 2/2] Optimize NLI model with eval(), no_grad(), and
 device-aware inputs

---
 backend/Generator/main.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/Generator/main.py b/backend/Generator/main.py
index ed4ebbdb..40646cef 100644
--- a/backend/Generator/main.py
+++ b/backend/Generator/main.py
@@ -256,6 +256,7 @@ def __init__(self):
 
         # Explicitly push the NLI model to the detected hardware (GPU or CPU)
         self.nli_model.to(self.device)
+        self.nli_model.eval()
 
         self.set_seed(42)
 
@@ -292,7 +293,8 @@ def predict_answer(self, payload):
 
         torch.cuda.empty_cache()
         return answers
-
+
+    @torch.no_grad()
     def predict_boolean_answer(self, payload):
         input_text = payload.get("input_text", "")
         input_questions = payload.get("input_question", [])