From 68b44bc46e18181a470c7a902e699ab6a7f964b5 Mon Sep 17 00:00:00 2001
From: itz-sidd
Date: Thu, 19 Feb 2026 23:15:13 +0530
Subject: [PATCH 1/2] fix: push NLI model and input tensors to device in
 AnswerPredictor

---
 backend/Generator/main.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/backend/Generator/main.py b/backend/Generator/main.py
index 04aed79f..ed4ebbdb 100644
--- a/backend/Generator/main.py
+++ b/backend/Generator/main.py
@@ -23,6 +23,9 @@ import fitz
 import mammoth
 
 
+
+
+
 class MCQGenerator:
 
     def __init__(self):
@@ -251,6 +254,9 @@ def __init__(self):
         self.nli_tokenizer = AutoTokenizer.from_pretrained(self.nli_model_name)
         self.nli_model = AutoModelForSequenceClassification.from_pretrained(self.nli_model_name)
 
+        # Explicitly push the NLI model to the detected hardware (GPU or CPU)
+        self.nli_model.to(self.device)
+
         self.set_seed(42)
 
     def set_seed(self, seed):
@@ -296,6 +302,10 @@ def predict_boolean_answer(self, payload):
         for question in input_questions:
             hypothesis = question
             inputs = self.nli_tokenizer.encode_plus(input_text, hypothesis, return_tensors="pt")
+
+            # Push the input tensors to the same device as the model
+            inputs = {key: value.to(self.device) for key, value in inputs.items()}
+
             outputs = self.nli_model(**inputs)
             logits = outputs.logits
             probabilities = torch.softmax(logits, dim=1)

From 13cad6e3e02a3e9dd05876bc4bb2e8a83d863b20 Mon Sep 17 00:00:00 2001
From: itz-sidd
Date: Sun, 1 Mar 2026 23:24:06 +0530
Subject: [PATCH 2/2] Optimize NLI model with eval(), no_grad(), and
 device-aware inputs

---
 backend/Generator/main.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/Generator/main.py b/backend/Generator/main.py
index ed4ebbdb..40646cef 100644
--- a/backend/Generator/main.py
+++ b/backend/Generator/main.py
@@ -256,6 +256,7 @@ def __init__(self):
 
         # Explicitly push the NLI model to the detected hardware (GPU or CPU)
         self.nli_model.to(self.device)
+        self.nli_model.eval()
 
         self.set_seed(42)
 
@@ -292,7 +293,8 @@ def predict_answer(self, payload):
 
         torch.cuda.empty_cache()
         return answers
-
+
+    @torch.no_grad()
     def predict_boolean_answer(self, payload):
         input_text = payload.get("input_text", "")
         input_questions = payload.get("input_question", [])