diff --git a/backend/Generator/main.py b/backend/Generator/main.py index 04aed79f..40646cef 100644 --- a/backend/Generator/main.py +++ b/backend/Generator/main.py @@ -23,6 +23,9 @@ import fitz import mammoth + + + class MCQGenerator: def __init__(self): @@ -251,6 +254,10 @@ def __init__(self): self.nli_tokenizer = AutoTokenizer.from_pretrained(self.nli_model_name) self.nli_model = AutoModelForSequenceClassification.from_pretrained(self.nli_model_name) + # Explicitly push the NLI model to the detected hardware (GPU or CPU) + self.nli_model.to(self.device) + self.nli_model.eval() + self.set_seed(42) def set_seed(self, seed): @@ -286,7 +293,8 @@ def predict_answer(self, payload): torch.cuda.empty_cache() return answers - + + @torch.no_grad() def predict_boolean_answer(self, payload): input_text = payload.get("input_text", "") input_questions = payload.get("input_question", []) @@ -296,6 +304,10 @@ def predict_boolean_answer(self, payload): for question in input_questions: hypothesis = question inputs = self.nli_tokenizer.encode_plus(input_text, hypothesis, return_tensors="pt") + + # Push the input tensors to the same device as the model + inputs = {key: value.to(self.device) for key, value in inputs.items()} + outputs = self.nli_model(**inputs) logits = outputs.logits probabilities = torch.softmax(logits, dim=1)