IBM · yoavkatz · Feb 9, 2026 · Jan 18, 2026 · Jan 18, 2026
diff --git a/examples/qa_evaluation.py b/examples/qa_evaluation.py
@@ -26,6 +26,7 @@
     template="templates.qa.open",
     split="test",
     format="formats.chat_api",
+    data_classification_policy=["public"],
 )
 
 model = CrossProviderInferenceEngine(model="SmolLM2-1.7B-Instruct", provider="hf-local")

diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
@@ -728,9 +728,9 @@ def _get_model_args(self) -> Dict[str, Any]:
             args["quantization_config"] = quantization_config
         elif self.use_fp16:
             if self.device == torch.device("mps"):
-                args["torch_dtype"] = torch.float16
+                args["dtype"] = torch.float16
             else:
-                args["torch_dtype"] = torch.bfloat16
+                args["dtype"] = torch.bfloat16
 
         # We do this, because in some cases, using device:auto will offload some weights to the cpu
         # (even though the model might *just* fit to a single gpu), even if there is a gpu available, and this will
@@ -937,7 +937,7 @@ def _init_model(self):
 
         self.model = LlavaForConditionalGeneration.from_pretrained(
             self.model_name,
-            torch_dtype=self._get_torch_dtype(),
+            dtype=self._get_torch_dtype(),
             low_cpu_mem_usage=self.low_cpu_mem_usage,
             device_map=self.device_map,
         )
@@ -1108,7 +1108,7 @@ def _init_model(self):
             trust_remote_code=True,
             device_map=self.device_map,
             low_cpu_mem_usage=self.low_cpu_mem_usage,
-            torch_dtype=self._get_torch_dtype(),
+            dtype=self._get_torch_dtype(),
         )
         self.model = self.model.to(
             dtype=self._get_torch_dtype()
@@ -1197,9 +1197,9 @@ def _get_model_args(self) -> Dict[str, Any]:
             args["quantization_config"] = quantization_config
         elif self.use_fp16:
             if self.device == torch.device("mps"):
-                args["torch_dtype"] = torch.float16
+                args["dtype"] = torch.float16
             else:
-                args["torch_dtype"] = torch.bfloat16
+                args["dtype"] = torch.bfloat16
 
         # We do this, because in some cases, using device:auto will offload some weights to the cpu
         # (even though the model might *just* fit to a single gpu), even if there is a gpu available, and this will