From 74dad0fd39b2c4db0e7985cc5a362fc7718904e5 Mon Sep 17 00:00:00 2001
From: Steve Heim <heim.steve@gmail.com>
Date: Sun, 18 Oct 2020 16:44:25 +0200
Subject: [PATCH 1/3] separately save and load data (collected samples), using
 torch

---
 edge/model/inference/inference.py | 39 ++++++++++++++++++++++++++-----
 test/gp_test.py                   |  8 +++++++
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/edge/model/inference/inference.py b/edge/model/inference/inference.py
index 1031e55..66ed2ee 100644
--- a/edge/model/inference/inference.py
+++ b/edge/model/inference/inference.py
@@ -1,3 +1,4 @@
+import numpy as np
 import gpytorch
 import torch
 from sklearn.neighbors import KDTree
@@ -184,12 +185,14 @@ def append_data(self, x, y, **kwargs):
         self._set_gp_data_to_dataset()
         return self
 
-    def save(self, save_path):
+    def save(self, save_path, save_data=None):
         """
-        Saves the GP in PyTorch format.
-        PyTorch does NOT save samples or class structure. Such a model cannot be loaded by a simple "file.open" method.
+        Saves the GP in PyTorch format, and optionally the Dataset object.
+        PyTorch does NOT save samples or class structure. Such a model cannot
+        be loaded by a simple "file.open" method.
         See the GP.load method for more information.
-        :param save_path: str or Path: the path of the file where to save the model
+        :param save_path: str or Path: where to save the GP model
+        :param save_data: str or Path: where to save the Dataset
         """
         save_path = str(save_path)
         if not save_path.endswith('.pth'):
@@ -205,17 +208,22 @@ def save(self, save_path):
 
         torch.save(save_dict, save_path)
 
+        if save_data:
+            self.dataset.save(save_data)
+
+
     # Careful: composing decorators with @staticmethod can be tricky. The @staticmethod decorator should be the last
     # one, because it does NOT return a method but an observer object
     @staticmethod
     @tensorwrap('train_x', 'train_y')
-    def load(load_path, train_x, train_y):
+    def load(load_path, train_x, train_y, load_dataset=None):
         """
-        Loads a model saved by the GP.save method, and sets its dataset with train_x, train_y.
+        Loads a model saved by the GP.save method, and sets its dataset with train_x, train_y. If `load_dataset` evaluates to true, it will then load and replace with a saved dataset.
         This method may fail if the GP was saved with an older version of the code.
         :param load_path: str or Path: the path to the file where the GP is saved
         :param train_x: np.ndarray: training input data. Should be 2D, and interpreted as a list of points.
         :param train_y: np.ndarray: training output data. Should be 1D, or of shape (train_x.shape[0], 1).
+        :param load_dataset: optional str or Path to a file where the dataset is saved.
         :return: GP: an instance of the appropriate subclass of GP
         """
         load_path = str(load_path)
@@ -234,6 +242,10 @@ def load(load_path, train_x, train_y):
             **construction_parameters
         )
         model.load_state_dict(save_dict['state_dict'])
+
+        if load_dataset:
+            model.dataset.load(load_dataset)
+
         return model
 
 
@@ -268,6 +280,21 @@ def append(self, append_x, append_y, **kwargs):
         self.train_x = torch.cat((self.train_x, atleast_2d(append_x)), dim=0)
         self.train_y = torch.cat((self.train_y, append_y), dim=0)
 
+    def save(self, save_path):
+        save_path = str(save_path)
+        if not save_path.endswith('.pth'):
+            save_path += '.pth'
+
+        torch.save({'train_x': self.train_x,
+                    'train_y': self.train_y},
+                   save_path)
+
+    def load(self, load_path):
+        load_path = str(load_path)
+        save_dict = torch.load(load_path)
+        self.train_x = save_dict['train_x']
+        self.train_y = save_dict['train_y']
+
 
 class TimeForgettingDataset(Dataset):
     """
diff --git a/test/gp_test.py b/test/gp_test.py
index 7cc4945..55ebafb 100644
--- a/test/gp_test.py
+++ b/test/gp_test.py
@@ -127,6 +127,14 @@ def test_load_save(self):
         self.assertEqual(model.covar_module.outputscale,
                          loaded.covar_module.outputscale)
 
+        save_data = tempfile.NamedTemporaryFile(suffix='.pth').name
+        model.save(save_file, save_data=save_data)
+        self.assertTrue(os.path.isfile(save_file))
+        self.assertTrue(os.path.isfile(save_data))
+        x2 = np.linspace(2, 3, 11)
+        loaded = MaternGP.load(save_file, x2, y, save_data)
+        self.assertTrue(torch.all(torch.eq(model.train_x, loaded.train_x)))
+
     def test_hyper_optimization_0(self):
         warnings.simplefilter('ignore', gpytorch.utils.warnings.GPInputWarning)
 

From db39f5ab349cea9e3e25bc71e46394b2f77d2f6e Mon Sep 17 00:00:00 2001
From: Steve Heim <heim.steve@gmail.com>
Date: Sun, 18 Oct 2020 16:46:06 +0200
Subject: [PATCH 2/3] zap unneeded numpy import

---
 edge/model/inference/inference.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/edge/model/inference/inference.py b/edge/model/inference/inference.py
index 66ed2ee..093b0ae 100644
--- a/edge/model/inference/inference.py
+++ b/edge/model/inference/inference.py
@@ -1,4 +1,3 @@
-import numpy as np
 import gpytorch
 import torch
 from sklearn.neighbors import KDTree

From 92af77ac1c2642e44271cee64725fceeec763fbd Mon Sep 17 00:00:00 2001
From: Steve Heim <heim.steve@gmail.com>
Date: Wed, 21 Oct 2020 20:49:15 +0200
Subject: [PATCH 3/3] split saving model and saving the dataset into separate
 methods

---
 edge/model/inference/inference.py | 32 ++++++++++++++++++++-----------
 test/gp_test.py                   |  7 ++++---
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/edge/model/inference/inference.py b/edge/model/inference/inference.py
index 093b0ae..044f8ab 100644
--- a/edge/model/inference/inference.py
+++ b/edge/model/inference/inference.py
@@ -184,14 +184,13 @@ def append_data(self, x, y, **kwargs):
         self._set_gp_data_to_dataset()
         return self
 
-    def save(self, save_path, save_data=None):
+    def save(self, save_path):
         """
         Saves the GP in PyTorch format, and optionally the Dataset object.
         PyTorch does NOT save samples or class structure. Such a model cannot
         be loaded by a simple "file.open" method.
         See the GP.load method for more information.
         :param save_path: str or Path: where to save the GP model
-        :param save_data: str or Path: where to save the Dataset
         """
         save_path = str(save_path)
         if not save_path.endswith('.pth'):
@@ -207,22 +206,27 @@ def save(self, save_path, save_data=None):
 
         torch.save(save_dict, save_path)
 
-        if save_data:
-            self.dataset.save(save_data)
-
+    def save_dataset(self, save_path):
+        """
+        Saves a dataset, using the method implemented in the dataset class.
+        :param save_path: str or Path: where to save the Dataset ('.pth' is appended if missing)
+        """
+        save_path = str(save_path)
+        if not save_path.endswith('.pth'):
+            save_path += '.pth'
+        self.dataset.save(save_path)
 
     # Careful: composing decorators with @staticmethod can be tricky. The @staticmethod decorator should be the last
     # one, because it does NOT return a method but an observer object
     @staticmethod
     @tensorwrap('train_x', 'train_y')
-    def load(load_path, train_x, train_y, load_dataset=None):
+    def load(load_path, train_x, train_y):
         """
-        Loads a model saved by the GP.save method, and sets its dataset with train_x, train_y. If `load_dataset` evaluates to true, it will then load and replace with a saved dataset.
+        Loads a model saved by the GP.save method, and sets its dataset with train_x, train_y.
         This method may fail if the GP was saved with an older version of the code.
         :param load_path: str or Path: the path to the file where the GP is saved
         :param train_x: np.ndarray: training input data. Should be 2D, and interpreted as a list of points.
         :param train_y: np.ndarray: training output data. Should be 1D, or of shape (train_x.shape[0], 1).
-        :param load_dataset: optional str or Path to a file where the dataset is saved.
         :return: GP: an instance of the appropriate subclass of GP
         """
         load_path = str(load_path)
@@ -242,11 +246,17 @@ def load(load_path, train_x, train_y, load_dataset=None):
         )
         model.load_state_dict(save_dict['state_dict'])
 
-        if load_dataset:
-            model.dataset.load(load_dataset)
-
         return model
 
+    def load_dataset(self, load_path):
+        """
+        Loads and sets `train_x` and `train_y`.
+        :param load_path: str or Path: the path to the data file
+        """
+        load_path = str(load_path)
+        self.dataset.load(load_path)
+        self._set_gp_data_to_dataset()
+
 
 class Dataset:
     """
diff --git a/test/gp_test.py b/test/gp_test.py
index 55ebafb..007c617 100644
--- a/test/gp_test.py
+++ b/test/gp_test.py
@@ -128,11 +128,12 @@ def test_load_save(self):
                          loaded.covar_module.outputscale)
 
         save_data = tempfile.NamedTemporaryFile(suffix='.pth').name
-        model.save(save_file, save_data=save_data)
-        self.assertTrue(os.path.isfile(save_file))
+        model.save_dataset(save_data)
         self.assertTrue(os.path.isfile(save_data))
+        # load the GP with different training inputs, then load the saved dataset
         x2 = np.linspace(2, 3, 11)
-        loaded = MaternGP.load(save_file, x2, y, save_data)
+        loaded = MaternGP.load(save_file, x2, y)
+        loaded.load_dataset(save_data)
         self.assertTrue(torch.all(torch.eq(model.train_x, loaded.train_x)))
 
     def test_hyper_optimization_0(self):