From 74dad0fd39b2c4db0e7985cc5a362fc7718904e5 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Sun, 18 Oct 2020 16:44:25 +0200 Subject: [PATCH 1/3] separately save and load data (collected samples), using torch --- edge/model/inference/inference.py | 39 ++++++++++++++++++++++++++----- test/gp_test.py | 8 +++++++ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/edge/model/inference/inference.py b/edge/model/inference/inference.py index 1031e55..66ed2ee 100644 --- a/edge/model/inference/inference.py +++ b/edge/model/inference/inference.py @@ -1,3 +1,4 @@ +import numpy as np import gpytorch import torch from sklearn.neighbors import KDTree @@ -184,12 +185,14 @@ def append_data(self, x, y, **kwargs): self._set_gp_data_to_dataset() return self - def save(self, save_path): + def save(self, save_path, save_data=None): """ - Saves the GP in PyTorch format. - PyTorch does NOT save samples or class structure. Such a model cannot be loaded by a simple "file.open" method. + Saves the GP in PyTorch format, and optionally the Dataset object. + PyTorch does NOT save samples or class structure. Such a model cannot + be loaded by a simple "file.open" method. See the GP.load method for more information. - :param save_path: str or Path: the path of the file where to save the model + :param save_path: str or Path: where to save the GP model + :param save_data: str or Path: where to save the Dataset """ save_path = str(save_path) if not save_path.endswith('.pth'): @@ -205,17 +208,22 @@ def save(self, save_path): torch.save(save_dict, save_path) + if save_data: + self.dataset.save(save_data) + + # Careful: composing decorators with @staticmethod can be tricky. 
The @staticmethod decorator should be the last # one, because it does NOT return a method but an observer object @staticmethod @tensorwrap('train_x', 'train_y') - def load(load_path, train_x, train_y): + def load(load_path, train_x, train_y, load_dataset=None): """ - Loads a model saved by the GP.save method, and sets its dataset with train_x, train_y. + Loads a model saved by the GP.save method, and sets its dataset with train_x, train_y. If `load_dataset` evaluates to true, it will then load and replace with a saved dataset. This method may fail if the GP was saved with an older version of the code. :param load_path: str or Path: the path to the file where the GP is saved :param train_x: np.ndarray: training input data. Should be 2D, and interpreted as a list of points. :param train_y: np.ndarray: training output data. Should be 1D, or of shape (train_x.shape[0], 1). + :param load_dataset: optional str or Path to a file where the dataset is saved. :return: GP: an instance of the appropriate subclass of GP """ load_path = str(load_path) @@ -234,6 +242,10 @@ def load(load_path, train_x, train_y): **construction_parameters ) model.load_state_dict(save_dict['state_dict']) + + if load_dataset: + model.dataset.load(load_dataset) + return model @@ -268,6 +280,21 @@ def append(self, append_x, append_y, **kwargs): self.train_x = torch.cat((self.train_x, atleast_2d(append_x)), dim=0) self.train_y = torch.cat((self.train_y, append_y), dim=0) + def save(self, save_path): + save_path = str(save_path) + if not save_path.endswith('.pth'): + save_path += '.pth' + + torch.save({'train_x': self.train_x, + 'train_y': self.train_y}, + save_path) + + def load(self, load_path): + load_path = str(load_path) + save_dict = torch.load(load_path) + self.train_x = save_dict['train_x'] + self.train_y = save_dict['train_y'] + class TimeForgettingDataset(Dataset): """ diff --git a/test/gp_test.py b/test/gp_test.py index 7cc4945..55ebafb 100644 --- a/test/gp_test.py +++ b/test/gp_test.py @@ 
-127,6 +127,14 @@ def test_load_save(self): self.assertEqual(model.covar_module.outputscale, loaded.covar_module.outputscale) + save_data = tempfile.NamedTemporaryFile(suffix='.pth').name + model.save(save_file, save_data=save_data) + self.assertTrue(os.path.isfile(save_file)) + self.assertTrue(os.path.isfile(save_data)) + x2 = np.linspace(2, 3, 11) + loaded = MaternGP.load(save_file, x2, y, save_data) + self.assertTrue(torch.all(torch.eq(model.train_x, loaded.train_x))) + def test_hyper_optimization_0(self): warnings.simplefilter('ignore', gpytorch.utils.warnings.GPInputWarning) From db39f5ab349cea9e3e25bc71e46394b2f77d2f6e Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Sun, 18 Oct 2020 16:46:06 +0200 Subject: [PATCH 2/3] zap unneeded numpy import --- edge/model/inference/inference.py | 1 - 1 file changed, 1 deletion(-) diff --git a/edge/model/inference/inference.py b/edge/model/inference/inference.py index 66ed2ee..093b0ae 100644 --- a/edge/model/inference/inference.py +++ b/edge/model/inference/inference.py @@ -1,4 +1,3 @@ -import numpy as np import gpytorch import torch from sklearn.neighbors import KDTree From 92af77ac1c2642e44271cee64725fceeec763fbd Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Wed, 21 Oct 2020 20:49:15 +0200 Subject: [PATCH 3/3] split saving model and saving the dataset into separate methods --- edge/model/inference/inference.py | 32 ++++++++++++++++++++----------- test/gp_test.py | 7 ++++--- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/edge/model/inference/inference.py b/edge/model/inference/inference.py index 093b0ae..044f8ab 100644 --- a/edge/model/inference/inference.py +++ b/edge/model/inference/inference.py @@ -184,14 +184,13 @@ def append_data(self, x, y, **kwargs): self._set_gp_data_to_dataset() return self - def save(self, save_path, save_data=None): + def save(self, save_path): """ Saves the GP in PyTorch format, and optionally the Dataset object. PyTorch does NOT save samples or class structure. 
Such a model cannot be loaded by a simple "file.open" method. See the GP.load method for more information. :param save_path: str or Path: where to save the GP model - :param save_data: str or Path: where to save the Dataset """ save_path = str(save_path) if not save_path.endswith('.pth'): @@ -207,22 +206,27 @@ def save(self, save_path, save_data=None): torch.save(save_dict, save_path) - if save_data: - self.dataset.save(save_data) - + def save_dataset(self, save_path): + """ + Saves a dataset, using the method implemented in the dataset class. + :param save_path: str or Path: where to save the Dataset + """ + save_path = str(save_path) + if not save_path.endswith('.pth'): + save_path += '.pth' + self.dataset.save(save_path) # Careful: composing decorators with @staticmethod can be tricky. The @staticmethod decorator should be the last # one, because it does NOT return a method but an observer object @staticmethod @tensorwrap('train_x', 'train_y') - def load(load_path, train_x, train_y, load_dataset=None): + def load(load_path, train_x, train_y): """ - Loads a model saved by the GP.save method, and sets its dataset with train_x, train_y. If `load_dataset` evaluates to true, it will then load and replace with a saved dataset. + Loads a model saved by the GP.save method, and sets its dataset with train_x, train_y. This method may fail if the GP was saved with an older version of the code. :param load_path: str or Path: the path to the file where the GP is saved :param train_x: np.ndarray: training input data. Should be 2D, and interpreted as a list of points. :param train_y: np.ndarray: training output data. Should be 1D, or of shape (train_x.shape[0], 1). - :param load_dataset: optional str or Path to a file where the dataset is saved.
:return: GP: an instance of the appropriate subclass of GP """ load_path = str(load_path) @@ -242,11 +246,17 @@ def load(load_path, train_x, train_y, load_dataset=None): ) model.load_state_dict(save_dict['state_dict']) - if load_dataset: - model.dataset.load(load_dataset) - return model + def load_dataset(self, load_path): + """ + Loads and sets `train_x` and `train_y`. + :param load_path: str or Path: the path to the data file + """ + load_path = str(load_path) + self.dataset.load(load_path) + self._set_gp_data_to_dataset() + class Dataset: """ diff --git a/test/gp_test.py b/test/gp_test.py index 55ebafb..007c617 100644 --- a/test/gp_test.py +++ b/test/gp_test.py @@ -128,11 +128,12 @@ def test_load_save(self): loaded.covar_module.outputscale) save_data = tempfile.NamedTemporaryFile(suffix='.pth').name - model.save(save_file, save_data=save_data) - self.assertTrue(os.path.isfile(save_file)) + model.save_dataset(save_data) self.assertTrue(os.path.isfile(save_data)) + # load a new GP with different seed, then load the dataset x2 = np.linspace(2, 3, 11) - loaded = MaternGP.load(save_file, x2, y, save_data) + loaded = MaternGP.load(save_file, x2, y) + loaded.load_dataset(save_data) self.assertTrue(torch.all(torch.eq(model.train_x, loaded.train_x))) def test_hyper_optimization_0(self):