diff --git a/.gitignore b/.gitignore index abe4a4a..a52bb86 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ *.pyc *.swp -*.json *.h5 -*.txt -.DS_Store \ No newline at end of file +.DS_Store +build/ +dist/ +nnlo.egg-info/ +publish.sh +.local/ diff --git a/models/__init__.py b/HISTORY.md similarity index 100% rename from models/__init__.py rename to HISTORY.md diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..e69de29 diff --git a/README.md b/README.md index 88445f8..6ccd65e 100644 --- a/README.md +++ b/README.md @@ -8,38 +8,45 @@ The original package was implemented by [Dustin Anderson](https://github.com/dua ## Examples -Test with the MNIST dataset, with keras+tensorflow +Install the package ``` -git clone https://github.com/vlimant/NNLO.git +pip install nnlo cd NNLO ``` -Example with mnist provided in a python file +Example with mnist using pre-defined model ``` -python3 models/get_mnist.py -mpirun -np 3 --tag-output python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 3 -mpirun -np 3 --tag-output python3 TrainingDriver.py --model examples/example_mnist_torch.py --loss categorical_crossentropy --epochs 3 +GetData mnist +mpirun -np 3 TrainingDriver --model mnist --loss categorical_crossentropy --epochs 3 --trial-name n3g1epoch3 --train_data /path/to/train_mnist.list --val_data /path/to/test_mnist.list +jsrun -n 3 -g 1 TrainingDriver --model mnist --loss categorical_crossentropy --epochs 3 --trial-name n3g1epoch3 --train_data /path/to/train_mnist.list --val_data /path/to/test_mnist.list +``` +Example with mnist using user-defined model +``` +export PYTHONPATH=/path/to/:$PYTHONPATH +mpirun -np 3 TrainingDriver --model /path/to/mymodel.py --loss categorical_crossentropy --epochs 3 --trial-name n3g1epoch3 --train_data /path/to/train_mnist.list --val_data /path/to/test_mnist.list +jsrun -n 3 -g 1 TrainingDriver --model /path/to/mymodel.py --loss categorical_crossentropy --epochs 3 --trial-name n3g1epoch3 --train_data /path/to/train_mnist.list --val_data /path/to/test_mnist.list ``` -Example with the cifar10 with model json +Example with the cifar10 using pre-defined model ``` -python3 models/BuildModel.py cifar10 +GetData cifar10 python3 models/get_cifar10.py -mpirun -np 3 --tag-output python3 TrainingDriver.py --model cifar10_arch.json --train train_cifar10.list --val test_cifar10.list --loss categorical_crossentropy --epochs 5 +mpirun -np 3 TrainingDriver --model cifar10 --loss categorical_crossentropy --epochs 3 --trial-name n3g1epoch3 --train_data /path/to/train_cifar10.list --val_data /path/to/test_cifar10.list +jsrun -n 3 -g 1 TrainingDriver --model cifar10 --loss categorical_crossentropy --epochs 3 --trial-name n3g1epoch3 --train_data /path/to/train_cifar10.list --val_data /path/to/test_cifar10.list ``` Example of training mnist with 2 workers, each with 2 process per Horovod ring ``` -mpirun -np 5 --tag-output python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 3 --n-processes 2 +mpirun -np 5 python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 3 --n-processes 2 ``` Example of training mnist with early stopping ``` -mpirun -np 3 --tag-output python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 10000 --early "val_loss,~<,4" +mpirun -np 3 python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 10000 --early "val_loss,~<,4" 
``` Example of training with a fixed target ``` -mpirun -np 3 --tag-output python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 10000 --target-metric "val_acc,>,0.97" +mpirun -np 3 python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 10000 --target-metric "val_acc,>,0.97" ``` ## GAN Examples (experimental) @@ -63,6 +70,14 @@ mpirun -tag-output -n 3 python3 MPIGDriver.py dummy.json train_3d.list test_1_3d See `TrainingDriver.py` for supported optional arguments. Run the script via `mpirun` or `mpiexec`. It automatically detects available NVIDIA GPUs and allocate them among the MPI worker processes. +## Analyse scaling + +After running jobs with multiple GPUs, a number of `model_*_history.json` files are created +``` +PrintTable model_*_history.json +PlotLoss model_*_history.json +``` + ## Customizing the training process The provided `TrainingDriver.py` script handles the case of a model that is specified in JSON format and training data that is stored in HDF5 files. However, the construction of the model and the loading of input data are easily customized. @@ -70,7 +85,7 @@ The provided `TrainingDriver.py` script handles the case of a model that is spec #### Model Use the ModelBuilder class to specify how your model should be constructed: -[mpi_learn/train/model.py](mpi_learn/train/model.py) +[nnlo/train/model.py](nnlo/train/model.py) To specify your model, create a new class deriving from ModelBuilder and override the `build_model()` method. This method should take no arguments and return the Keras model you wish to train. @@ -81,7 +96,7 @@ The provided ModelFromJson class is a specialized ModelBuilder that constructs a #### Training/Testing data Use the Data class to specify how batches of training data should be generated: -[mpi_learn/train/data.py](mpi_learn/train/data.py) +[nnlo/train/data.py](nnlo/train/data.py) To specify your training data, create a new class deriving from Data and override the `generate_data()` method. The `generate_data` method should act as follows: - yield batches of training data in the form required for training with Keras, i.e. ( [x1, x2, ...], [y1, y2, ...] ) @@ -94,10 +109,10 @@ Note: `generate_data` should not continue to yield training batches forever; rat #### Optimization Procedure Use the Algo class to configure the details of the training algorithm: -[mpi_learn/train/algo.py](mpi_learn/train/algo.py) +[nnlo/train/algo.py](nnlo/train/algo.py) Provide an instance of the Algo class when you construct the MPIManager (see below). The Algo constructor takes several arguments that specify aspects of the training process: -- `optimizer`: supported arguments are `'sgd'`, `'adadelta'`, `'rmsprop'`, and `'adam'`. For optimizers that have tunable parameters, please specify the values of those parameters as additional arguments (see [mpi_learn/train/optimizer.py](mpi_learn/train/optimizer.py) for details on the individual optimizers) +- `optimizer`: supported arguments are `'sgd'`, `'adadelta'`, `'rmsprop'`, and `'adam'`. For optimizers that have tunable parameters, please specify the values of those parameters as additional arguments (see [nnlo/train/optimizer.py](nnlo/train/optimizer.py) for details on the individual optimizers) - `loss`: loss function, specified as a string, e.g. 'categorical_crossentropy' - `validate_every`: number of gradient updates to process before performing validation. Set to 0 to disable validation. 
- `sync_every`: number of batches for workers to process between gradient updates (default 1) @@ -123,7 +138,7 @@ Training is initiated by an instance of the MPIManager class, which initializes - `train_list`, `val_list`: lists of inputs files to use for training and validation. Each MPI process should be able to access any/all of the input files; the MPIManager will split the input files among the available worker processes. - `callbacks`: list of `keras` callback objects, to be executed by the master process -Other options are available as well: see [mpi_learn/mpi/manager.py](mpi_learn/mpi/manager.py) +Other options are available as well: see [nnlo/mpi/manager.py](nnlo/mpi/manager.py) ### Training algorithm overview diff --git a/examples/example_mnist.py b/examples/example_mnist.py deleted file mode 100644 index a6b11bd..0000000 --- a/examples/example_mnist.py +++ /dev/null @@ -1,39 +0,0 @@ -from models.Models import make_mnist_model - -get_model = make_mnist_model -def get_name(): - return 'mnist' - -def get_all(): - import socket,os,glob - host = os.environ.get('HOST',os.environ.get('HOSTNAME',socket.gethostname())) - - if 'daint' in host: - all_list = glob.glob('/scratch/snx3000/vlimant/data/mnist/*.h5') - elif 'titan' in host: - all_list = glob.glob('/ccs/proj/csc291/DATA/mnist/*.h5') - else: - all_list = glob.glob('/bigdata/shared/mnist/*.h5') - if not all_list: - all_list = glob.glob('mnist_*.h5') - return all_list - -def get_train(): - all_list = get_all() - l = int( len(all_list)*0.70) - train_list = all_list[:l] - return train_list - -def get_val(): - all_list = get_all() - l = int( len(all_list)*0.70) - val_list = all_list[l:] - return val_list - -def get_features(): - #return ('features', lambda x: x) ##example of data adaptor - return 'features' - -def get_labels(): - return 'labels' - diff --git a/examples/example_mnist_torch.py b/examples/example_mnist_torch.py deleted file mode 100644 index af916c9..0000000 --- a/examples/example_mnist_torch.py +++ /dev/null @@ -1,5 +0,0 @@ -from models.Models import make_mnist_torch_model -from examples.example_mnist import * - -get_model = make_mnist_torch_model - diff --git a/models/BuildModel.py b/models/BuildModel.py deleted file mode 100644 index 1529ca7..0000000 --- a/models/BuildModel.py +++ /dev/null @@ -1,50 +0,0 @@ -### Builds one of the available models. 
-# Saves model architecture to _arch.json -# and model weights to _weights.h5 - -import os -os.environ['CUDA_VISIBLE_DEVICES']="" -import argparse - -from Models import make_model - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('model_name', help='model to construct') - parser.add_argument('model_args', nargs='*', help='key=value to pass to the model',default=[]) - args = parser.parse_args() - model_name = args.model_name - model_args = {} - for kw in args.model_args: - k,v = kw.split('=') - try: - v = int(v) - except: - v= float(v) - model_args[k] = v - if model_args: - print ("passing",model_args,"to the model builder") - model = make_model( model_name ,**model_args) - else: - model = make_model( model_name) - weights_filename = "%s_weights.h5" % model_name - arch_filename = "%s_arch.json" % model_name - - if not "torch" in model_name: - model.summary() - model.save_weights( weights_filename, overwrite=True ) - print ("Saved model weights to {0}".format(weights_filename)) - - model_arch = model.to_json() - with open( arch_filename, 'w' ) as arch_file: - arch_file.write( model_arch ) - print ("Saved model architecture to {0}".format(arch_filename)) - else: - import torch - weights_filename = weights_filename.replace('h5','torch') - arch_filename = arch_filename.replace('json','torch') - torch.save(model.state_dict(), weights_filename) - print ("Saved model weights to {0}".format(weights_filename)) - torch.save(model, arch_filename) - print ("Saved model architecture to {0}".format(arch_filename)) - diff --git a/models/Models.py b/models/Models.py deleted file mode 100644 index 734f784..0000000 --- a/models/Models.py +++ /dev/null @@ -1,247 +0,0 @@ -### Predefined Keras models - -import sys -import logging - -def model_function(model_name): - """Constructs the Keras model indicated by model_name""" - model_maker_dict = { - 'example':make_example_model, - 'mnist':make_mnist_model, - 'cifar10':make_cifar10_model, - 'mnist_torch':make_mnist_torch_model, - 'topclass': make_topclass_model, - 'topclass_torch':make_topclass_torch_model - - } - return model_maker_dict[model_name] -def make_model(model_name, **args): - m_fn = model_function(model_name) - if args and hasattr(m_fn,'parameter_range'): - provided = set(args.keys()) - accepted = set([a.name for a in m_fn.parameter_range]) - if not provided.issubset( accepted ): - logging.error("provided arguments {} do not match the accepted ones {}".format(sorted(provided),sorted(accepted))) - sys.exit(-1) - return model_function(model_name)(**args) - -def make_example_model(): - """Example model from keras documentation""" - from keras.models import Sequential - from keras.layers import Dense, Activation - model = Sequential() - model.add(Dense(output_dim=64, input_dim=100)) - model.add(Activation("relu")) - model.add(Dense(output_dim=10)) - model.add(Activation("softmax")) - return model - -def make_topclass_model(**args): - from keras.models import Sequential, Model - from keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute - from keras.layers import Convolution2D, MaxPooling2D, Conv2D - if args:logging.debug("receiving arguments {}".format(args)) - conv_layers=args.get('conv_layers',2) - dense_layers=args.get('dense_layers',2) - dropout=args.get('dropout',0.2) - kernel = args.get('kernel_size',3) - classes=3 - in_channels=5 - in_ch = in_channels - ## the trace in the input file is 750, 150, 94, 5 - input = Input( (150,94,in_ch)) - ## convs - c = input - for i in range(conv_layers): - 
channel_in = in_ch*((i+1)%5) - channel_out = in_ch*((i+2)%5) - if channel_in == 0: channel_in += 1 - if channel_out == 0: channel_out += 1 - c = Conv2D( filters=channel_out, kernel_size=(kernel,kernel) , strides=1, padding="same", activation = 'relu') (c) - c = Conv2D(1, (kernel,kernel), activation = 'relu',strides=2, padding="same")(c) - - ## pooling - pool = args.get('pool', 10) - m = MaxPooling2D((pool,pool))(c) - f = Flatten()(m) - d = f - base = args.get('hidden_factor',5)*100 - for i in range(dense_layers): - N = int(base//(2**(i+1))) - d = Dense( N, activation='relu')(d) - if dropout: - d = Dropout(dropout)(d) - o = Dense(classes, activation='softmax')(d) - - model = Model(inputs=input, outputs=o) - #model.summary() - return model - -def make_cifar10_model(**args): - from keras.models import Sequential, Model - from keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute - from keras.layers import Convolution2D, MaxPooling2D, Conv2D - import keras.backend as K - if args:logging.debug("receiving arguments {}".format(args)) - nb_classes = 10 - img_rows, img_cols = 32, 32 - - # use 1 kernel size for all convolutional layers - ks = args.get('kernel_size', 3) - - # tune the number of filters for each convolution layer - nb_filters1 = args.get('nb_filters1', 48) - nb_filters2 = args.get('nb_filters2', 96) - nb_filters3 = args.get('nb_filters3', 192) - - # tune the pool size once - ps = args.get('pool_size', 2) - pool_size = (ps,ps) - - # tune the dropout rates independently - do4 = args.get('dropout1', 0.25) - do5 = args.get('dropout2', 0.5) - - # tune the dense layers independently - dense1 = args.get('dense1', 512) - dense2 = args.get('dense2', 256) - - if K.image_dim_ordering() == 'th': - input_shape = (3, img_rows, img_cols) - else: - input_shape = (img_rows, img_cols, 3) - - #act = 'sigmoid' - act = 'relu' - - i = Input( input_shape) - l = Conv2D(nb_filters1,( ks, ks), padding='same', activation = act)(i) - l = MaxPooling2D(pool_size=pool_size)(l) - #l = Dropout(do1)(l) - - l = Conv2D(nb_filters2, (ks, ks), padding='same',activation=act)(l) - #l = Conv2D(nb_filters2, (ks, ks))(l) - l = MaxPooling2D(pool_size=pool_size)(l) - #l = Dropout(do2)(l) - - l = Conv2D(nb_filters3, (ks, ks), padding='same',activation=act)(l) - #l = Conv2D(nb_filters3, (ks, ks))(l) - l = MaxPooling2D(pool_size=pool_size)(l) - #l = Dropout(do3)(l) - - l = Flatten()(l) - l = Dense(dense1,activation=act)(l) - l = Dropout(do4)(l) - l = Dense(dense2,activation=act)(l) - l =Dropout(do5)(l) - - o = Dense(nb_classes, activation='softmax')(l) - - model = Model(inputs=i, outputs=o) - #model.summary() - - return model - -def make_mnist_model(**args): - from keras.models import Sequential, Model - from keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute - from keras.layers import Convolution2D, MaxPooling2D, Conv2D - import keras.backend as K - """MNIST ConvNet from keras/examples/mnist_cnn.py""" - #np.random.seed(1337) # for reproducibility - if args:logging.debug("receiving arguments {}".format(args)) - nb_classes = 10 - # input image dimensions - img_rows, img_cols = 28, 28 - # number of convolutional filters to use - nb_filters = args.get('nb_filters',32) - # size of pooling area for max pooling - ps = args.get('pool_size',2) - - # convolution kernel size - ks = args.get('kernel_size',3) - do = args.get('dropout', 0.25) - dense = args.get('dense', 128) - - pool_size = (ps,ps) - if K.image_dim_ordering() == 'th': - input_shape = (1, img_rows, img_cols) - else: - input_shape = 
(img_rows, img_cols, 1) - model = Sequential() - model.add(Convolution2D(nb_filters, (ks, ks), - border_mode='valid', - input_shape=input_shape)) - model.add(Activation('relu')) - model.add(Convolution2D(nb_filters, (ks, ks))) - model.add(Activation('relu')) - model.add(MaxPooling2D(pool_size=pool_size)) - model.add(Dropout(do)) - model.add(Flatten()) - model.add(Dense(dense)) - model.add(Activation('relu')) - model.add(Dropout(do)) - model.add(Dense(nb_classes)) - model.add(Activation('softmax')) - return model - -def make_mnist_torch_model(**args): - if args:logging.debug("receiving arguments {}".format(args)) - try: - from TorchModels import MNistNet - except: - from .TorchModels import MNistNet - model = MNistNet(**args) - return model - -def make_topclass_torch_model(**args): - if args:logging.debug("receiving arguments {}".format(args)) - conv_layers=args.get('conv_layers',2) - dense_layers=args.get('dense_layers',2) - dropout=args.get('dropout',0.5) - classes=3 - in_channels=5 - try: - from TorchModels import CNN - except: - from .TorchModels import CNN - model = CNN(conv_layers=conv_layers, dense_layers=dense_layers, dropout=dropout, classes=classes, in_channels=in_channels) - return model - -try: - from skopt.space import Real, Integer, Categorical - make_mnist_model.parameter_range = [ - Integer(10,50, name='nb_filters'), - Integer(2,10, name='pool_size'), - Integer(2,10, name='kernel_size'), - Integer(50,200, name='dense'), - Real(0.0, 1.0, name='dropout') - ] - make_mnist_torch_model.parameter_range = [ - Integer(2,10, name='kernel_size'), - Integer(50,200, name='dense'), - Real(0.0, 1.0, name='dropout') - ] - make_topclass_model.parameter_range = [ - Integer(1,6, name='conv_layers'), - Integer(1,6, name='dense_layers'), - Integer(1,6, name='kernel_size'), - Real(0.0, 1.0, name='dropout') - ] - make_topclass_torch_model.parameter_range = [ - Integer(1,6, name='conv_layers'), - Integer(1,6, name='dense_layers'), - Real(0.0,1.0, name='dropout') - ] - make_cifar10_model.parameter_range = [ - Integer(10,300, name='nb_filters1'), - Integer(10,300, name='nb_filters2'), - Integer(10,300, name='nb_filters3'), - Integer(50,1000, name='dense1'), - Integer(50,1000, name='dense2'), - Real(0.0, 1.0, name='dropout1'), - Real(0.0, 1.0, name='dropout2') - ] -except: - pass - diff --git a/models/get_3d.py b/models/get_3d.py deleted file mode 100644 index 53a4a7b..0000000 --- a/models/get_3d.py +++ /dev/null @@ -1,91 +0,0 @@ -import os -import glob -try: - import h5py - pass -except: - print ("hum") -import numpy as np -import sys - -def get_data(datafile): - #get data for training - #print ('Loading Data from .....', datafile) - f=h5py.File(datafile,'r') - y=f.get('target') - X=np.array(f.get('ECAL')) - y=(np.array(y[:,1])) - X[X < 1e-4] = 0 - X = np.expand_dims(X, axis=-1) - X = X.astype(np.float32) - y = y.astype(np.float32) - y = y/100. 
- ecal = np.squeeze(np.sum(X, axis=(1, 2, 3))) - print (X.shape) - print (y.shape) - print (ecal.shape) - - f.close() - return X, y, ecal - -dest='/data/shared/3DGAN/' -import socket -host = os.environ.get('HOST', os.environ.get('HOSTNAME',socket.gethostname())) -if 'daint' in host: - dest='/scratch/snx3000/vlimant/3DGAN/' -if 'titan' in host: - dest='/ccs/proj/csc291/DATA/3DGAN/' - -sub_split = int(sys.argv[1]) if len(sys.argv)>1 else 1 - -for F in glob.glob('/bigdata/shared/LCD/NewV1/*scan/*.h5'): - _,d,f = F.rsplit('/',2) - if not 'Ele' in d: continue - X = None - if sub_split==1: - nf = '%s/%s_%s.h5'%( dest,d,f) - if os.path.isfile( nf) : - continue - print ("processing files",F,"into",nf) - if X is None: - X,y,ecal = get_data(F) - o = h5py.File(nf,'w') - o['X'] = X - o.create_group("y") - o['y']['a'] = np.ones(y.shape) - o['y']['b'] = y - o['y']['c'] = ecal - o.close() - else: - for sub in range(sub_split): - nf = '%s/%s_%s_sub%s.h5'%(dest, d,f,sub) - if os.path.isfile( nf) : - continue - print ("processing files",F,"into",nf) - if X is None: - X,y,ecal = get_data(F) - N = X.shape[0] - splits = [i*N/sub_split for i in range(sub_split)]+[-1] - o = h5py.File(nf,'w') - o['X'] = X[splits[sub]:splits[sub+1],...] - o.create_group("y") - o['y']['a'] = np.ones(y[splits[sub]:splits[sub+1],...].shape) - o['y']['b'] = y[splits[sub]:splits[sub+1],...] - o['y']['c'] = ecal[splits[sub]:splits[sub+1],...] - o.close() - X = None - -if sub_split == 1: - sub_files = lambda f:not 'sub' in f -else: - sub_files = lambda f:'sub' in f - -open('train_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:-4]))) -open('test_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-4:]))) - -open('train_small_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:-4]))) -open('test_small_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-4:]))) - -open('train_7_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:7]))) -open('test_1_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-1:]))) - diff --git a/models/get_cifar10.py b/models/get_cifar10.py deleted file mode 100644 index bebcb8a..0000000 --- a/models/get_cifar10.py +++ /dev/null @@ -1,53 +0,0 @@ -### This script downloads the cifar10 dataset, unpacks it, splits it into four pieces, and saves -# each piece in a separate h5 file. 
- -from numpy import array_split -from keras.datasets import cifar10 -from keras.utils import np_utils -from keras import backend as K -import h5py -import sys - -(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() - -img_rows = 32 -img_cols = 32 -if K.image_dim_ordering() == 'th': - X_train = X_train.reshape(X_train.shape[0], 3, img_rows, img_cols) - X_test = X_test.reshape(X_test.shape[0], 3, img_rows, img_cols) - input_shape = (3, img_rows, img_cols) -else: - X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3) - X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3) - input_shape = (img_rows, img_cols, 3) - -num_train_pieces = int(sys.argv[1]) if len(sys.argv)>1 else 24 -num_test_pieces = int(sys.argv[2]) if len(sys.argv)>1 else 4 -split_X_train = [ X.astype('float32') / 255 for X in array_split(X_train, num_train_pieces) ] -split_Y_train = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_train, num_train_pieces) ] -split_X_test = [ X.astype('float32') / 255 for X in array_split(X_test, num_test_pieces) ] -split_Y_test = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_test, num_test_pieces) ] - -train_list = [] -for i in range(num_train_pieces): - train_name = "cifar10_train_%d.h5" % i - train_list.append(train_name+"\n") - train_outfile = h5py.File( train_name, 'w' ) - train_outfile.create_dataset( "features", data=split_X_train[i] ) - train_outfile.create_dataset( "labels", data=split_Y_train[i] ) - train_outfile.close() -with open('train_cifar10.list', 'w') as train_list_file: - for f in train_list: - train_list_file.write(f) - -test_list = [] -for i in range(num_test_pieces): - test_name = "cifar10_test_%d.h5" % i - test_list.append(test_name+"\n") - test_outfile = h5py.File( test_name, 'w' ) - test_outfile.create_dataset( "features", data=split_X_test[i] ) - test_outfile.create_dataset( "labels", data=split_Y_test[i] ) - test_outfile.close() -with open('test_cifar10.list', 'w') as test_list_file: - for f in test_list: - test_list_file.write(f) diff --git a/models/get_mnist.py b/models/get_mnist.py deleted file mode 100644 index b5b2a14..0000000 --- a/models/get_mnist.py +++ /dev/null @@ -1,53 +0,0 @@ -### This script downloads the MNIST dataset, unpacks it, splits it into four pieces, and saves -# each piece in a separate h5 file. 
- -from numpy import array_split -from keras.datasets import mnist -from keras.utils import np_utils -from keras import backend as K -import h5py -import sys - -(X_train, Y_train), (X_test, Y_test) = mnist.load_data() - -img_rows = 28 -img_cols = 28 -if K.image_dim_ordering() == 'th': - X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) - X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) -else: - X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) - X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1) -input_shape = (img_rows, img_cols, 1) - -num_train_pieces = int(sys.argv[1]) if len(sys.argv)>1 else 24 -num_test_pieces = int(sys.argv[2]) if len(sys.argv)>1 else 4 -split_X_train = [ X.astype('float32') / 255 for X in array_split(X_train, num_train_pieces) ] -split_Y_train = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_train, num_train_pieces) ] -split_X_test = [ X.astype('float32') / 255 for X in array_split(X_test, num_test_pieces) ] -split_Y_test = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_test, num_test_pieces) ] - -train_list = [] -for i in range(num_train_pieces): - train_name = "mnist_train_%d.h5" % i - train_list.append(train_name+"\n") - train_outfile = h5py.File( train_name, 'w' ) - train_outfile.create_dataset( "features", data=split_X_train[i] ) - train_outfile.create_dataset( "labels", data=split_Y_train[i] ) - train_outfile.close() -with open('train_mnist.list', 'w') as train_list_file: - for f in train_list: - train_list_file.write(f) - -test_list = [] -for i in range(num_test_pieces): - test_name = "mnist_test_%d.h5" % i - test_list.append(test_name+"\n") - test_outfile = h5py.File( test_name, 'w' ) - test_outfile.create_dataset( "features", data=split_X_test[i] ) - test_outfile.create_dataset( "labels", data=split_Y_test[i] ) - test_outfile.close() -with open('test_mnist.list', 'w') as test_list_file: - for f in test_list: - test_list_file.write(f) diff --git a/models/get_topclass.py b/models/get_topclass.py deleted file mode 100644 index f3e1998..0000000 --- a/models/get_topclass.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -import glob -import sys - -dest='/bigdata/shared/LCDJets_Abstract_IsoLep_lt_20' -import socket -host = os.environ.get('HOST', os.environ.get('HOSTNAME',socket.gethostname())) -if 'titan' in host: - dest='/ccs/proj/csc291/DATA/LCDJets_Abstract_IsoLep_lt_20' -train = glob.glob(dest+'/train/*.h5') -test = glob.glob(dest+'/val/*.h5') - -N=10 -Nt=N/5 -if len(sys.argv)>=1: - a = sys.argv[1] - if a.isdigit(): - N = int(a) - Nt=N/5 - else: - N,Nt = map(int, a.split(',')) - - -open('train_topclass.list','w').write( '\n'.join(sorted( train[:N] ))) -open('test_topclass.list','w').write( '\n'.join(sorted( test[:Nt] ))) diff --git a/nnlo/data/__init__.py b/nnlo/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nnlo/data/get_3d.py b/nnlo/data/get_3d.py new file mode 100644 index 0000000..bd8b4fa --- /dev/null +++ b/nnlo/data/get_3d.py @@ -0,0 +1,92 @@ +import os +import glob +import logging +try: + import h5py + pass +except: + logging.info("import h5py failed") +import numpy as np +import sys + +def get_data(datafile): + #get data for training + f=h5py.File(datafile,'r') + y=f.get('target') + X=np.array(f.get('ECAL')) + y=(np.array(y[:,1])) + X[X < 1e-4] = 0 + X = np.expand_dims(X, axis=-1) + X = X.astype(np.float32) + y = y.astype(np.float32) + y = y/100. 
+ ecal = np.squeeze(np.sum(X, axis=(1, 2, 3))) + logging.info("X shape {}; y shape {}; ecal shape {}".format(str(X.shape)), str(y.shape), str(ecal.shape)) + + f.close() + return X, y, ecal + +def main(): + dest='/data/shared/3DGAN/' + import socket + host = os.environ.get('HOST', os.environ.get('HOSTNAME',socket.gethostname())) + if 'daint' in host: + dest='/scratch/snx3000/vlimant/3DGAN/' + if 'titan' in host: + dest='/ccs/proj/csc291/DATA/3DGAN/' + + sub_split = int(sys.argv[1]) if len(sys.argv)>1 else 1 + + for F in glob.glob('/bigdata/shared/LCD/NewV1/*scan/*.h5'): + _,d,f = F.rsplit('/',2) + if not 'Ele' in d: continue + X = None + if sub_split==1: + nf = '%s/%s_%s.h5'%( dest,d,f) + if os.path.isfile( nf) : + continue + logging.info("processing files {} into {}".format(F,nf)) + if X is None: + X,y,ecal = get_data(F) + o = h5py.File(nf,'w') + o['X'] = X + o.create_group("y") + o['y']['a'] = np.ones(y.shape) + o['y']['b'] = y + o['y']['c'] = ecal + o.close() + else: + for sub in range(sub_split): + nf = '%s/%s_%s_sub%s.h5'%(dest, d,f,sub) + if os.path.isfile( nf) : + continue + logging.info("processing files {} into {}".format(F,nf)) + if X is None: + X,y,ecal = get_data(F) + N = X.shape[0] + splits = [i*N/sub_split for i in range(sub_split)]+[-1] + o = h5py.File(nf,'w') + o['X'] = X[splits[sub]:splits[sub+1],...] + o.create_group("y") + o['y']['a'] = np.ones(y[splits[sub]:splits[sub+1],...].shape) + o['y']['b'] = y[splits[sub]:splits[sub+1],...] + o['y']['c'] = ecal[splits[sub]:splits[sub+1],...] + o.close() + X = None + + if sub_split == 1: + sub_files = lambda f:not 'sub' in f + else: + sub_files = lambda f:'sub' in f + + open('train_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:-4]))) + open('test_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-4:]))) + + open('train_small_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:-4]))) + open('test_small_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-4:]))) + + open('train_7_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:7]))) + open('test_1_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-1:]))) + +if __name__ == '__main__': + main() diff --git a/nnlo/data/get_cifar10.py b/nnlo/data/get_cifar10.py new file mode 100644 index 0000000..82425bf --- /dev/null +++ b/nnlo/data/get_cifar10.py @@ -0,0 +1,57 @@ +### This script downloads the cifar10 dataset, unpacks it, splits it into four pieces, and saves +# each piece in a separate h5 file. 
+ +from numpy import array_split +from tensorflow.keras.datasets import cifar10 +from tensorflow.python.keras.utils import np_utils +from tensorflow.python.keras import backend as K +import h5py +import os + +def main(argv): + (X_train, Y_train), (X_test, Y_test) = cifar10.load_data() + + img_rows = 32 + img_cols = 32 + if K.image_data_format() == 'channels_first': + X_train = X_train.reshape(X_train.shape[0], 3, img_rows, img_cols) + X_test = X_test.reshape(X_test.shape[0], 3, img_rows, img_cols) + input_shape = (3, img_rows, img_cols) + else: + X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3) + X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3) + input_shape = (img_rows, img_cols, 3) + + num_train_pieces = int(argv[1]) if len(argv)>1 else 24 + num_test_pieces = int(argv[2]) if len(argv)>1 else 4 + split_X_train = [ X.astype('float32') / 255 for X in array_split(X_train, num_train_pieces) ] + split_Y_train = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_train, num_train_pieces) ] + split_X_test = [ X.astype('float32') / 255 for X in array_split(X_test, num_test_pieces) ] + split_Y_test = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_test, num_test_pieces) ] + + train_list = [] + for i in range(num_train_pieces): + train_name = f"{os.getcwd()}/cifar10_train_%d.h5" % i + train_list.append(train_name+"\n") + train_outfile = h5py.File( train_name, 'w' ) + train_outfile.create_dataset( "features", data=split_X_train[i] ) + train_outfile.create_dataset( "labels", data=split_Y_train[i] ) + train_outfile.close() + with open('train_cifar10.list', 'w') as train_list_file: + for f in train_list: + train_list_file.write(f) + + test_list = [] + for i in range(num_test_pieces): + test_name = f"{os.getcwd()}/cifar10_test_%d.h5" % i + test_list.append(test_name+"\n") + test_outfile = h5py.File( test_name, 'w' ) + test_outfile.create_dataset( "features", data=split_X_test[i] ) + test_outfile.create_dataset( "labels", data=split_Y_test[i] ) + test_outfile.close() + with open('test_cifar10.list', 'w') as test_list_file: + for f in test_list: + test_list_file.write(f) + +if __name__ == '__main__': + main() diff --git a/nnlo/data/get_mnist.py b/nnlo/data/get_mnist.py new file mode 100644 index 0000000..93675f0 --- /dev/null +++ b/nnlo/data/get_mnist.py @@ -0,0 +1,57 @@ +### This script downloads the MNIST dataset, unpacks it, splits it into four pieces, and saves +# each piece in a separate h5 file. 
+ +from numpy import array_split +from tensorflow.keras.datasets import mnist +from tensorflow.python.keras.utils import np_utils +from tensorflow.python.keras import backend as K +import h5py +import os + +def main(argv): + (X_train, Y_train), (X_test, Y_test) = mnist.load_data() + + img_rows = 28 + img_cols = 28 + if K.image_data_format() == 'channels_first': + X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) + X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) + input_shape = (1, img_rows, img_cols) + else: + X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) + X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1) + input_shape = (img_rows, img_cols, 1) + + num_train_pieces = int(argv[1]) if len(argv)>1 else 24 + num_test_pieces = int(argv[2]) if len(argv)>1 else 4 + split_X_train = [ X.astype('float32') / 255 for X in array_split(X_train, num_train_pieces) ] + split_Y_train = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_train, num_train_pieces) ] + split_X_test = [ X.astype('float32') / 255 for X in array_split(X_test, num_test_pieces) ] + split_Y_test = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_test, num_test_pieces) ] + + train_list = [] + for i in range(num_train_pieces): + train_name = f"{os.getcwd()}/mnist_train_%d.h5" % i + train_list.append(train_name+"\n") + train_outfile = h5py.File( train_name, 'w' ) + train_outfile.create_dataset( "features", data=split_X_train[i] ) + train_outfile.create_dataset( "labels", data=split_Y_train[i] ) + train_outfile.close() + with open('train_mnist.list', 'w') as train_list_file: + for f in train_list: + train_list_file.write(f) + + test_list = [] + for i in range(num_test_pieces): + test_name = f"{os.getcwd()}/mnist_test_%d.h5" % i + test_list.append(os.getcwd()+test_name+"\n") + test_outfile = h5py.File( test_name, 'w' ) + test_outfile.create_dataset( "features", data=split_X_test[i] ) + test_outfile.create_dataset( "labels", data=split_Y_test[i] ) + test_outfile.close() + with open('test_mnist.list', 'w') as test_list_file: + for f in test_list: + test_list_file.write(f) + +if __name__ == '__main__': + main() diff --git a/nnlo/data/get_topclass.py b/nnlo/data/get_topclass.py new file mode 100644 index 0000000..b30c167 --- /dev/null +++ b/nnlo/data/get_topclass.py @@ -0,0 +1,29 @@ +import os +import glob +import sys + +def main(): + dest='/bigdata/shared/LCDJets_Abstract_IsoLep_lt_20' + import socket + host = os.environ.get('HOST', os.environ.get('HOSTNAME',socket.gethostname())) + if 'titan' in host: + dest='/ccs/proj/csc291/DATA/LCDJets_Abstract_IsoLep_lt_20' + train = glob.glob(dest+'/train/*.h5') + test = glob.glob(dest+'/val/*.h5') + + N=10 + Nt=N/5 + if len(sys.argv)>=1: + a = sys.argv[1] + if a.isdigit(): + N = int(a) + Nt=N/5 + else: + N,Nt = map(int, a.split(',')) + + + open('train_topclass.list','w').write( '\n'.join(sorted( train[:N] ))) + open('test_topclass.list','w').write( '\n'.join(sorted( test[:Nt] ))) + +if __name__ == '__main__': + main() diff --git a/nnlo/data/getdata.py b/nnlo/data/getdata.py new file mode 100644 index 0000000..f5db855 --- /dev/null +++ b/nnlo/data/getdata.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +# Rui Zhang 7.2020 +# rui.zhang@cern.ch + +import sys + +def main(): + command = sys.argv[1] + argv = sys.argv[2:] + if command.lower() == 'mnist': + from nnlo.data.get_mnist import main + main(argv) + elif command.lower() == 'cifar10': + from nnlo.data.get_cifar10 import main + main(argv) + else: + raise RuntimeError('Unknown 
command: {}'.format(command)) + +if __name__ == '__main__': + main() diff --git a/MPIGDriver.py b/nnlo/driver/MPIGDriver.py similarity index 97% rename from MPIGDriver.py rename to nnlo/driver/MPIGDriver.py index 52d5e14..ea62326 100755 --- a/MPIGDriver.py +++ b/nnlo/driver/MPIGDriver.py @@ -13,15 +13,15 @@ from time import time,sleep from nnlo.mpi.manager import MPIManager, get_device -from nnlo.train.algo import Algo -from nnlo.train.data import H5Data -from nnlo.train.model import ModelFromJson, ModelTensorFlow +#from nnlo.train.algo import Algo +#from nnlo.train.data import H5Data +#from nnlo.train.model import ModelFromJson, ModelTensorFlow from nnlo.util.utils import import_keras from nnlo.util.logger import initialize_logger import socket -if __name__ == '__main__': +def main(): from TrainingDriver import add_loader_options parser = argparse.ArgumentParser() parser.add_argument('--verbose',help='display metrics for each training batch',action='store_true') @@ -112,8 +112,8 @@ logging.info(backend) if use_tf: - import_keras() - import keras.backend as K + #import_keras() + import tensorflow.keras.backend as K gpu_options=K.tf.GPUOptions( per_process_gpu_memory_fraction=0.0, allow_growth = True,) @@ -162,3 +162,6 @@ comm.Barrier() logging.info("Terminating") + +if __name__ == '__main__': + main() diff --git a/OptimizationDriver.py b/nnlo/driver/OptimizationDriver.py similarity index 99% rename from OptimizationDriver.py rename to nnlo/driver/OptimizationDriver.py index 8b6359b..a140971 100755 --- a/OptimizationDriver.py +++ b/nnlo/driver/OptimizationDriver.py @@ -105,9 +105,7 @@ def make_opt_parser(): return parser - -if __name__ == '__main__': - +def main(): logging.info("Process is on {}".format(socket.gethostname())) parser = make_opt_parser() args = parser.parse_args() @@ -323,3 +321,6 @@ def make_opt_parser(): checkpoint=args.checkpoint, checkpoint_interval=args.checkpoint_interval) block.run() + +if __name__ == '__main__': + main() diff --git a/TrainingDriver.py b/nnlo/driver/TrainingDriver.py similarity index 76% rename from TrainingDriver.py rename to nnlo/driver/TrainingDriver.py index cc8a654..3205acd 100755 --- a/TrainingDriver.py +++ b/nnlo/driver/TrainingDriver.py @@ -12,6 +12,7 @@ from mpi4py import MPI from time import time,sleep +import importlib from nnlo.mpi.manager import MPIManager, get_device from nnlo.train.algo import Algo @@ -21,12 +22,11 @@ from nnlo.util.timeline import Timeline from nnlo.util.logger import initialize_logger -def make_Block_Parser(): - pass def add_log_option(parser): # logging configuration parser.add_argument('--log-file', default=None, dest='log_file', help='log file to write, in additon to output stream') parser.add_argument('--log-level', default='info', dest='log_level', help='log level (debug, info, warn, error)') + parser.add_argument('--output', default='./', dest='output', help='output folder') def add_master_option(parser): parser.add_argument('--master-gpu',help='master process should get a gpu', @@ -89,13 +89,13 @@ def add_train_options(parser): parser.add_argument('--thread_validation', help='run a single process', action='store_true') # model arguments - parser.add_argument('--model', help='File containing model architecture (serialized in JSON/pickle, or provided in a .py file') + parser.add_argument('--model', required=True, help='File containing model architecture (serialized in JSON/pickle, or provided in a .py file') parser.add_argument('--trial-name', help='descriptive name for trial', default='train', dest='trial_name') 
# training data arguments - parser.add_argument('--train_data', help='text file listing data inputs for training', default=None) - parser.add_argument('--val_data', help='text file lis`ting data inputs for validation', default=None) + parser.add_argument('--train_data', help='text file listing data inputs for training', required=True) + parser.add_argument('--val_data', help='text file lis`ting data inputs for validation', required=True) parser.add_argument('--features-name', help='name of HDF5 dataset with input features', default='features', dest='features_name') parser.add_argument('--labels-name', help='name of HDF5 dataset with output labels', @@ -149,7 +149,7 @@ def make_loader( args, features_name, labels_name, train_list): return data -def make_model_weight(args, use_torch): +def make_model_weight(args, backend): model_weights = None if args.restore: args.restore = re.sub(r'\.algo$', '', args.restore) @@ -157,22 +157,26 @@ def make_model_weight(args, use_torch): with open(args.restore + '.latest', 'r') as latest: args.restore = latest.read().splitlines()[-1] if any([os.path.isfile(ff) for ff in glob.glob('./*'+args.restore + '.model')]): - if use_torch: + if backend == 'torch': args.model = args.restore + '.model' model_weights = args.restore +'.model_w' - else: + elif backend == 'tf': model_weights = args.restore + '.model' + else: + logging.error("%s backend not supported", backend) return model_weights -def make_algo( args, use_tf, comm, validate_every ): +def make_algo( args, backend, comm, validate_every ): args_opt = args.optimizer - if use_tf: - if not args_opt.endswith("tf"): + if backend == 'tf': + if not args_opt.endswith('tf'): args_opt = args_opt + 'tf' - else: - if not args_opt.endswith("torch"): + elif backend == 'torch': + if not args_opt.endswith('torch'): args_opt = args_opt + 'torch' + else: + logging.error("%s backend not supported", backend) if args.mode == 'easgd': algo = Algo(None, loss=args.loss, validate_every=validate_every, @@ -196,23 +200,13 @@ def make_algo( args, use_tf, comm, validate_every ): logging.info("%s not supported mode", args.mode) return algo -def make_train_val_lists(m_module, args): +def make_train_val_lists(args): train_list = val_list = [] - if args.train_data: - with open(args.train_data) as train_list_file: - train_list = [ s.strip() for s in train_list_file.readlines() ] - elif m_module is not None: - train_list = m_module.get_train() - else: - logging.info("no training data provided") + with open(args.train_data) as train_list_file: + train_list = [ s.strip() for s in train_list_file.readlines() ] - if args.val_data: - with open(args.val_data) as val_list_file: - val_list = [ s.strip() for s in val_list_file.readlines() ] - elif m_module is not None: - val_list = m_module.get_val() - else: - logging.info("no validation data provided") + with open(args.val_data) as val_list_file: + val_list = [ s.strip() for s in val_list_file.readlines() ] if not train_list: logging.error("No training data provided") @@ -220,78 +214,56 @@ def make_train_val_lists(m_module, args): logging.error("No validation data provided") return (train_list, val_list) -def make_features_labels(m_module, args): - features_name = m_module.get_features() if m_module is not None and hasattr(m_module,"get_features") else args.features_name - labels_name = m_module.get_labels() if m_module is not None and hasattr(m_module,"get_labels") else args.labels_name - return (features_name, labels_name) - -if __name__ == '__main__': +def main(): parser = make_train_parser() args 
= parser.parse_args() initialize_logger(filename=args.log_file, file_level=args.log_level, stream_level=args.log_level) - a_backend = args.backend - if 'torch' in args.model: - a_backend = 'torch' - - m_module = __import__(args.model.replace('.py','').replace('/', '.'), fromlist=[None]) if '.py' in args.model else None - (features_name, labels_name) = make_features_labels(m_module, args) - (train_list, val_list) = make_train_val_lists(m_module, args) + backend = 'torch' if 'torch' in args.model else 'tf' + + model_source = None + try: + if args.model == 'mnist': + model_source = 'nnlo/models/model_mnist_tf.py' + elif args.model == 'mnist_torch': + model_source = 'nnlo/models/model_mnist_torch.py' + elif args.model == 'cifar10': + model_source = 'nnlo/models/model_cifar10_tf.py' + elif args.model.endswith('py'): + model_source = args.model + except Exception as e: + logging.fatal(e) + + (features_name, labels_name) = args.features_name, args.labels_name + (train_list, val_list) = make_train_val_lists(args) comm = MPI.COMM_WORLD.Dup() if args.timeline: Timeline.enable() - use_tf = a_backend == 'keras' - use_torch = not use_tf + model_weights = make_model_weight(args, backend) - model_weights = make_model_weight(args, use_torch) - - # Theano is the default backend; use tensorflow if --tf is specified. - # In the theano case it is necessary to specify the device before importing. device = get_device( comm, args.n_masters, gpu_limit=args.max_gpus, gpu_for_master=args.master_gpu) os.environ['CUDA_VISIBLE_DEVICES'] = device[-1] if 'gpu' in device else '' logging.debug('set to device %s',os.environ['CUDA_VISIBLE_DEVICES']) - if use_torch: + if backend == 'torch': logging.debug("Using pytorch") - model_builder = ModelPytorch(comm, source=args.model, weights=model_weights, gpus=1 if 'gpu' in device else 0) - else: + model_builder = ModelPytorch(comm, source=model_source, weights=model_weights, gpus=1 if 'gpu' in device else 0) + elif backend == 'tf': logging.debug("Using TensorFlow") - os.environ['KERAS_BACKEND'] = 'tensorflow' - - import_keras() - import keras.backend as K - gpu_options=K.tf.GPUOptions( - per_process_gpu_memory_fraction=0.1, #was 0.0 - allow_growth = True, - visible_device_list = device[-1] if 'gpu' in device else '') - gpu_options=K.tf.GPUOptions( - per_process_gpu_memory_fraction=0.0, - allow_growth = True,) - #NTHREADS=(2,1) - NTHREADS=None - if NTHREADS is None: - K.set_session( K.tf.Session( config=K.tf.ConfigProto( - allow_soft_placement=True, log_device_placement=False, - gpu_options=gpu_options - ) ) ) - else: - K.set_session( K.tf.Session( config=K.tf.ConfigProto( - allow_soft_placement=True, log_device_placement=False, - gpu_options=gpu_options, - intra_op_parallelism_threads=NTHREADS[0], - inter_op_parallelism_threads=NTHREADS[1], - ) ) ) - - - model_builder = ModelTensorFlow( comm, source=args.model, weights=model_weights) - + import tensorflow as tf + gpu_devices = tf.config.experimental.list_physical_devices('GPU') + for device in gpu_devices: + tf.config.experimental.set_memory_growth(device, True) + model_builder = ModelTensorFlow( comm, source=model_source, weights=model_weights) + else: + logging.error("%s backend not supported", backend) data = make_loader(args, features_name, labels_name, train_list) # Some input arguments may be ignored depending on chosen algorithm - algo = make_algo( args, use_tf, comm, validate_every=int(data.count_data()/args.batch )) + algo = make_algo( args, backend, comm, validate_every=int(data.count_data()/args.batch )) if 
args.restore: algo.load(args.restore) @@ -308,13 +280,13 @@ def make_features_labels(m_module, args): checkpoint=args.checkpoint, checkpoint_interval=args.checkpoint_interval) - if m_module: - model_name =m_module.get_name() + if model_source: + model_name = os.path.basename(model_source).replace('.py','') else: model_name = os.path.basename(args.model).replace('.json','') - json_name = '_'.join([model_name,args.trial_name,"history.json"]) - tl_json_name = '_'.join([model_name,args.trial_name,"timeline.json"]) + json_name = args.output + '/' + '_'.join([model_name,args.trial_name,"history.json"]) + tl_json_name = args.output + '/' + '_'.join([model_name,args.trial_name,"timeline.json"]) # Process 0 launches the training procedure if comm.Get_rank() == 0: @@ -333,3 +305,6 @@ def make_features_labels(m_module, args): comm.barrier() logging.info("Terminating") if args.timeline: Timeline.collect(clean=True, file_name=tl_json_name) + +if __name__ == '__main__': + main() diff --git a/nnlo/driver/__init__.py b/nnlo/driver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nnlo/models/__init__.py b/nnlo/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nnlo/models/model_cifar10_tf.py b/nnlo/models/model_cifar10_tf.py new file mode 100644 index 0000000..8cbf414 --- /dev/null +++ b/nnlo/models/model_cifar10_tf.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + +def get_model(**args): + from tensorflow.keras.models import Sequential, Model + from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute + from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Conv2D + import tensorflow.keras.backend as K + if args:logging.debug("receiving arguments {}".format(args)) + nb_classes = 10 + img_rows, img_cols = 32, 32 + + # use 1 kernel size for all convolutional layers + ks = args.get('kernel_size', 3) + + # tune the number of filters for each convolution layer + nb_filters1 = args.get('nb_filters1', 48) + nb_filters2 = args.get('nb_filters2', 96) + nb_filters3 = args.get('nb_filters3', 192) + + # tune the pool size once + ps = args.get('pool_size', 2) + pool_size = (ps,ps) + + # tune the dropout rates independently + do4 = args.get('dropout1', 0.25) + do5 = args.get('dropout2', 0.5) + + # tune the dense layers independently + dense1 = args.get('dense1', 512) + dense2 = args.get('dense2', 256) + + if K.image_data_format() == 'channels_first': + input_shape = (3, img_rows, img_cols) + else: + input_shape = (img_rows, img_cols, 3) + + #act = 'sigmoid' + act = 'relu' + + i = Input( input_shape) + l = Conv2D(nb_filters1,( ks, ks), padding='same', activation = act)(i) + l = MaxPooling2D(pool_size=pool_size)(l) + #l = Dropout(do1)(l) + + l = Conv2D(nb_filters2, (ks, ks), padding='same',activation=act)(l) + #l = Conv2D(nb_filters2, (ks, ks))(l) + l = MaxPooling2D(pool_size=pool_size)(l) + #l = Dropout(do2)(l) + + l = Conv2D(nb_filters3, (ks, ks), padding='same',activation=act)(l) + #l = Conv2D(nb_filters3, (ks, ks))(l) + l = MaxPooling2D(pool_size=pool_size)(l) + #l = Dropout(do3)(l) + + l = Flatten()(l) + l = Dense(dense1,activation=act)(l) + l = Dropout(do4)(l) + l = Dense(dense2,activation=act)(l) + l =Dropout(do5)(l) + + o = Dense(nb_classes, activation='softmax')(l) + + model = Model(inputs=i, outputs=o) + #model.summary() + + return model + +from skopt.space import Real, Integer, Categorical +get_model.parameter_range = [ + Integer(10,300, name='nb_filters1'), + Integer(10,300, 
name='nb_filters2'), + Integer(10,300, name='nb_filters3'), + Integer(50,1000, name='dense1'), + Integer(50,1000, name='dense2'), + Real(0.0, 1.0, name='dropout1'), + Real(0.0, 1.0, name='dropout2') +] diff --git a/nnlo/models/model_example_tf.py b/nnlo/models/model_example_tf.py new file mode 100644 index 0000000..792677c --- /dev/null +++ b/nnlo/models/model_example_tf.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + +def get_model(**args): + """Example model from keras documentation""" + from tensorflow.keras.models import Sequential + from tensorflow.keras.layers import Dense, Activation + model = Sequential() + model.add(Dense(output_dim=64, input_dim=100)) + model.add(Activation("relu")) + model.add(Dense(output_dim=10)) + model.add(Activation("softmax")) + return model diff --git a/examples/example_hls4mlgru.py b/nnlo/models/model_hls4mlgru.py similarity index 94% rename from examples/example_hls4mlgru.py rename to nnlo/models/model_hls4mlgru.py index fa168d7..a61ac3c 100644 --- a/examples/example_hls4mlgru.py +++ b/nnlo/models/model_hls4mlgru.py @@ -1,6 +1,6 @@ #from keras.activations import relu, selu, elu -from keras.models import Model, Sequential -from keras.layers import Dense, Input, GRU, Dropout, Flatten, Permute +from keras.models import Model +from keras.layers import Dense, Input, GRU, Dropout, Permute import numpy as np def get_model(**args): @@ -32,9 +32,6 @@ def get_model(**args): # loss='categorical_crossentropy', metrics=['acc']) return model -def get_name(): - return 'hls4ml-gru' - def get_all(): import os,glob diff --git a/examples/example_jedi_torch.py b/nnlo/models/model_jedi_torch.py similarity index 98% rename from examples/example_jedi_torch.py rename to nnlo/models/model_jedi_torch.py index df826fc..6e644cf 100644 --- a/examples/example_jedi_torch.py +++ b/nnlo/models/model_jedi_torch.py @@ -158,9 +158,6 @@ def get_model(**args): return mymodel -def get_name(): - return 'hls4ml-jedi' - def get_all(): import os,glob @@ -213,7 +210,6 @@ def get_labels(): if __name__ == "__main__": - print("do the data conversion") import glob import h5py import numpy as np @@ -230,5 +226,4 @@ def get_labels(): fo['X'] = X fo['Y'] = Y fo.close() - print(f,"converted") diff --git a/nnlo/models/model_mnist_tf.py b/nnlo/models/model_mnist_tf.py new file mode 100644 index 0000000..ce9c418 --- /dev/null +++ b/nnlo/models/model_mnist_tf.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + +def get_model(**args): + from tensorflow.keras.models import Sequential, Model + from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute + from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Conv2D + import tensorflow.keras.backend as K + """MNIST ConvNet from keras/examples/mnist_cnn.py""" + #np.random.seed(1337) # for reproducibility + if args:logging.debug("receiving arguments {}".format(args)) + nb_classes = 10 + # input image dimensions + img_rows, img_cols = 28, 28 + # number of convolutional filters to use + nb_filters = args.get('nb_filters',32) + # size of pooling area for max pooling + ps = args.get('pool_size',2) + + # convolution kernel size + ks = args.get('kernel_size',3) + do = args.get('dropout', 0.25) + dense = args.get('dense', 128) + + pool_size = (ps,ps) + if K.image_data_format() == 'channels_first': + input_shape = (1, img_rows, img_cols) + else: + input_shape = (img_rows, img_cols, 1) + model = Sequential() + model.add(Convolution2D(nb_filters, (ks, ks), + 
padding='valid', + input_shape=input_shape)) + model.add(Activation('relu')) + model.add(Convolution2D(nb_filters, (ks, ks))) + model.add(Activation('relu')) + model.add(MaxPooling2D(pool_size=pool_size)) + model.add(Dropout(do)) + model.add(Flatten()) + model.add(Dense(dense)) + model.add(Activation('relu')) + model.add(Dropout(do)) + model.add(Dense(nb_classes)) + model.add(Activation('softmax')) + return model + +from skopt.space import Real, Integer, Categorical +get_model.parameter_range = [ + Integer(10,50, name='nb_filters'), + Integer(2,10, name='pool_size'), + Integer(2,10, name='kernel_size'), + Integer(50,200, name='dense'), + Real(0.0, 1.0, name='dropout') +] + diff --git a/nnlo/models/model_mnist_torch.py b/nnlo/models/model_mnist_torch.py new file mode 100644 index 0000000..a945e57 --- /dev/null +++ b/nnlo/models/model_mnist_torch.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch +import torch.nn as nn +import torch.nn.functional as F + +class MNistNet(nn.Module): + def __init__(self, **args): + super(MNistNet, self).__init__() + ks = int(args.get('kernel_size',5)) + do = float(args.get('dropout',0.5)) + dense = int(args.get('dense',50)) + self.conv1 = nn.Conv2d(1, 10, kernel_size=ks) + self.conv2 = nn.Conv2d(10, 20, kernel_size=ks) + self.conv2_drop = nn.Dropout2d(do) + self.fc1 = nn.Linear(320, dense) + self.fc2 = nn.Linear(dense, 10) + + def forward(self, x): + x = x.permute(0,3,1,2).float() + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + #return F.log_softmax(x, dim=1) + #return F.softmax(x) + #return F.cross_entropy(x) + return x + +def get_model(**args): + if args:logging.debug("receiving arguments {}".format(args)) + model = MNistNet(**args) + return model + +from skopt.space import Real, Integer, Categorical +get_model.parameter_range = [ + Integer(2,10, name='kernel_size'), + Integer(50,200, name='dense'), + Real(0.0, 1.0, name='dropout') +] diff --git a/nnlo/models/model_topclass_tf.py b/nnlo/models/model_topclass_tf.py new file mode 100644 index 0000000..f48e923 --- /dev/null +++ b/nnlo/models/model_topclass_tf.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + +def get_model(**args): + from tensorflow.keras.models import Sequential, Model + from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute + from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Conv2D + if args:logging.debug("receiving arguments {}".format(args)) + conv_layers=args.get('conv_layers',2) + dense_layers=args.get('dense_layers',2) + dropout=args.get('dropout',0.2) + kernel = args.get('kernel_size',3) + classes=3 + in_channels=5 + in_ch = in_channels + ## the trace in the input file is 750, 150, 94, 5 + input = Input( (150,94,in_ch)) + ## convs + c = input + for i in range(conv_layers): + channel_in = in_ch*((i+1)%5) + channel_out = in_ch*((i+2)%5) + if channel_in == 0: channel_in += 1 + if channel_out == 0: channel_out += 1 + c = Conv2D( filters=channel_out, kernel_size=(kernel,kernel) , strides=1, padding="same", activation = 'relu') (c) + c = Conv2D(1, (kernel,kernel), activation = 'relu',strides=2, padding="same")(c) + + ## pooling + pool = args.get('pool', 10) + m = MaxPooling2D((pool,pool))(c) + f = Flatten()(m) + d = f + base = args.get('hidden_factor',5)*100 + for i in range(dense_layers): + N = 
int(base//(2**(i+1))) + d = Dense( N, activation='relu')(d) + if dropout: + d = Dropout(dropout)(d) + o = Dense(classes, activation='softmax')(d) + + model = Model(inputs=input, outputs=o) + #model.summary() + return model + +from skopt.space import Real, Integer, Categorical +get_model.parameter_range = [ + Integer(1,6, name='conv_layers'), + Integer(1,6, name='dense_layers'), + Integer(1,6, name='kernel_size'), + Real(0.0, 1.0, name='dropout') +] diff --git a/models/TorchModels.py b/nnlo/models/model_topclass_torch.py similarity index 71% rename from models/TorchModels.py rename to nnlo/models/model_topclass_torch.py index fa663e7..74cc4b7 100644 --- a/models/TorchModels.py +++ b/nnlo/models/model_topclass_torch.py @@ -1,9 +1,10 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + import torch from torch.autograd import Variable import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.distributed as dist import torch.optim import torch.utils.data.distributed import torchvision.transforms as transforms @@ -11,32 +12,7 @@ import torchvision.models as models import torch.nn.functional as F import numpy - -class MNistNet(nn.Module): - def __init__(self, **args): - super(MNistNet, self).__init__() - ks = int(args.get('kernel_size',5)) - do = float(args.get('dropout',0.5)) - dense = int(args.get('dense',50)) - self.conv1 = nn.Conv2d(1, 10, kernel_size=ks) - self.conv2 = nn.Conv2d(10, 20, kernel_size=ks) - self.conv2_drop = nn.Dropout2d(do) - self.fc1 = nn.Linear(320, dense) - self.fc2 = nn.Linear(dense, 10) - - def forward(self, x): - x = x.permute(0,3,1,2).float() - x = F.relu(F.max_pool2d(self.conv1(x), 2)) - x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) - x = x.view(-1, 320) - x = F.relu(self.fc1(x)) - x = F.dropout(x, training=self.training) - x = self.fc2(x) - #return F.log_softmax(x, dim=1) - #return F.softmax(x) - #return F.cross_entropy(x) - return x - +import logging ### Build a customized CNN with given hyperparameters @@ -65,7 +41,7 @@ def __init__(self, dense_layers, dropout ,base): for i in range(dense_layers): il = int(base//(2**i)) ol = int(base//(2**(i+1))) - print (il,"=>",ol) + logging.info("{} =>> {}".format(il,ol)) self.add_module('denselayer%d'%(i), nn.Linear(il, ol)) self.add_module('relu%d'%(i), nn.ReLU(inplace=True)) self.dropout = dropout @@ -89,7 +65,7 @@ def build_net(self,*args, **kwargs): self.adapt_pool = nn.AdaptiveMaxPool2d((base_2,base_2)) il = int(base//(2**(args[1]))) ol = int(args[3]) - print (il,"=>",ol) + logging.info("{} =>> {}".format(il,ol)) self.output = nn.Linear(il, ol) def forward(self, x): @@ -101,3 +77,23 @@ def forward(self, x): return self.output(x) +def get_model(**args): + if args:logging.debug("receiving arguments {}".format(args)) + conv_layers=args.get('conv_layers',2) + dense_layers=args.get('dense_layers',2) + dropout=args.get('dropout',0.5) + classes=3 + in_channels=5 + try: + from TorchModels import CNN + except: + from .TorchModels import CNN + model = CNN(conv_layers=conv_layers, dense_layers=dense_layers, dropout=dropout, classes=classes, in_channels=in_channels) + return model + +from skopt.space import Real, Integer, Categorical +get_model.parameter_range = [ + Integer(1,6, name='conv_layers'), + Integer(1,6, name='dense_layers'), + Real(0.0,1.0, name='dropout') +] diff --git a/nnlo/train/GanModel.py b/nnlo/train/GanModel.py index 93389d3..c939089 100644 --- a/nnlo/train/GanModel.py +++ b/nnlo/train/GanModel.py @@ -1,17 +1,16 @@ #!/usr/bin/env python # -*- 
coding: utf-8 -*- -from __future__ import print_function from collections import defaultdict try: import cPickle as pickle except ImportError: import pickle -import keras -from keras.models import Model -from keras.layers import Input -from keras import optimizers -from keras.optimizers import RMSprop,SGD +import tensorflow.keras as keras +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Input +from tensorflow.keras import optimizers +from tensorflow.keras.optimizers import RMSprop,SGD #from EcalEnergyGan import generator, discriminator import numpy as np import numpy.core.umath_tests as umath @@ -23,17 +22,16 @@ import logging import keras.backend as K -from keras.models import Model, Sequential -from keras.layers import (Input, Dense, Reshape, Flatten, Lambda, merge, +from tensorflow.keras.models import Model, Sequential +from tensorflow.keras.layers import (Input, Dense, Reshape, Flatten, Lambda, merge, Dropout, BatchNormalization, Activation, Embedding) -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import (UpSampling3D, Conv3D, ZeroPadding3D, +from tensorflow.keras.layers.advanced_activations import LeakyReLU +from tensorflow.keras.layers.convolutional import (UpSampling3D, Conv3D, ZeroPadding3D, AveragePooling3D) from ..train.model import MPIModel, ModelBuilder from .optimizer import OptimizerBuilder -import keras kv2 = keras.__version__.startswith('2') def hn(): @@ -46,8 +44,6 @@ def weights(m): _weights_names += [ll.name for ll in layer.weights] _weights = m.get_weights() _disp = [(np.min(s),np.max(s),np.mean(s),np.std(s),s.shape,n) for s,n in zip(_weights,_weights_names)] - #for ii,dd in enumerate(_disp): - # print (ii,dd) def weights_diff( m ,lap=True, init=False,label='', alert=None):#1000.): if (weights_diff.old_weights is None) or init: @@ -62,14 +58,10 @@ def weights_diff( m ,lap=True, init=False,label='', alert=None):#1000.): ## make the diffs _diffs = [np.subtract(a,b) for (a,b) in zip(check_on_weight,and_check_on_weight)] _diffsN = [(np.min(s),np.max(s),np.mean(s),np.std(s),s.shape,n) for s,n in zip(_diffs,_weights_names)] - #print ('\n'.join(['%s'%dd for dd in _diffsN])) for ii,dd in enumerate(_diffsN): if alert: if not any([abs(vv) > alert for vv in dd[:3]]): continue - #print (ii,'WD %s'%label,dd) - #if dd[-2] == (8,): - # print ("\t",_diffs[ii]) if lap: weights_diff.old_weights = m.get_weights() @@ -250,16 +242,11 @@ def get_moments(images, sumsx, sumsy, sumsz, totalE, m): def load_sorted(sorted_path): sorted_files = sorted(glob.glob(sorted_path)) - #print ("found sorterd files",sorted( sorted_files)) energies = [] srt = {} for f in sorted_files: - #print (f) - #energy = int(list(filter(str.isdigit, f))[:-1]) file_name=f[f.find('sorted_'):-1] - #energy = int(''.join(list(filter(str.isdigit, f))[:-1])) energy = int(''.join(list(filter(str.isdigit, file_name))[:-1]))*10 - #print ("found files for energy",energy) energies.append(energy) srtfile = h5py.File(f,'r') srt["events" + str(energy)] = np.array(srtfile.get('ECAL')) @@ -368,22 +355,16 @@ def __init__(self, **args): self.calculate_fom = args.get('calculate_fom',True) if self.tell: - #print ("Generator summary") - #self.generator.summary() - #print ("Discriminator summary") - #self.discriminator.summary() - #print ("Combined summary") - #self.combined.summary() pass - if True: - if self.with_fixed_disc: print ("the batch norm weights are fixed. 
heavey weight re-assigning") - if self.checkpoint: print ("Checkpointing the model weigths after %d batch, based on the process id"%self.checkpoint) - if self._onepass: print ("Training in one pass") - if self._reversedorder: print ("will train generator first, then discriminator") - if self._heavycheck: print("running heavy check on weight sanity") - if self._show_values: print("showing the input values at each batch") - if self._show_loss: print("showing the loss at each batch") - if self._show_weights: print("showing weights statistics at each batch") + #if True: + # if self.with_fixed_disc: print ("the batch norm weights are fixed. heavey weight re-assigning") + # if self.checkpoint: print ("Checkpointing the model weigths after %d batch, based on the process id"%self.checkpoint) + # if self._onepass: print ("Training in one pass") + # if self._reversedorder: print ("will train generator first, then discriminator") + # if self._heavycheck: print("running heavy check on weight sanity") + # if self._show_values: print("showing the input values at each batch") + # if self._show_loss: print("showing the loss at each batch") + # if self._show_weights: print("showing weights statistics at each batch") MPIModel.__init__(self, models = [ self.discriminator, @@ -472,13 +453,10 @@ def big_assemble_models(self): def ext_assemble_models(self): - #print('[INFO] Building generator') self.generator = generator(self.latent_size, with_bn = self.gen_bn) - #print('[INFO] Building discriminator') self.discriminator = discriminator(discr_drop_out = self.discr_drop_out) if self.with_fixed_disc: self.fixed_discriminator = discriminator(discr_drop_out = self.discr_drop_out, fixed_bn=True) - #print('[INFO] Building combined') latent = Input(shape=(self.latent_size, ), name='combined_z') fake_image = self.generator(latent) if self.with_fixed_disc: @@ -494,7 +472,6 @@ def ext_assemble_models(self): def compile(self, **args): ## args are fully ignored here - #print('[INFO] IN GAN MODEL: COMPILE') if 'optimizer' in args and isinstance(args['optimizer'], OptimizerBuilder): opt_builder = args['optimizer'] else: @@ -512,7 +489,6 @@ def make_opt(**args): else: opt = SGD(lr=lr) - #print ("optimizer for compiling",opt) return opt self.generator.compile( @@ -536,14 +512,11 @@ def make_opt(**args): loss_weights=self.discr_loss_weights ) self.combined.metrics_names = self.discriminator.metrics_names - #print ("disc metrics",self.discriminator.metrics_names) - #print ("comb metrics",self.combined.metrics_names) if hasattr(self, 'calculate_fom'): self.energies, self.g4var = self.prepare_geant4_data() - #print ("compiled") def assemble_models(self): self.ext_assemble_models() @@ -554,44 +527,33 @@ def batch_transform(self, x, y): y_disc_real =y show_values = self._show_values def mm( label, t): - #print (label,np.min(t),np.max(t),np.mean(t),np.std(t),t.shape) pass if self.batch_size is None: ## fix me, maybe self.batch_size = x_disc_real.shape[0] - #print (hn(),"initializing sizes",x_disc_real.shape,[ yy.shape for yy in y]) noise = np.random.normal(0, 1, (self.batch_size, self.latent_size)) sampled_energies = np.random.uniform(0.1, 5,(self.batch_size,1)) generator_ip = np.multiply(sampled_energies, noise) - #if show_values: print ('energies',np.ravel(sampled_energies)[:10]) if show_values: mm('energies',sampled_energies) ratio = np.polyval(root_fit, sampled_energies) - #if show_values: print ('ratios',np.ravel(ratio)[:10]) if show_values: mm('ratios',ratio) ecal_ip = np.multiply(ratio, sampled_energies) - #if show_values: 
print ('estimated sum cells',np.ravel(ecal_ip)[:10]) if show_values: mm('estimated sum cells',ecal_ip) now = time.mktime(time.gmtime()) - #if self.p_cc>1 and len(self.p_t)%100==0: - # print ("prediction average",np.mean(self.p_t),"[s]' over",len(self.p_t)) generated_images = self.generator.predict(generator_ip) ecal_rip = np.squeeze(np.sum(generated_images, axis=(1, 2, 3))) - #if show_values: print ('generated sum cells',np.ravel(ecal_rip)[:10]) if show_values: mm('generated sum cells',ecal_rip) norm_overflow = False apply_identify = False ## False was intended originally if norm_overflow and np.max( ecal_rip ) > 1000.: - #if show_values: print ("normalizing back") - #ecal_ip = ecal_rip generated_images /= np.max( generated_images ) ecal_rip = np.squeeze(np.sum(generated_images, axis=(1, 2, 3))) - #if show_values: print ('generated sum cells',np.ravel(ecal_rip)[:10]) if show_values: mm('generated sum cells',ecal_rip) elif apply_identify: ecal_ip = ecal_rip @@ -625,7 +587,6 @@ def mm( label, t): c_noise = np.random.normal(0, 1, (2*self.batch_size, self.latent_size)) - ###print ('noise',np.ravel(noise)[:10]) c_sampled_energies = np.random.uniform(0.1, 5, (2*self.batch_size,1 )) c_generator_ip = np.multiply(c_sampled_energies, c_noise) c_ratio = np.polyval(root_fit, c_sampled_energies) @@ -651,9 +612,6 @@ def test_on_batch(self,x, y, sample_weight=None): (X_for_disc,Y_for_disc,X_for_combined,Y_for_combined) = self.batch_transform(x,y) epoch_disc_loss = self.discriminator.test_on_batch(X_for_disc,Y_for_disc) epoch_gen_loss = self.combined.test_on_batch(X_for_combined,Y_for_combined) - #if show_loss: - # print ("test discr loss",epoch_disc_loss) - # print ("test combined loss",epoch_gen_loss) else: ((x_disc_real,re_y),(generated_images, y_disc_fake),(x_comb1,y_comb1),(x_comb2,y_comb2)) = self.batch_transform(x,y) real_disc_loss = self.discriminator.test_on_batch( x_disc_real,re_y ) @@ -663,9 +621,6 @@ def test_on_batch(self,x, y, sample_weight=None): c_loss1= self.combined.test_on_batch( x_comb1,y_comb1 ) c_loss2= self.combined.test_on_batch(x_comb2,y_comb2 ) epoch_gen_loss = [(a + b) / 2 for a, b in zip(c_loss1,c_loss2)] - #if show_loss: - # print ("test discr loss",real_disc_loss,fake_disc_loss) - # print ("test combined loss",c_loss1, c_loss2) @@ -684,7 +639,7 @@ def train_on_batch(self, x, y, def _checkpoint(self): if self.checkpoint and (self.g_cc%self.checkpoint)==0: dest='%s/mpi_generator_%s_%s.h5'%(os.environ.get('GANCHECKPOINTLOC','.'),socket.gethostname(),os.getpid()) - print ("Saving generator to",dest,"at",self.g_cc) + logging.info("Saving generator to {} at {}".format(dest, self.g_cc)) self.generator.save_weights(dest) def _onepass_train_on_batch(self, x, y, @@ -708,8 +663,6 @@ def _train_disc(): self.discriminator.trainable = True now = time.mktime(time.gmtime()) epoch_disc_loss = self.discriminator.train_on_batch(X_for_disc,Y_for_disc) - #if show_loss: - # print (self.d_cc," discr loss",epoch_disc_loss) done = time.mktime(time.gmtime()) if self.d_cc: self.d_t.append( done - now ) @@ -725,13 +678,10 @@ def _train_comb(noT=False): self.discriminator.trainable = False now = time.mktime(time.gmtime()) if noT: - #print ("evaluating the combined model") epoch_gen_loss = self.combined.test_on_batch(X_for_combined,Y_for_combined) else: epoch_gen_loss = self.combined.train_on_batch(X_for_combined,Y_for_combined) - #if show_loss: - # print (self.g_cc,"combined loss",epoch_gen_loss) done = time.mktime(time.gmtime()) if self.g_cc: self.g_t.append( done - now ) @@ -766,12 +716,6 @@ def 
_train_comb(noT=False): weights( self.combined ) - #if len(self.g_t)>0 and len(self.g_t)%100==0: - # print ("generator average ",np.mean(self.g_t),"[s] over",len(self.g_t)) - - #if len(self.d_t)>0 and len(self.d_t)%100==0: - # print ("discriminator average",np.mean(self.d_t),"[s] over ",len(self.d_t)) - self._checkpoint() return np.asarray([epoch_disc_loss, epoch_gen_loss]) @@ -784,8 +728,6 @@ def _twopass_train_on_batch(self, x, y, show_loss = self._show_loss show_weights = self._show_weights - #if self.d_cc>1 and len(self.d_t)%100==0: - # print ("discriminator average",np.mean(self.d_t),"[s] over ",len(self.d_t)) self.discriminator.trainable = True if self._heavycheck: @@ -822,9 +764,6 @@ def _twopass_train_on_batch(self, x, y, weights_diff( on_weight , label='D-fake') - #if show_loss: - #print (self.discriminator.metrics_names) - #print (self.d_cc,"discr loss",real_batch_loss,fake_batch_loss) epoch_disc_loss = np.asarray([(a + b) / 2 for a, b in zip(real_batch_loss, fake_batch_loss)]) done = time.mktime(time.gmtime()) if self.d_cc: @@ -837,7 +776,6 @@ def _twopass_train_on_batch(self, x, y, weights( self.combined ) if self.g_cc>1 and len(self.g_t)%100==0: - #print ("generator average ",np.mean(self.g_t),"[s] over",len(self.g_t)) now = time.mktime(time.gmtime()) if self.g_cc: @@ -852,9 +790,6 @@ def _twopass_train_on_batch(self, x, y, if show_weights: weights( on_weight ) weights_diff( on_weight , label='C-2') - #if show_loss: - # #print(self.combined.metrics_names) - # print (self.g_cc,"combined loss",c_loss1,c_loss2) epoch_gen_loss = np.asarray([(a + b) / 2 for a, b in zip(c_loss1,c_loss2)]) done = time.mktime(time.gmtime()) if self.g_cc: @@ -871,18 +806,18 @@ def _twopass_train_on_batch(self, x, y, checks = [np.all(np.equal(a,b)) for (a,b) in zip(check_on_weight,and_check_on_weight)] weights_have_changed = not all(checks) weights_are_all_equal = all(checks) - print ('Weights are the same?',checks) + logging.info("Weights are the same? {}".format(str(checks))) if weights_have_changed: for iw,b in enumerate(checks): if not b: - print (iw,"This",check_on_weight[iw].shape) - print (np.ravel(check_on_weight[iw])[:10]) - print (iw,"And that",and_check_on_weight[iw].shape) - print (np.ravel(and_check_on_weight[iw])[:10]) + logging.info("{} This {}".format(iw,str(check_on_weight[iw].shape))) + logging.info("{}".format(np.ravel(check_on_weight[iw])[:10])) + logging.info("{} And that {}".format(iw,and_check_on_weight[iw].shape)) + logging.info("{}".format(np.ravel(and_check_on_weight[iw])[:10])) else: - print ("weights are all identical") - print (np.ravel(and_check_on_weight[1])[:10]) - print (np.ravel(check_on_weight[1])[:10]) + logging.info("weights are all identical") + logging.info("{}".format(str(np.ravel(and_check_on_weight[1])[:10]))) + logging.info("{}".format(str(np.ravel(check_on_weight[1])[:10]))) self._checkpoint() @@ -891,7 +826,7 @@ def _twopass_train_on_batch(self, x, y, switching_loss = (1.,1.)
if False and not self.recompiled and epoch_disc_loss[0] 0.0001: - print ("#"*30) - print ("swithcing lr",lr,"to", nlr) + logging.info("{}".format("#"*30)) + logging.info("swithcing lr {} to {}".format(lr, nlr)) K.set_value( self.discriminator.optimizer.lr, nlr) - print (K.get_value( self.discriminator.optimizer.lr )) + logging.info("{}".format(K.get_value( self.discriminator.optimizer.lr ))) K.set_value( self.combined.optimizer.lr, nlr) - print (K.get_value( self.combined.optimizer.lr )) - print ("#"*30) + logging.info("{}".format(K.get_value( self.combined.optimizer.lr ))) return np.asarray([epoch_disc_loss, epoch_gen_loss]) @@ -947,7 +881,6 @@ def prepare_geant4_data(self, **args): return energies, var def figure_of_merit(self, **args): - #print (self.histories) delta_loss = np.abs(self.histories['discriminator_model']['val_classification_loss'][-1] - self.histories['combined_model']['val_classification_loss'][-1]) return delta_loss diff --git a/nnlo/train/model.py b/nnlo/train/model.py index 01e6754..c3f88cf 100644 --- a/nnlo/train/model.py +++ b/nnlo/train/model.py @@ -7,6 +7,7 @@ import sys import six import logging +import importlib def tell_gpu_memory(label): import gpustat @@ -356,8 +357,10 @@ def test_on_batch(self, x=None, y=None, *args, **kwargs): if self.gpus > 0: x = x.cuda() target = target.cuda() - pred = self.model.forward(Variable(x, volatile=True)) - loss = self.loss(pred, Variable(target, volatile=True)) + import torch + with torch.no_grad(): + pred = self.model.forward(Variable(x)) + loss = self.loss(pred, Variable(target)) l_data = loss.data.numpy() if self.gpus == 0 else loss.data.cpu().numpy() self.metrics = [l_data] if l_data.shape==() else [l_data[0]] if 'acc' in self.metrics_names: # compute the accuracy @@ -434,7 +437,7 @@ def __init__(self, comm, source, custom_objects={}, weights=None): if isinstance(source, six.string_types): if source.endswith('.py'): - module = __import__(source.replace('.py','').replace('/', '.'), fromlist=[None]) + module = module = importlib.import_module(source.replace('.py','').replace('/', '.')) self.model = module.get_model() self.filename = None else: @@ -443,14 +446,13 @@ def __init__(self, comm, source, else: self.filename = None self.model = source + logging.debug("Get model {0} from file {1}".format(self.model, self.filename)) self.weights = weights self.custom_objects = custom_objects super(ModelTensorFlow, self).__init__(comm) def build_model_aux(self): - import keras.backend as K - if type(self.filename) == list: models = [] self.weights = self.weights.split(',') if self.weights else [None]*len(self.filename) @@ -464,27 +466,26 @@ def build_model_aux(self): def build_model(self, local_session = True): - import keras.backend as K + import tensorflow as tf if local_session: - graph = K.tf.Graph() - session = K.tf.Session(graph=graph, config=K.tf.ConfigProto( + graph = tf.Graph() + session = tf.compat.v1.Session(graph=graph, config=tf.compat.v1.ConfigProto( allow_soft_placement=True, log_device_placement=False, - gpu_options=K.tf.GPUOptions( + gpu_options=tf.compat.v1.GPUOptions( per_process_gpu_memory_fraction=1./self.comm.Get_size()) ) ) with graph.as_default(): with session.as_default(): - import keras.backend as K ret_model = self.build_model_aux() ret_model.session = session ret_model.graph = graph return ret_model else: - K.set_session( K.tf.Session( config=K.tf.ConfigProto( + tf.compat.v1.Session( config=tf.compat.v1.ConfigProto( allow_soft_placement=True, log_device_placement=False, - gpu_options=K.tf.GPUOptions( - 
per_process_gpu_memory_fraction=1./self.comm.Get_size()) ) ) ) + gpu_options=tf.compat.v1.GPUOptions( + per_process_gpu_memory_fraction=1./self.comm.Get_size()) ) ) return self.build_model_aux() def get_backend_name(self): @@ -497,7 +498,7 @@ def __init__(self, comm, source, super(ModelPytorch,self).__init__(comm) if isinstance(source, six.string_types): if source.endswith('.py'): - module = __import__(source.replace('.py','').replace('/', '.'), fromlist=[None]) + module = module = importlib.import_module(source.replace('.py','').replace('/', '.')) self.model = module.get_model() self.filename = None else: diff --git a/nnlo/train/optimizer.py b/nnlo/train/optimizer.py index 3053e92..11703c6 100644 --- a/nnlo/train/optimizer.py +++ b/nnlo/train/optimizer.py @@ -647,7 +647,7 @@ def __init__(self, name, config=None, horovod_wrapper=False): self.horovod_wrapper = horovod_wrapper def build(self): - from keras.optimizers import deserialize + from tensorflow.keras.optimizers import deserialize opt_config = {'class_name': self.name, 'config': self.config} opt = deserialize(opt_config) if self.horovod_wrapper: diff --git a/nnlo/util/plot_loss.py b/nnlo/util/plot_loss.py new file mode 100644 index 0000000..84a296e --- /dev/null +++ b/nnlo/util/plot_loss.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + +import json +import logging +import sys +import pandas as pd +import matplotlib.pyplot as plt +import matplotlib +matplotlib.use('Agg') + +def plotLoss(data, outname='', variable='loss'): + nGPU = len(data["history"].keys()) + for irank, values in data["history"].items(): + irank = irank.split(':')[0] + if irank.startswith('0'): + plt.plot(values['val_'+variable], linestyle='-', label=f'R{irank}: val') + if variable in values: + plt.plot(values[variable], linestyle=':', label=f'R{irank}: train') + else: + plt.plot(values[variable], linestyle='--', label='') + + plt.title(outname) + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f'{variable}_{outname}.pdf', format='pdf') + logging.info(f'Save {variable}_{outname}.pdf') + plt.clf() + +def main(): + logging.basicConfig(level = logging.INFO) + filename, rows_list = [], [] + try: + filenames = sys.argv[1:] + except: + logging.fatal('Usage: python plot_loss.py [json file name]') + + for filename in filenames: + with open(filename) as f: + logging.info(f'Read {filename}') + data = json.load(f) + outname = filename.replace('.json', '') + for variable in ['loss', 'accuracy']: + plotLoss(data, outname, variable) + + +if __name__ == '__main__': + main() diff --git a/nnlo/util/print_table.py b/nnlo/util/print_table.py new file mode 100644 index 0000000..1b29a9e --- /dev/null +++ b/nnlo/util/print_table.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Rui Zhang 7.2020 +# rui.zhang@cern.ch + +import json +import logging +import sys +import pandas as pd + +def main(): + logging.basicConfig(level = logging.INFO) + filename, rows_list = [], [] + try: + filenames = sys.argv[1:] + except: + logging.fatal('Usage: python print_table.py [json file name]') + + for filename in filenames: + with open(filename) as f: + data = json.load(f) + name = filename.split('_')[1] + dic = { + 'file': filename, + 'ranks': int(name[name.find('n')+1:name.find('g')]), + 'trainTime': data["train_time"], + } + try: + dic['val_loss'] = data["history"][r"0:0:-"]["val_loss"][-10] + dic['val_accuracy'] = data["history"][r"0:0:-"]["val_accuracy"][-10] + dic['epochs'] = len(data["history"][r"0:0:-"]["val_loss"]) + except: + dic['val_loss'] = 
data["history"][r"0:-:-"]["val_loss"][-10] + dic['val_accuracy'] = data["history"][r"0:-:-"]["val_accuracy"][-10] + dic['epochs'] = len(data["history"][r"0:-:-"]["val_loss"]) + rows_list.append(dic) + + df = pd.DataFrame(rows_list).sort_values('ranks') + logging.info(f'\n{df}') + +if __name__ == '__main__': + main() diff --git a/nnlo/util/utils.py b/nnlo/util/utils.py index e7a669b..73dfe7f 100644 --- a/nnlo/util/utils.py +++ b/nnlo/util/utils.py @@ -35,7 +35,7 @@ def import_keras(tries=10): try: stderr = sys.stderr sys.stderr = open(os.devnull, 'w') - import keras + import tensorflow.keras as keras sys.stderr = stderr return except ValueError: @@ -51,14 +51,19 @@ def load_model(filename=None, model=None, weights_file=None, custom_objects={}): weights_file: path to HDF5 file containing model weights custom_objects: A Dictionary of custom classes used in the model keyed by name""" import_keras() - from keras.models import model_from_json, clone_model + from tensorflow.keras.models import model_from_json, clone_model if filename is not None: with open( filename ) as arch_f: json_str = arch_f.readline() new_model = model_from_json( json_str, custom_objects=custom_objects) - if model is not None: + logging.info(f"Load model from filename") + elif model is not None: new_model = clone_model(model) - if weights_file is not None: + logging.info(f"Load model from model") + elif weights_file is not None: new_model.load_weights( weights_file ) + logging.info(f"Load model from weights_file") + else: + logging.error(f"Cannot load model: filename, model and weights_file are None") return new_model diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a19b0ae --- /dev/null +++ b/setup.py @@ -0,0 +1,36 @@ +from setuptools import setup, find_packages + +with open('README.md') as readme_file: + README = readme_file.read() + +with open('HISTORY.md') as history_file: + HISTORY = history_file.read() + +setup_args = dict( + name='nnlo', + version='0.0.7', + entry_points = { + 'console_scripts': ['TrainingDriver=nnlo.driver.TrainingDriver:main', + 'GetData=nnlo.data.getdata:main', + 'PrintTable=nnlo.util.print_table:main', + 'PlotLoss=nnlo.util.plot_loss:main', + ], + }, + description='Distributed Machine Learning tool for High Performance Computing', + long_description_content_type="text/markdown", + long_description=README + '\n\n' + HISTORY, + license='MIT', + packages=find_packages(), + author='NNLO team', + author_email='rui.zhang@cern.ch', + keywords=['Distributed Machine Learning', 'High Performance Computing', 'Hyperparameter optimisation'], + url='https://github.com/chnzhangrui/NNLO', + download_url='https://pypi.org/project/nnlo/' +) + +install_requires = [ + 'scikit-optimize', +] + +if __name__ == '__main__': + setup(**setup_args, install_requires=install_requires, include_package_data=True)