diff --git a/.gitignore b/.gitignore
index abe4a4a..a52bb86 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,9 @@
 *.pyc
 *.swp
-*.json
 *.h5
-*.txt
-.DS_Store
\ No newline at end of file
+.DS_Store
+build/
+dist/
+nnlo.egg-info/
+publish.sh
+.local/
diff --git a/models/__init__.py b/HISTORY.md
similarity index 100%
rename from models/__init__.py
rename to HISTORY.md
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..e69de29
diff --git a/README.md b/README.md
index 88445f8..168cc4c 100644
--- a/README.md
+++ b/README.md
@@ -10,36 +10,38 @@ The original package was implemented by [Dustin Anderson](https://github.com/dua
 Test with the MNIST dataset, with keras+tensorflow
 ```
-git clone https://github.com/vlimant/NNLO.git
+pip install nnlo
 cd NNLO
 ```
 
 Example with mnist provided in a python file
 ```
-python3 models/get_mnist.py
-mpirun -np 3 --tag-output python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 3
-mpirun -np 3 --tag-output python3 TrainingDriver.py --model examples/example_mnist_torch.py --loss categorical_crossentropy --epochs 3
+GetData mnist
+mpirun -np 3 TrainingDriver --model mnist --loss categorical_crossentropy --epochs 3 --trial-name n3g1epoch3 --train_data /path/to/train_mnist.list --val_data /path/to/test_mnist.list
+mpirun -np 3 python TrainingDriver.py --model examples/example_mnist_torch.py --loss categorical_crossentropy --epochs 3
+jsrun -n 3 -g 1 TrainingDriver --model mnist --loss categorical_crossentropy --epochs 3 --trial-name n3g1epoch3 --train_data /path/to/train_mnist.list --val_data /path/to/test_mnist.list
 ```
 
 Example with the cifar10 with model json
 ```
-python3 models/BuildModel.py cifar10
+GetData cifar10
 python3 models/get_cifar10.py
-mpirun -np 3 --tag-output python3 TrainingDriver.py --model cifar10_arch.json --train train_cifar10.list --val test_cifar10.list --loss categorical_crossentropy --epochs 5
+mpirun -np 3 TrainingDriver --model cifar10 --loss categorical_crossentropy --epochs 3 --trial-name n3g1epoch3 --train_data /path/to/train_cifar10.list --val_data /path/to/test_cifar10.list
+jsrun -n 3 -g 1 TrainingDriver --model cifar10 --loss categorical_crossentropy --epochs 3 --trial-name n3g1epoch3 --train_data /path/to/train_cifar10.list --val_data /path/to/test_cifar10.list
 ```
 
 Example of training mnist with 2 workers, each with 2 process per Horovod ring
 ```
-mpirun -np 5 --tag-output python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 3 --n-processes 2
+mpirun -np 5 python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 3 --n-processes 2
 ```
 
 Example of training mnist with early stopping
 ```
-mpirun -np 3 --tag-output python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 10000 --early "val_loss,~<,4"
+mpirun -np 3 python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 10000 --early "val_loss,~<,4"
 ```
 
 Example of training with a fixed target
 ```
-mpirun -np 3 --tag-output python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 10000 --target-metric "val_acc,>,0.97"
+mpirun -np 3 python3 TrainingDriver.py --model examples/example_mnist.py --loss categorical_crossentropy --epochs 10000 --target-metric "val_acc,>,0.97"
 ```
 
 ## GAN Examples (experimental)
diff --git a/examples/example_mnist.py b/examples/example_mnist.py
deleted file mode 100644
index
a6b11bd..0000000 --- a/examples/example_mnist.py +++ /dev/null @@ -1,39 +0,0 @@ -from models.Models import make_mnist_model - -get_model = make_mnist_model -def get_name(): - return 'mnist' - -def get_all(): - import socket,os,glob - host = os.environ.get('HOST',os.environ.get('HOSTNAME',socket.gethostname())) - - if 'daint' in host: - all_list = glob.glob('/scratch/snx3000/vlimant/data/mnist/*.h5') - elif 'titan' in host: - all_list = glob.glob('/ccs/proj/csc291/DATA/mnist/*.h5') - else: - all_list = glob.glob('/bigdata/shared/mnist/*.h5') - if not all_list: - all_list = glob.glob('mnist_*.h5') - return all_list - -def get_train(): - all_list = get_all() - l = int( len(all_list)*0.70) - train_list = all_list[:l] - return train_list - -def get_val(): - all_list = get_all() - l = int( len(all_list)*0.70) - val_list = all_list[l:] - return val_list - -def get_features(): - #return ('features', lambda x: x) ##example of data adaptor - return 'features' - -def get_labels(): - return 'labels' - diff --git a/examples/example_mnist_torch.py b/examples/example_mnist_torch.py deleted file mode 100644 index af916c9..0000000 --- a/examples/example_mnist_torch.py +++ /dev/null @@ -1,5 +0,0 @@ -from models.Models import make_mnist_torch_model -from examples.example_mnist import * - -get_model = make_mnist_torch_model - diff --git a/models/BuildModel.py b/models/BuildModel.py deleted file mode 100644 index 1529ca7..0000000 --- a/models/BuildModel.py +++ /dev/null @@ -1,50 +0,0 @@ -### Builds one of the available models. -# Saves model architecture to _arch.json -# and model weights to _weights.h5 - -import os -os.environ['CUDA_VISIBLE_DEVICES']="" -import argparse - -from Models import make_model - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('model_name', help='model to construct') - parser.add_argument('model_args', nargs='*', help='key=value to pass to the model',default=[]) - args = parser.parse_args() - model_name = args.model_name - model_args = {} - for kw in args.model_args: - k,v = kw.split('=') - try: - v = int(v) - except: - v= float(v) - model_args[k] = v - if model_args: - print ("passing",model_args,"to the model builder") - model = make_model( model_name ,**model_args) - else: - model = make_model( model_name) - weights_filename = "%s_weights.h5" % model_name - arch_filename = "%s_arch.json" % model_name - - if not "torch" in model_name: - model.summary() - model.save_weights( weights_filename, overwrite=True ) - print ("Saved model weights to {0}".format(weights_filename)) - - model_arch = model.to_json() - with open( arch_filename, 'w' ) as arch_file: - arch_file.write( model_arch ) - print ("Saved model architecture to {0}".format(arch_filename)) - else: - import torch - weights_filename = weights_filename.replace('h5','torch') - arch_filename = arch_filename.replace('json','torch') - torch.save(model.state_dict(), weights_filename) - print ("Saved model weights to {0}".format(weights_filename)) - torch.save(model, arch_filename) - print ("Saved model architecture to {0}".format(arch_filename)) - diff --git a/models/Models.py b/models/Models.py deleted file mode 100644 index 734f784..0000000 --- a/models/Models.py +++ /dev/null @@ -1,247 +0,0 @@ -### Predefined Keras models - -import sys -import logging - -def model_function(model_name): - """Constructs the Keras model indicated by model_name""" - model_maker_dict = { - 'example':make_example_model, - 'mnist':make_mnist_model, - 'cifar10':make_cifar10_model, - 
'mnist_torch':make_mnist_torch_model, - 'topclass': make_topclass_model, - 'topclass_torch':make_topclass_torch_model - - } - return model_maker_dict[model_name] -def make_model(model_name, **args): - m_fn = model_function(model_name) - if args and hasattr(m_fn,'parameter_range'): - provided = set(args.keys()) - accepted = set([a.name for a in m_fn.parameter_range]) - if not provided.issubset( accepted ): - logging.error("provided arguments {} do not match the accepted ones {}".format(sorted(provided),sorted(accepted))) - sys.exit(-1) - return model_function(model_name)(**args) - -def make_example_model(): - """Example model from keras documentation""" - from keras.models import Sequential - from keras.layers import Dense, Activation - model = Sequential() - model.add(Dense(output_dim=64, input_dim=100)) - model.add(Activation("relu")) - model.add(Dense(output_dim=10)) - model.add(Activation("softmax")) - return model - -def make_topclass_model(**args): - from keras.models import Sequential, Model - from keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute - from keras.layers import Convolution2D, MaxPooling2D, Conv2D - if args:logging.debug("receiving arguments {}".format(args)) - conv_layers=args.get('conv_layers',2) - dense_layers=args.get('dense_layers',2) - dropout=args.get('dropout',0.2) - kernel = args.get('kernel_size',3) - classes=3 - in_channels=5 - in_ch = in_channels - ## the trace in the input file is 750, 150, 94, 5 - input = Input( (150,94,in_ch)) - ## convs - c = input - for i in range(conv_layers): - channel_in = in_ch*((i+1)%5) - channel_out = in_ch*((i+2)%5) - if channel_in == 0: channel_in += 1 - if channel_out == 0: channel_out += 1 - c = Conv2D( filters=channel_out, kernel_size=(kernel,kernel) , strides=1, padding="same", activation = 'relu') (c) - c = Conv2D(1, (kernel,kernel), activation = 'relu',strides=2, padding="same")(c) - - ## pooling - pool = args.get('pool', 10) - m = MaxPooling2D((pool,pool))(c) - f = Flatten()(m) - d = f - base = args.get('hidden_factor',5)*100 - for i in range(dense_layers): - N = int(base//(2**(i+1))) - d = Dense( N, activation='relu')(d) - if dropout: - d = Dropout(dropout)(d) - o = Dense(classes, activation='softmax')(d) - - model = Model(inputs=input, outputs=o) - #model.summary() - return model - -def make_cifar10_model(**args): - from keras.models import Sequential, Model - from keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute - from keras.layers import Convolution2D, MaxPooling2D, Conv2D - import keras.backend as K - if args:logging.debug("receiving arguments {}".format(args)) - nb_classes = 10 - img_rows, img_cols = 32, 32 - - # use 1 kernel size for all convolutional layers - ks = args.get('kernel_size', 3) - - # tune the number of filters for each convolution layer - nb_filters1 = args.get('nb_filters1', 48) - nb_filters2 = args.get('nb_filters2', 96) - nb_filters3 = args.get('nb_filters3', 192) - - # tune the pool size once - ps = args.get('pool_size', 2) - pool_size = (ps,ps) - - # tune the dropout rates independently - do4 = args.get('dropout1', 0.25) - do5 = args.get('dropout2', 0.5) - - # tune the dense layers independently - dense1 = args.get('dense1', 512) - dense2 = args.get('dense2', 256) - - if K.image_dim_ordering() == 'th': - input_shape = (3, img_rows, img_cols) - else: - input_shape = (img_rows, img_cols, 3) - - #act = 'sigmoid' - act = 'relu' - - i = Input( input_shape) - l = Conv2D(nb_filters1,( ks, ks), padding='same', activation = act)(i) - l = 
MaxPooling2D(pool_size=pool_size)(l) - #l = Dropout(do1)(l) - - l = Conv2D(nb_filters2, (ks, ks), padding='same',activation=act)(l) - #l = Conv2D(nb_filters2, (ks, ks))(l) - l = MaxPooling2D(pool_size=pool_size)(l) - #l = Dropout(do2)(l) - - l = Conv2D(nb_filters3, (ks, ks), padding='same',activation=act)(l) - #l = Conv2D(nb_filters3, (ks, ks))(l) - l = MaxPooling2D(pool_size=pool_size)(l) - #l = Dropout(do3)(l) - - l = Flatten()(l) - l = Dense(dense1,activation=act)(l) - l = Dropout(do4)(l) - l = Dense(dense2,activation=act)(l) - l =Dropout(do5)(l) - - o = Dense(nb_classes, activation='softmax')(l) - - model = Model(inputs=i, outputs=o) - #model.summary() - - return model - -def make_mnist_model(**args): - from keras.models import Sequential, Model - from keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute - from keras.layers import Convolution2D, MaxPooling2D, Conv2D - import keras.backend as K - """MNIST ConvNet from keras/examples/mnist_cnn.py""" - #np.random.seed(1337) # for reproducibility - if args:logging.debug("receiving arguments {}".format(args)) - nb_classes = 10 - # input image dimensions - img_rows, img_cols = 28, 28 - # number of convolutional filters to use - nb_filters = args.get('nb_filters',32) - # size of pooling area for max pooling - ps = args.get('pool_size',2) - - # convolution kernel size - ks = args.get('kernel_size',3) - do = args.get('dropout', 0.25) - dense = args.get('dense', 128) - - pool_size = (ps,ps) - if K.image_dim_ordering() == 'th': - input_shape = (1, img_rows, img_cols) - else: - input_shape = (img_rows, img_cols, 1) - model = Sequential() - model.add(Convolution2D(nb_filters, (ks, ks), - border_mode='valid', - input_shape=input_shape)) - model.add(Activation('relu')) - model.add(Convolution2D(nb_filters, (ks, ks))) - model.add(Activation('relu')) - model.add(MaxPooling2D(pool_size=pool_size)) - model.add(Dropout(do)) - model.add(Flatten()) - model.add(Dense(dense)) - model.add(Activation('relu')) - model.add(Dropout(do)) - model.add(Dense(nb_classes)) - model.add(Activation('softmax')) - return model - -def make_mnist_torch_model(**args): - if args:logging.debug("receiving arguments {}".format(args)) - try: - from TorchModels import MNistNet - except: - from .TorchModels import MNistNet - model = MNistNet(**args) - return model - -def make_topclass_torch_model(**args): - if args:logging.debug("receiving arguments {}".format(args)) - conv_layers=args.get('conv_layers',2) - dense_layers=args.get('dense_layers',2) - dropout=args.get('dropout',0.5) - classes=3 - in_channels=5 - try: - from TorchModels import CNN - except: - from .TorchModels import CNN - model = CNN(conv_layers=conv_layers, dense_layers=dense_layers, dropout=dropout, classes=classes, in_channels=in_channels) - return model - -try: - from skopt.space import Real, Integer, Categorical - make_mnist_model.parameter_range = [ - Integer(10,50, name='nb_filters'), - Integer(2,10, name='pool_size'), - Integer(2,10, name='kernel_size'), - Integer(50,200, name='dense'), - Real(0.0, 1.0, name='dropout') - ] - make_mnist_torch_model.parameter_range = [ - Integer(2,10, name='kernel_size'), - Integer(50,200, name='dense'), - Real(0.0, 1.0, name='dropout') - ] - make_topclass_model.parameter_range = [ - Integer(1,6, name='conv_layers'), - Integer(1,6, name='dense_layers'), - Integer(1,6, name='kernel_size'), - Real(0.0, 1.0, name='dropout') - ] - make_topclass_torch_model.parameter_range = [ - Integer(1,6, name='conv_layers'), - Integer(1,6, name='dense_layers'), - Real(0.0,1.0, 
name='dropout') - ] - make_cifar10_model.parameter_range = [ - Integer(10,300, name='nb_filters1'), - Integer(10,300, name='nb_filters2'), - Integer(10,300, name='nb_filters3'), - Integer(50,1000, name='dense1'), - Integer(50,1000, name='dense2'), - Real(0.0, 1.0, name='dropout1'), - Real(0.0, 1.0, name='dropout2') - ] -except: - pass - diff --git a/models/get_3d.py b/models/get_3d.py deleted file mode 100644 index 53a4a7b..0000000 --- a/models/get_3d.py +++ /dev/null @@ -1,91 +0,0 @@ -import os -import glob -try: - import h5py - pass -except: - print ("hum") -import numpy as np -import sys - -def get_data(datafile): - #get data for training - #print ('Loading Data from .....', datafile) - f=h5py.File(datafile,'r') - y=f.get('target') - X=np.array(f.get('ECAL')) - y=(np.array(y[:,1])) - X[X < 1e-4] = 0 - X = np.expand_dims(X, axis=-1) - X = X.astype(np.float32) - y = y.astype(np.float32) - y = y/100. - ecal = np.squeeze(np.sum(X, axis=(1, 2, 3))) - print (X.shape) - print (y.shape) - print (ecal.shape) - - f.close() - return X, y, ecal - -dest='/data/shared/3DGAN/' -import socket -host = os.environ.get('HOST', os.environ.get('HOSTNAME',socket.gethostname())) -if 'daint' in host: - dest='/scratch/snx3000/vlimant/3DGAN/' -if 'titan' in host: - dest='/ccs/proj/csc291/DATA/3DGAN/' - -sub_split = int(sys.argv[1]) if len(sys.argv)>1 else 1 - -for F in glob.glob('/bigdata/shared/LCD/NewV1/*scan/*.h5'): - _,d,f = F.rsplit('/',2) - if not 'Ele' in d: continue - X = None - if sub_split==1: - nf = '%s/%s_%s.h5'%( dest,d,f) - if os.path.isfile( nf) : - continue - print ("processing files",F,"into",nf) - if X is None: - X,y,ecal = get_data(F) - o = h5py.File(nf,'w') - o['X'] = X - o.create_group("y") - o['y']['a'] = np.ones(y.shape) - o['y']['b'] = y - o['y']['c'] = ecal - o.close() - else: - for sub in range(sub_split): - nf = '%s/%s_%s_sub%s.h5'%(dest, d,f,sub) - if os.path.isfile( nf) : - continue - print ("processing files",F,"into",nf) - if X is None: - X,y,ecal = get_data(F) - N = X.shape[0] - splits = [i*N/sub_split for i in range(sub_split)]+[-1] - o = h5py.File(nf,'w') - o['X'] = X[splits[sub]:splits[sub+1],...] - o.create_group("y") - o['y']['a'] = np.ones(y[splits[sub]:splits[sub+1],...].shape) - o['y']['b'] = y[splits[sub]:splits[sub+1],...] - o['y']['c'] = ecal[splits[sub]:splits[sub+1],...] - o.close() - X = None - -if sub_split == 1: - sub_files = lambda f:not 'sub' in f -else: - sub_files = lambda f:'sub' in f - -open('train_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:-4]))) -open('test_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-4:]))) - -open('train_small_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:-4]))) -open('test_small_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-4:]))) - -open('train_7_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:7]))) -open('test_1_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-1:]))) - diff --git a/models/get_cifar10.py b/models/get_cifar10.py deleted file mode 100644 index bebcb8a..0000000 --- a/models/get_cifar10.py +++ /dev/null @@ -1,53 +0,0 @@ -### This script downloads the cifar10 dataset, unpacks it, splits it into four pieces, and saves -# each piece in a separate h5 file. 
- -from numpy import array_split -from keras.datasets import cifar10 -from keras.utils import np_utils -from keras import backend as K -import h5py -import sys - -(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() - -img_rows = 32 -img_cols = 32 -if K.image_dim_ordering() == 'th': - X_train = X_train.reshape(X_train.shape[0], 3, img_rows, img_cols) - X_test = X_test.reshape(X_test.shape[0], 3, img_rows, img_cols) - input_shape = (3, img_rows, img_cols) -else: - X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3) - X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3) - input_shape = (img_rows, img_cols, 3) - -num_train_pieces = int(sys.argv[1]) if len(sys.argv)>1 else 24 -num_test_pieces = int(sys.argv[2]) if len(sys.argv)>1 else 4 -split_X_train = [ X.astype('float32') / 255 for X in array_split(X_train, num_train_pieces) ] -split_Y_train = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_train, num_train_pieces) ] -split_X_test = [ X.astype('float32') / 255 for X in array_split(X_test, num_test_pieces) ] -split_Y_test = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_test, num_test_pieces) ] - -train_list = [] -for i in range(num_train_pieces): - train_name = "cifar10_train_%d.h5" % i - train_list.append(train_name+"\n") - train_outfile = h5py.File( train_name, 'w' ) - train_outfile.create_dataset( "features", data=split_X_train[i] ) - train_outfile.create_dataset( "labels", data=split_Y_train[i] ) - train_outfile.close() -with open('train_cifar10.list', 'w') as train_list_file: - for f in train_list: - train_list_file.write(f) - -test_list = [] -for i in range(num_test_pieces): - test_name = "cifar10_test_%d.h5" % i - test_list.append(test_name+"\n") - test_outfile = h5py.File( test_name, 'w' ) - test_outfile.create_dataset( "features", data=split_X_test[i] ) - test_outfile.create_dataset( "labels", data=split_Y_test[i] ) - test_outfile.close() -with open('test_cifar10.list', 'w') as test_list_file: - for f in test_list: - test_list_file.write(f) diff --git a/models/get_mnist.py b/models/get_mnist.py deleted file mode 100644 index b5b2a14..0000000 --- a/models/get_mnist.py +++ /dev/null @@ -1,53 +0,0 @@ -### This script downloads the MNIST dataset, unpacks it, splits it into four pieces, and saves -# each piece in a separate h5 file. 
- -from numpy import array_split -from keras.datasets import mnist -from keras.utils import np_utils -from keras import backend as K -import h5py -import sys - -(X_train, Y_train), (X_test, Y_test) = mnist.load_data() - -img_rows = 28 -img_cols = 28 -if K.image_dim_ordering() == 'th': - X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) - X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) -else: - X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) - X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1) -input_shape = (img_rows, img_cols, 1) - -num_train_pieces = int(sys.argv[1]) if len(sys.argv)>1 else 24 -num_test_pieces = int(sys.argv[2]) if len(sys.argv)>1 else 4 -split_X_train = [ X.astype('float32') / 255 for X in array_split(X_train, num_train_pieces) ] -split_Y_train = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_train, num_train_pieces) ] -split_X_test = [ X.astype('float32') / 255 for X in array_split(X_test, num_test_pieces) ] -split_Y_test = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_test, num_test_pieces) ] - -train_list = [] -for i in range(num_train_pieces): - train_name = "mnist_train_%d.h5" % i - train_list.append(train_name+"\n") - train_outfile = h5py.File( train_name, 'w' ) - train_outfile.create_dataset( "features", data=split_X_train[i] ) - train_outfile.create_dataset( "labels", data=split_Y_train[i] ) - train_outfile.close() -with open('train_mnist.list', 'w') as train_list_file: - for f in train_list: - train_list_file.write(f) - -test_list = [] -for i in range(num_test_pieces): - test_name = "mnist_test_%d.h5" % i - test_list.append(test_name+"\n") - test_outfile = h5py.File( test_name, 'w' ) - test_outfile.create_dataset( "features", data=split_X_test[i] ) - test_outfile.create_dataset( "labels", data=split_Y_test[i] ) - test_outfile.close() -with open('test_mnist.list', 'w') as test_list_file: - for f in test_list: - test_list_file.write(f) diff --git a/models/get_topclass.py b/models/get_topclass.py deleted file mode 100644 index f3e1998..0000000 --- a/models/get_topclass.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -import glob -import sys - -dest='/bigdata/shared/LCDJets_Abstract_IsoLep_lt_20' -import socket -host = os.environ.get('HOST', os.environ.get('HOSTNAME',socket.gethostname())) -if 'titan' in host: - dest='/ccs/proj/csc291/DATA/LCDJets_Abstract_IsoLep_lt_20' -train = glob.glob(dest+'/train/*.h5') -test = glob.glob(dest+'/val/*.h5') - -N=10 -Nt=N/5 -if len(sys.argv)>=1: - a = sys.argv[1] - if a.isdigit(): - N = int(a) - Nt=N/5 - else: - N,Nt = map(int, a.split(',')) - - -open('train_topclass.list','w').write( '\n'.join(sorted( train[:N] ))) -open('test_topclass.list','w').write( '\n'.join(sorted( test[:Nt] ))) diff --git a/nnlo/data/__init__.py b/nnlo/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nnlo/data/get_3d.py b/nnlo/data/get_3d.py new file mode 100644 index 0000000..bd8b4fa --- /dev/null +++ b/nnlo/data/get_3d.py @@ -0,0 +1,92 @@ +import os +import glob +import logging +try: + import h5py + pass +except: + logging.info("import h5py failed") +import numpy as np +import sys + +def get_data(datafile): + #get data for training + f=h5py.File(datafile,'r') + y=f.get('target') + X=np.array(f.get('ECAL')) + y=(np.array(y[:,1])) + X[X < 1e-4] = 0 + X = np.expand_dims(X, axis=-1) + X = X.astype(np.float32) + y = y.astype(np.float32) + y = y/100. 
+    ecal = np.squeeze(np.sum(X, axis=(1, 2, 3)))
+    logging.info("X shape {}; y shape {}; ecal shape {}".format(X.shape, y.shape, ecal.shape))
+
+    f.close()
+    return X, y, ecal
+
+def main():
+    dest='/data/shared/3DGAN/'
+    import socket
+    host = os.environ.get('HOST', os.environ.get('HOSTNAME',socket.gethostname()))
+    if 'daint' in host:
+        dest='/scratch/snx3000/vlimant/3DGAN/'
+    if 'titan' in host:
+        dest='/ccs/proj/csc291/DATA/3DGAN/'
+
+    sub_split = int(sys.argv[1]) if len(sys.argv)>1 else 1
+
+    for F in glob.glob('/bigdata/shared/LCD/NewV1/*scan/*.h5'):
+        _,d,f = F.rsplit('/',2)
+        if not 'Ele' in d: continue
+        X = None
+        if sub_split==1:
+            nf = '%s/%s_%s.h5'%( dest,d,f)
+            if os.path.isfile( nf) :
+                continue
+            logging.info("processing files {} into {}".format(F,nf))
+            if X is None:
+                X,y,ecal = get_data(F)
+            o = h5py.File(nf,'w')
+            o['X'] = X
+            o.create_group("y")
+            o['y']['a'] = np.ones(y.shape)
+            o['y']['b'] = y
+            o['y']['c'] = ecal
+            o.close()
+        else:
+            for sub in range(sub_split):
+                nf = '%s/%s_%s_sub%s.h5'%(dest, d,f,sub)
+                if os.path.isfile( nf) :
+                    continue
+                logging.info("processing files {} into {}".format(F,nf))
+                if X is None:
+                    X,y,ecal = get_data(F)
+                N = X.shape[0]
+                splits = [i*N//sub_split for i in range(sub_split)]+[-1]
+                o = h5py.File(nf,'w')
+                o['X'] = X[splits[sub]:splits[sub+1],...]
+                o.create_group("y")
+                o['y']['a'] = np.ones(y[splits[sub]:splits[sub+1],...].shape)
+                o['y']['b'] = y[splits[sub]:splits[sub+1],...]
+                o['y']['c'] = ecal[splits[sub]:splits[sub+1],...]
+                o.close()
+        X = None
+
+    if sub_split == 1:
+        sub_files = lambda f:not 'sub' in f
+    else:
+        sub_files = lambda f:'sub' in f
+
+    open('train_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:-4])))
+    open('test_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-4:])))
+
+    open('train_small_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:-4])))
+    open('test_small_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-4:])))
+
+    open('train_7_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[:7])))
+    open('test_1_3d.list','w').write( '\n'.join(filter(sub_files,glob.glob(dest+'/*.h5')[-1:])))
+
+if __name__ == '__main__':
+    main()
diff --git a/nnlo/data/get_cifar10.py b/nnlo/data/get_cifar10.py
new file mode 100644
index 0000000..82425bf
--- /dev/null
+++ b/nnlo/data/get_cifar10.py
@@ -0,0 +1,58 @@
+### This script downloads the cifar10 dataset, unpacks it, splits it into four pieces, and saves
+# each piece in a separate h5 file.
+
+from numpy import array_split
+from tensorflow.keras.datasets import cifar10
+from tensorflow.python.keras.utils import np_utils
+from tensorflow.python.keras import backend as K
+import h5py
+import os
+import sys
+
+def main(argv):
+    (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
+
+    img_rows = 32
+    img_cols = 32
+    if K.image_data_format() == 'channels_first':
+        X_train = X_train.reshape(X_train.shape[0], 3, img_rows, img_cols)
+        X_test = X_test.reshape(X_test.shape[0], 3, img_rows, img_cols)
+        input_shape = (3, img_rows, img_cols)
+    else:
+        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)
+        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)
+        input_shape = (img_rows, img_cols, 3)
+
+    num_train_pieces = int(argv[0]) if len(argv)>0 else 24
+    num_test_pieces = int(argv[1]) if len(argv)>1 else 4
+    split_X_train = [ X.astype('float32') / 255 for X in array_split(X_train, num_train_pieces) ]
+    split_Y_train = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_train, num_train_pieces) ]
+    split_X_test = [ X.astype('float32') / 255 for X in array_split(X_test, num_test_pieces) ]
+    split_Y_test = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_test, num_test_pieces) ]
+
+    train_list = []
+    for i in range(num_train_pieces):
+        train_name = f"{os.getcwd()}/cifar10_train_%d.h5" % i
+        train_list.append(train_name+"\n")
+        train_outfile = h5py.File( train_name, 'w' )
+        train_outfile.create_dataset( "features", data=split_X_train[i] )
+        train_outfile.create_dataset( "labels", data=split_Y_train[i] )
+        train_outfile.close()
+    with open('train_cifar10.list', 'w') as train_list_file:
+        for f in train_list:
+            train_list_file.write(f)
+
+    test_list = []
+    for i in range(num_test_pieces):
+        test_name = f"{os.getcwd()}/cifar10_test_%d.h5" % i
+        test_list.append(test_name+"\n")
+        test_outfile = h5py.File( test_name, 'w' )
+        test_outfile.create_dataset( "features", data=split_X_test[i] )
+        test_outfile.create_dataset( "labels", data=split_Y_test[i] )
+        test_outfile.close()
+    with open('test_cifar10.list', 'w') as test_list_file:
+        for f in test_list:
+            test_list_file.write(f)
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/nnlo/data/get_mnist.py b/nnlo/data/get_mnist.py
new file mode 100644
index 0000000..93675f0
--- /dev/null
+++ b/nnlo/data/get_mnist.py
@@ -0,0 +1,58 @@
+### This script downloads the MNIST dataset, unpacks it, splits it into four pieces, and saves
+# each piece in a separate h5 file.
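These data scripts write one HDF5 shard per piece plus `train_*.list` / `test_*.list` index files that the training drivers consume. A minimal usage sketch, assuming the packaged module is importable and that the two optional arguments are the train/test piece counts (paths, shard counts, and the shell commands in the comments are illustrative):

```python
# Hedged sketch: split MNIST into shards, then point the driver at the list files.
# Assumes `pip install nnlo` and an MPI environment; counts and paths are illustrative.
from nnlo.data.get_mnist import main as split_mnist

split_mnist(["24", "4"])   # 24 training shards, 4 test shards -> train_mnist.list / test_mnist.list

# Command-line flow mirroring the README (illustrative):
#   GetData mnist
#   mpirun -np 3 TrainingDriver --model mnist --loss categorical_crossentropy \
#          --epochs 3 --train_data train_mnist.list --val_data test_mnist.list
```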
+
+from numpy import array_split
+from tensorflow.keras.datasets import mnist
+from tensorflow.python.keras.utils import np_utils
+from tensorflow.python.keras import backend as K
+import h5py
+import os
+import sys
+
+def main(argv):
+    (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
+
+    img_rows = 28
+    img_cols = 28
+    if K.image_data_format() == 'channels_first':
+        X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
+        X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
+        input_shape = (1, img_rows, img_cols)
+    else:
+        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
+        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
+        input_shape = (img_rows, img_cols, 1)
+
+    num_train_pieces = int(argv[0]) if len(argv)>0 else 24
+    num_test_pieces = int(argv[1]) if len(argv)>1 else 4
+    split_X_train = [ X.astype('float32') / 255 for X in array_split(X_train, num_train_pieces) ]
+    split_Y_train = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_train, num_train_pieces) ]
+    split_X_test = [ X.astype('float32') / 255 for X in array_split(X_test, num_test_pieces) ]
+    split_Y_test = [ np_utils.to_categorical(Y,10) for Y in array_split(Y_test, num_test_pieces) ]
+
+    train_list = []
+    for i in range(num_train_pieces):
+        train_name = f"{os.getcwd()}/mnist_train_%d.h5" % i
+        train_list.append(train_name+"\n")
+        train_outfile = h5py.File( train_name, 'w' )
+        train_outfile.create_dataset( "features", data=split_X_train[i] )
+        train_outfile.create_dataset( "labels", data=split_Y_train[i] )
+        train_outfile.close()
+    with open('train_mnist.list', 'w') as train_list_file:
+        for f in train_list:
+            train_list_file.write(f)
+
+    test_list = []
+    for i in range(num_test_pieces):
+        test_name = f"{os.getcwd()}/mnist_test_%d.h5" % i
+        test_list.append(test_name+"\n")
+        test_outfile = h5py.File( test_name, 'w' )
+        test_outfile.create_dataset( "features", data=split_X_test[i] )
+        test_outfile.create_dataset( "labels", data=split_Y_test[i] )
+        test_outfile.close()
+    with open('test_mnist.list', 'w') as test_list_file:
+        for f in test_list:
+            test_list_file.write(f)
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/nnlo/data/get_topclass.py b/nnlo/data/get_topclass.py
new file mode 100644
index 0000000..b30c167
--- /dev/null
+++ b/nnlo/data/get_topclass.py
@@ -0,0 +1,29 @@
+import os
+import glob
+import sys
+
+def main():
+    dest='/bigdata/shared/LCDJets_Abstract_IsoLep_lt_20'
+    import socket
+    host = os.environ.get('HOST', os.environ.get('HOSTNAME',socket.gethostname()))
+    if 'titan' in host:
+        dest='/ccs/proj/csc291/DATA/LCDJets_Abstract_IsoLep_lt_20'
+    train = glob.glob(dest+'/train/*.h5')
+    test = glob.glob(dest+'/val/*.h5')
+
+    N=10
+    Nt=N//5
+    if len(sys.argv)>1:
+        a = sys.argv[1]
+        if a.isdigit():
+            N = int(a)
+            Nt=N//5
+        else:
+            N,Nt = map(int, a.split(','))
+
+
+    open('train_topclass.list','w').write( '\n'.join(sorted( train[:N] )))
+    open('test_topclass.list','w').write( '\n'.join(sorted( test[:Nt] )))
+
+if __name__ == '__main__':
+    main()
diff --git a/nnlo/data/getdata.py b/nnlo/data/getdata.py
new file mode 100644
index 0000000..f5db855
--- /dev/null
+++ b/nnlo/data/getdata.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+# Rui Zhang 7.2020
+# rui.zhang@cern.ch
+
+import sys
+
+def main():
+    command = sys.argv[1]
+    argv = sys.argv[2:]
+    if command.lower() == 'mnist':
+        from nnlo.data.get_mnist import main
+        main(argv)
+    elif command.lower() == 'cifar10':
+        from nnlo.data.get_cifar10 import main
+        main(argv)
+    else:
+        raise RuntimeError('Unknown
command: {}'.format(command)) + +if __name__ == '__main__': + main() diff --git a/MPIGDriver.py b/nnlo/driver/MPIGDriver.py similarity index 99% rename from MPIGDriver.py rename to nnlo/driver/MPIGDriver.py index 52d5e14..cd39854 100755 --- a/MPIGDriver.py +++ b/nnlo/driver/MPIGDriver.py @@ -21,7 +21,7 @@ import socket -if __name__ == '__main__': +def main(): from TrainingDriver import add_loader_options parser = argparse.ArgumentParser() parser.add_argument('--verbose',help='display metrics for each training batch',action='store_true') @@ -162,3 +162,6 @@ comm.Barrier() logging.info("Terminating") + +if __name__ == '__main__': + main() diff --git a/OptimizationDriver.py b/nnlo/driver/OptimizationDriver.py similarity index 99% rename from OptimizationDriver.py rename to nnlo/driver/OptimizationDriver.py index 8b6359b..a140971 100755 --- a/OptimizationDriver.py +++ b/nnlo/driver/OptimizationDriver.py @@ -105,9 +105,7 @@ def make_opt_parser(): return parser - -if __name__ == '__main__': - +def main(): logging.info("Process is on {}".format(socket.gethostname())) parser = make_opt_parser() args = parser.parse_args() @@ -323,3 +321,6 @@ def make_opt_parser(): checkpoint=args.checkpoint, checkpoint_interval=args.checkpoint_interval) block.run() + +if __name__ == '__main__': + main() diff --git a/TrainingDriver.py b/nnlo/driver/TrainingDriver.py similarity index 81% rename from TrainingDriver.py rename to nnlo/driver/TrainingDriver.py index cc8a654..01be474 100755 --- a/TrainingDriver.py +++ b/nnlo/driver/TrainingDriver.py @@ -12,6 +12,7 @@ from mpi4py import MPI from time import time,sleep +import importlib from nnlo.mpi.manager import MPIManager, get_device from nnlo.train.algo import Algo @@ -27,6 +28,7 @@ def add_log_option(parser): # logging configuration parser.add_argument('--log-file', default=None, dest='log_file', help='log file to write, in additon to output stream') parser.add_argument('--log-level', default='info', dest='log_level', help='log level (debug, info, warn, error)') + parser.add_argument('--output', default='./', dest='output', help='output folder') def add_master_option(parser): parser.add_argument('--master-gpu',help='master process should get a gpu', @@ -89,13 +91,13 @@ def add_train_options(parser): parser.add_argument('--thread_validation', help='run a single process', action='store_true') # model arguments - parser.add_argument('--model', help='File containing model architecture (serialized in JSON/pickle, or provided in a .py file') + parser.add_argument('--model', choices=['mnist', 'mnist_torch', 'cifar10', 'cifar10_torch'], help='File containing model architecture (serialized in JSON/pickle, or provided in a .py file') parser.add_argument('--trial-name', help='descriptive name for trial', default='train', dest='trial_name') # training data arguments - parser.add_argument('--train_data', help='text file listing data inputs for training', default=None) - parser.add_argument('--val_data', help='text file lis`ting data inputs for validation', default=None) + parser.add_argument('--train_data', help='text file listing data inputs for training', required=True) + parser.add_argument('--val_data', help='text file lis`ting data inputs for validation', required=True) parser.add_argument('--features-name', help='name of HDF5 dataset with input features', default='features', dest='features_name') parser.add_argument('--labels-name', help='name of HDF5 dataset with output labels', @@ -198,21 +200,11 @@ def make_algo( args, use_tf, comm, validate_every ): def 
make_train_val_lists(m_module, args): train_list = val_list = [] - if args.train_data: - with open(args.train_data) as train_list_file: - train_list = [ s.strip() for s in train_list_file.readlines() ] - elif m_module is not None: - train_list = m_module.get_train() - else: - logging.info("no training data provided") + with open(args.train_data) as train_list_file: + train_list = [ s.strip() for s in train_list_file.readlines() ] - if args.val_data: - with open(args.val_data) as val_list_file: - val_list = [ s.strip() for s in val_list_file.readlines() ] - elif m_module is not None: - val_list = m_module.get_val() - else: - logging.info("no validation data provided") + with open(args.val_data) as val_list_file: + val_list = [ s.strip() for s in val_list_file.readlines() ] if not train_list: logging.error("No training data provided") @@ -220,12 +212,7 @@ def make_train_val_lists(m_module, args): logging.error("No validation data provided") return (train_list, val_list) -def make_features_labels(m_module, args): - features_name = m_module.get_features() if m_module is not None and hasattr(m_module,"get_features") else args.features_name - labels_name = m_module.get_labels() if m_module is not None and hasattr(m_module,"get_labels") else args.labels_name - return (features_name, labels_name) - -if __name__ == '__main__': +def main(): parser = make_train_parser() args = parser.parse_args() initialize_logger(filename=args.log_file, file_level=args.log_level, stream_level=args.log_level) @@ -234,8 +221,21 @@ def make_features_labels(m_module, args): if 'torch' in args.model: a_backend = 'torch' - m_module = __import__(args.model.replace('.py','').replace('/', '.'), fromlist=[None]) if '.py' in args.model else None - (features_name, labels_name) = make_features_labels(m_module, args) + m_module, model_source = None, None + try: + if args.model == 'mnist': + m_module = importlib.import_module(f'nnlo.models.model_mnist_tf') + model_source = 'models/model_mnist_tf.py' + elif args.model == 'mnist_torch': + m_module = importlib.import_module(f'nnlo.models.model_mnist_torch') + model_source = 'models/model_mnist_torch.py' + elif args.model == 'cifar10': + m_module = importlib.import_module(f'nnlo.models.model_cifar10_tf') + model_source = 'models/model_cifar10_tf.py' + except Exception as e: + logging.fatal(e) + + (features_name, labels_name) = args.features_name, args.labels_name (train_list, val_list) = make_train_val_lists(m_module, args) comm = MPI.COMM_WORLD.Dup() @@ -255,37 +255,42 @@ def make_features_labels(m_module, args): if use_torch: logging.debug("Using pytorch") - model_builder = ModelPytorch(comm, source=args.model, weights=model_weights, gpus=1 if 'gpu' in device else 0) + model_builder = ModelPytorch(comm, source=model_source, weights=model_weights, gpus=1 if 'gpu' in device else 0) else: logging.debug("Using TensorFlow") os.environ['KERAS_BACKEND'] = 'tensorflow' + import tensorflow as tf import_keras() - import keras.backend as K - gpu_options=K.tf.GPUOptions( - per_process_gpu_memory_fraction=0.1, #was 0.0 - allow_growth = True, - visible_device_list = device[-1] if 'gpu' in device else '') - gpu_options=K.tf.GPUOptions( - per_process_gpu_memory_fraction=0.0, - allow_growth = True,) + #tf.config.gpu.set_per_process_memory_fraction(0.1) + #gpu_options=K.tf.GPUOptions( + # per_process_gpu_memory_fraction=0.1, #was 0.0 + # allow_growth = True, + # visible_device_list = device[-1] if 'gpu' in device else '') + #gpu_options=K.tf.GPUOptions( + # per_process_gpu_memory_fraction=0.0, + # 
allow_growth = True,) + gpu_devices = tf.config.experimental.list_physical_devices('GPU') + for device in gpu_devices: + tf.config.experimental.set_memory_growth(device, True) + #NTHREADS=(2,1) - NTHREADS=None - if NTHREADS is None: - K.set_session( K.tf.Session( config=K.tf.ConfigProto( - allow_soft_placement=True, log_device_placement=False, - gpu_options=gpu_options - ) ) ) - else: - K.set_session( K.tf.Session( config=K.tf.ConfigProto( - allow_soft_placement=True, log_device_placement=False, - gpu_options=gpu_options, - intra_op_parallelism_threads=NTHREADS[0], - inter_op_parallelism_threads=NTHREADS[1], - ) ) ) + #NTHREADS=None + #if NTHREADS is None: + # K.set_session( K.tf.Session( config=K.tf.ConfigProto( + # allow_soft_placement=True, log_device_placement=False, + # gpu_options=gpu_options + # ) ) ) + #else: + # K.set_session( K.tf.Session( config=K.tf.ConfigProto( + # allow_soft_placement=True, log_device_placement=False, + # gpu_options=gpu_options, + # intra_op_parallelism_threads=NTHREADS[0], + # inter_op_parallelism_threads=NTHREADS[1], + # ) ) ) - model_builder = ModelTensorFlow( comm, source=args.model, weights=model_weights) + model_builder = ModelTensorFlow( comm, source=model_source, weights=model_weights) data = make_loader(args, features_name, labels_name, train_list) @@ -313,8 +318,8 @@ def make_features_labels(m_module, args): else: model_name = os.path.basename(args.model).replace('.json','') - json_name = '_'.join([model_name,args.trial_name,"history.json"]) - tl_json_name = '_'.join([model_name,args.trial_name,"timeline.json"]) + json_name = args.output + '/' + '_'.join([model_name,args.trial_name,"history.json"]) + tl_json_name = args.output + '/' + '_'.join([model_name,args.trial_name,"timeline.json"]) # Process 0 launches the training procedure if comm.Get_rank() == 0: @@ -333,3 +338,6 @@ def make_features_labels(m_module, args): comm.barrier() logging.info("Terminating") if args.timeline: Timeline.collect(clean=True, file_name=tl_json_name) + +if __name__ == '__main__': + main() diff --git a/nnlo/driver/__init__.py b/nnlo/driver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nnlo/models/__init__.py b/nnlo/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nnlo/models/model_cifar10_tf.py b/nnlo/models/model_cifar10_tf.py new file mode 100644 index 0000000..7b693c4 --- /dev/null +++ b/nnlo/models/model_cifar10_tf.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + +def get_name(): + return 'cifar10' + +def get_model(**args): + from tensorflow.keras.models import Sequential, Model + from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute + from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Conv2D + import tensorflow.keras.backend as K + if args:logging.debug("receiving arguments {}".format(args)) + nb_classes = 10 + img_rows, img_cols = 32, 32 + + # use 1 kernel size for all convolutional layers + ks = args.get('kernel_size', 3) + + # tune the number of filters for each convolution layer + nb_filters1 = args.get('nb_filters1', 48) + nb_filters2 = args.get('nb_filters2', 96) + nb_filters3 = args.get('nb_filters3', 192) + + # tune the pool size once + ps = args.get('pool_size', 2) + pool_size = (ps,ps) + + # tune the dropout rates independently + do4 = args.get('dropout1', 0.25) + do5 = args.get('dropout2', 0.5) + + # tune the dense layers independently + dense1 = args.get('dense1', 512) + dense2 = args.get('dense2', 256) + + if 
K.image_data_format() == 'channels_first': + input_shape = (3, img_rows, img_cols) + else: + input_shape = (img_rows, img_cols, 3) + + #act = 'sigmoid' + act = 'relu' + + i = Input( input_shape) + l = Conv2D(nb_filters1,( ks, ks), padding='same', activation = act)(i) + l = MaxPooling2D(pool_size=pool_size)(l) + #l = Dropout(do1)(l) + + l = Conv2D(nb_filters2, (ks, ks), padding='same',activation=act)(l) + #l = Conv2D(nb_filters2, (ks, ks))(l) + l = MaxPooling2D(pool_size=pool_size)(l) + #l = Dropout(do2)(l) + + l = Conv2D(nb_filters3, (ks, ks), padding='same',activation=act)(l) + #l = Conv2D(nb_filters3, (ks, ks))(l) + l = MaxPooling2D(pool_size=pool_size)(l) + #l = Dropout(do3)(l) + + l = Flatten()(l) + l = Dense(dense1,activation=act)(l) + l = Dropout(do4)(l) + l = Dense(dense2,activation=act)(l) + l =Dropout(do5)(l) + + o = Dense(nb_classes, activation='softmax')(l) + + model = Model(inputs=i, outputs=o) + #model.summary() + + return model + +from skopt.space import Real, Integer, Categorical +get_model.parameter_range = [ + Integer(10,300, name='nb_filters1'), + Integer(10,300, name='nb_filters2'), + Integer(10,300, name='nb_filters3'), + Integer(50,1000, name='dense1'), + Integer(50,1000, name='dense2'), + Real(0.0, 1.0, name='dropout1'), + Real(0.0, 1.0, name='dropout2') +] diff --git a/nnlo/models/model_example_tf.py b/nnlo/models/model_example_tf.py new file mode 100644 index 0000000..43d8c2e --- /dev/null +++ b/nnlo/models/model_example_tf.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + +def get_name(): + return 'example' + +def get_model(**args): + """Example model from keras documentation""" + from tensorflow.keras.models import Sequential + from tensorflow.keras.layers import Dense, Activation + model = Sequential() + model.add(Dense(output_dim=64, input_dim=100)) + model.add(Activation("relu")) + model.add(Dense(output_dim=10)) + model.add(Activation("softmax")) + return model diff --git a/examples/example_hls4mlgru.py b/nnlo/models/model_hls4mlgru.py similarity index 96% rename from examples/example_hls4mlgru.py rename to nnlo/models/model_hls4mlgru.py index fa168d7..2eb3783 100644 --- a/examples/example_hls4mlgru.py +++ b/nnlo/models/model_hls4mlgru.py @@ -1,6 +1,6 @@ #from keras.activations import relu, selu, elu -from keras.models import Model, Sequential -from keras.layers import Dense, Input, GRU, Dropout, Flatten, Permute +from keras.models import Model +from keras.layers import Dense, Input, GRU, Dropout, Permute import numpy as np def get_model(**args): diff --git a/examples/example_jedi_torch.py b/nnlo/models/model_jedi_torch.py similarity index 99% rename from examples/example_jedi_torch.py rename to nnlo/models/model_jedi_torch.py index df826fc..cb6fa6b 100644 --- a/examples/example_jedi_torch.py +++ b/nnlo/models/model_jedi_torch.py @@ -213,7 +213,6 @@ def get_labels(): if __name__ == "__main__": - print("do the data conversion") import glob import h5py import numpy as np @@ -230,5 +229,4 @@ def get_labels(): fo['X'] = X fo['Y'] = Y fo.close() - print(f,"converted") diff --git a/nnlo/models/model_mnist_tf.py b/nnlo/models/model_mnist_tf.py new file mode 100644 index 0000000..8c15019 --- /dev/null +++ b/nnlo/models/model_mnist_tf.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + +def get_name(): + return 'mnist' + +def get_model(**args): + from tensorflow.keras.models import Sequential, Model + from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute + from 
tensorflow.keras.layers import Convolution2D, MaxPooling2D, Conv2D + import tensorflow.keras.backend as K + """MNIST ConvNet from keras/examples/mnist_cnn.py""" + #np.random.seed(1337) # for reproducibility + if args:logging.debug("receiving arguments {}".format(args)) + nb_classes = 10 + # input image dimensions + img_rows, img_cols = 28, 28 + # number of convolutional filters to use + nb_filters = args.get('nb_filters',32) + # size of pooling area for max pooling + ps = args.get('pool_size',2) + + # convolution kernel size + ks = args.get('kernel_size',3) + do = args.get('dropout', 0.25) + dense = args.get('dense', 128) + + pool_size = (ps,ps) + if K.image_data_format() == 'channels_first': + input_shape = (1, img_rows, img_cols) + else: + input_shape = (img_rows, img_cols, 1) + model = Sequential() + model.add(Convolution2D(nb_filters, (ks, ks), + padding='valid', + input_shape=input_shape)) + model.add(Activation('relu')) + model.add(Convolution2D(nb_filters, (ks, ks))) + model.add(Activation('relu')) + model.add(MaxPooling2D(pool_size=pool_size)) + model.add(Dropout(do)) + model.add(Flatten()) + model.add(Dense(dense)) + model.add(Activation('relu')) + model.add(Dropout(do)) + model.add(Dense(nb_classes)) + model.add(Activation('softmax')) + return model + +from skopt.space import Real, Integer, Categorical +get_model.parameter_range = [ + Integer(10,50, name='nb_filters'), + Integer(2,10, name='pool_size'), + Integer(2,10, name='kernel_size'), + Integer(50,200, name='dense'), + Real(0.0, 1.0, name='dropout') +] + diff --git a/nnlo/models/model_mnist_torch.py b/nnlo/models/model_mnist_torch.py new file mode 100644 index 0000000..e0a6df6 --- /dev/null +++ b/nnlo/models/model_mnist_torch.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch +import torch.nn as nn +import torch.nn.functional as F + +def get_name(): + return 'mnist_torch' + +class MNistNet(nn.Module): + def __init__(self, **args): + super(MNistNet, self).__init__() + ks = int(args.get('kernel_size',5)) + do = float(args.get('dropout',0.5)) + dense = int(args.get('dense',50)) + self.conv1 = nn.Conv2d(1, 10, kernel_size=ks) + self.conv2 = nn.Conv2d(10, 20, kernel_size=ks) + self.conv2_drop = nn.Dropout2d(do) + self.fc1 = nn.Linear(320, dense) + self.fc2 = nn.Linear(dense, 10) + + def forward(self, x): + x = x.permute(0,3,1,2).float() + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + #return F.log_softmax(x, dim=1) + #return F.softmax(x) + #return F.cross_entropy(x) + return x + +def get_model(**args): + if args:logging.debug("receiving arguments {}".format(args)) + model = MNistNet(**args) + return model + +from skopt.space import Real, Integer, Categorical +get_model.parameter_range = [ + Integer(2,10, name='kernel_size'), + Integer(50,200, name='dense'), + Real(0.0, 1.0, name='dropout') +] diff --git a/nnlo/models/model_topclass_tf.py b/nnlo/models/model_topclass_tf.py new file mode 100644 index 0000000..18cc99b --- /dev/null +++ b/nnlo/models/model_topclass_tf.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + +def get_name(): + return 'topclass' + +def get_model(**args): + from tensorflow.keras.models import Sequential, Model + from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Input, Permute + from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Conv2D + if 
args:logging.debug("receiving arguments {}".format(args)) + conv_layers=args.get('conv_layers',2) + dense_layers=args.get('dense_layers',2) + dropout=args.get('dropout',0.2) + kernel = args.get('kernel_size',3) + classes=3 + in_channels=5 + in_ch = in_channels + ## the trace in the input file is 750, 150, 94, 5 + input = Input( (150,94,in_ch)) + ## convs + c = input + for i in range(conv_layers): + channel_in = in_ch*((i+1)%5) + channel_out = in_ch*((i+2)%5) + if channel_in == 0: channel_in += 1 + if channel_out == 0: channel_out += 1 + c = Conv2D( filters=channel_out, kernel_size=(kernel,kernel) , strides=1, padding="same", activation = 'relu') (c) + c = Conv2D(1, (kernel,kernel), activation = 'relu',strides=2, padding="same")(c) + + ## pooling + pool = args.get('pool', 10) + m = MaxPooling2D((pool,pool))(c) + f = Flatten()(m) + d = f + base = args.get('hidden_factor',5)*100 + for i in range(dense_layers): + N = int(base//(2**(i+1))) + d = Dense( N, activation='relu')(d) + if dropout: + d = Dropout(dropout)(d) + o = Dense(classes, activation='softmax')(d) + + model = Model(inputs=input, outputs=o) + #model.summary() + return model + +from skopt.space import Real, Integer, Categorical +get_model.parameter_range = [ + Integer(1,6, name='conv_layers'), + Integer(1,6, name='dense_layers'), + Integer(1,6, name='kernel_size'), + Real(0.0, 1.0, name='dropout') +] diff --git a/models/TorchModels.py b/nnlo/models/model_topclass_torch.py similarity index 71% rename from models/TorchModels.py rename to nnlo/models/model_topclass_torch.py index fa663e7..6560be9 100644 --- a/models/TorchModels.py +++ b/nnlo/models/model_topclass_torch.py @@ -1,9 +1,10 @@ +#!/usr/bin/env python +# Rui Zhang 8.2020 +# rui.zhang@cern.ch + import torch from torch.autograd import Variable import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.distributed as dist import torch.optim import torch.utils.data.distributed import torchvision.transforms as transforms @@ -11,32 +12,7 @@ import torchvision.models as models import torch.nn.functional as F import numpy - -class MNistNet(nn.Module): - def __init__(self, **args): - super(MNistNet, self).__init__() - ks = int(args.get('kernel_size',5)) - do = float(args.get('dropout',0.5)) - dense = int(args.get('dense',50)) - self.conv1 = nn.Conv2d(1, 10, kernel_size=ks) - self.conv2 = nn.Conv2d(10, 20, kernel_size=ks) - self.conv2_drop = nn.Dropout2d(do) - self.fc1 = nn.Linear(320, dense) - self.fc2 = nn.Linear(dense, 10) - - def forward(self, x): - x = x.permute(0,3,1,2).float() - x = F.relu(F.max_pool2d(self.conv1(x), 2)) - x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) - x = x.view(-1, 320) - x = F.relu(self.fc1(x)) - x = F.dropout(x, training=self.training) - x = self.fc2(x) - #return F.log_softmax(x, dim=1) - #return F.softmax(x) - #return F.cross_entropy(x) - return x - +import logging ### Build a customized CNN with given hyperparameters @@ -65,7 +41,7 @@ def __init__(self, dense_layers, dropout ,base): for i in range(dense_layers): il = int(base//(2**i)) ol = int(base//(2**(i+1))) - print (il,"=>",ol) + logging.info("{} =>> {}".format(il,ol)) self.add_module('denselayer%d'%(i), nn.Linear(il, ol)) self.add_module('relu%d'%(i), nn.ReLU(inplace=True)) self.dropout = dropout @@ -89,7 +65,7 @@ def build_net(self,*args, **kwargs): self.adapt_pool = nn.AdaptiveMaxPool2d((base_2,base_2)) il = int(base//(2**(args[1]))) ol = int(args[3]) - print (il,"=>",ol) + logging.info("{} =>> {}".format(il,ol)) self.output = nn.Linear(il, ol) 
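Each module under `nnlo/models/` now exposes the same small interface: `get_name()`, `get_model(**args)`, and an optional `get_model.parameter_range` for the skopt-based optimization. A minimal sketch of how a caller can resolve and build one of these models, assuming the `nnlo.models.model_<name>_<backend>` naming used in this diff (the helper below is hypothetical, not part of the package):

```python
# Hypothetical helper illustrating the model-module interface; not part of nnlo itself.
import importlib

def build_model(name, backend="tf", **hyperparams):
    # e.g. nnlo.models.model_mnist_tf or nnlo.models.model_mnist_torch
    module = importlib.import_module("nnlo.models.model_{}_{}".format(name, backend))
    model = module.get_model(**hyperparams)                      # Keras or torch.nn model
    space = getattr(module.get_model, "parameter_range", None)   # skopt search space, if defined
    return module.get_name(), model, space

# trial, model, space = build_model("mnist", backend="tf")
```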
def forward(self, x): @@ -101,3 +77,26 @@ def forward(self, x): return self.output(x) +def get_name(): + return 'topclass_torch' + +def get_model(**args): + if args:logging.debug("receiving arguments {}".format(args)) + conv_layers=args.get('conv_layers',2) + dense_layers=args.get('dense_layers',2) + dropout=args.get('dropout',0.5) + classes=3 + in_channels=5 + try: + from TorchModels import CNN + except: + from .TorchModels import CNN + model = CNN(conv_layers=conv_layers, dense_layers=dense_layers, dropout=dropout, classes=classes, in_channels=in_channels) + return model + +from skopt.space import Real, Integer, Categorical +get_model.parameter_range = [ + Integer(1,6, name='conv_layers'), + Integer(1,6, name='dense_layers'), + Real(0.0,1.0, name='dropout') +] diff --git a/nnlo/train/GanModel.py b/nnlo/train/GanModel.py index 93389d3..c939089 100644 --- a/nnlo/train/GanModel.py +++ b/nnlo/train/GanModel.py @@ -1,17 +1,16 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import print_function from collections import defaultdict try: import cPickle as pickle except ImportError: import pickle -import keras -from keras.models import Model -from keras.layers import Input -from keras import optimizers -from keras.optimizers import RMSprop,SGD +import tensorflow.keras as keras +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Input +from tensorflow.keras import optimizers +from tensorflow.keras.optimizers import RMSprop,SGD #from EcalEnergyGan import generator, discriminator import numpy as np import numpy.core.umath_tests as umath @@ -23,17 +22,16 @@ import logging import keras.backend as K -from keras.models import Model, Sequential -from keras.layers import (Input, Dense, Reshape, Flatten, Lambda, merge, +from tensorflow.keras.models import Model, Sequential +from tensorflow.keras.layers import (Input, Dense, Reshape, Flatten, Lambda, merge, Dropout, BatchNormalization, Activation, Embedding) -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import (UpSampling3D, Conv3D, ZeroPadding3D, +from tensorflow.keras.layers.advanced_activations import LeakyReLU +from tensorflow.keras.layers.convolutional import (UpSampling3D, Conv3D, ZeroPadding3D, AveragePooling3D) from ..train.model import MPIModel, ModelBuilder from .optimizer import OptimizerBuilder -import keras kv2 = keras.__version__.startswith('2') def hn(): @@ -46,8 +44,6 @@ def weights(m): _weights_names += [ll.name for ll in layer.weights] _weights = m.get_weights() _disp = [(np.min(s),np.max(s),np.mean(s),np.std(s),s.shape,n) for s,n in zip(_weights,_weights_names)] - #for ii,dd in enumerate(_disp): - # print (ii,dd) def weights_diff( m ,lap=True, init=False,label='', alert=None):#1000.): if (weights_diff.old_weights is None) or init: @@ -62,14 +58,10 @@ def weights_diff( m ,lap=True, init=False,label='', alert=None):#1000.): ## make the diffs _diffs = [np.subtract(a,b) for (a,b) in zip(check_on_weight,and_check_on_weight)] _diffsN = [(np.min(s),np.max(s),np.mean(s),np.std(s),s.shape,n) for s,n in zip(_diffs,_weights_names)] - #print ('\n'.join(['%s'%dd for dd in _diffsN])) for ii,dd in enumerate(_diffsN): if alert: if not any([abs(vv) > alert for vv in dd[:3]]): continue - #print (ii,'WD %s'%label,dd) - #if dd[-2] == (8,): - # print ("\t",_diffs[ii]) if lap: weights_diff.old_weights = m.get_weights() @@ -250,16 +242,11 @@ def get_moments(images, sumsx, sumsy, sumsz, totalE, m): def load_sorted(sorted_path): sorted_files = sorted(glob.glob(sorted_path)) 
- #print ("found sorterd files",sorted( sorted_files)) energies = [] srt = {} for f in sorted_files: - #print (f) - #energy = int(list(filter(str.isdigit, f))[:-1]) file_name=f[f.find('sorted_'):-1] - #energy = int(''.join(list(filter(str.isdigit, f))[:-1])) energy = int(''.join(list(filter(str.isdigit, file_name))[:-1]))*10 - #print ("found files for energy",energy) energies.append(energy) srtfile = h5py.File(f,'r') srt["events" + str(energy)] = np.array(srtfile.get('ECAL')) @@ -368,22 +355,16 @@ def __init__(self, **args): self.calculate_fom = args.get('calculate_fom',True) if self.tell: - #print ("Generator summary") - #self.generator.summary() - #print ("Discriminator summary") - #self.discriminator.summary() - #print ("Combined summary") - #self.combined.summary() pass - if True: - if self.with_fixed_disc: print ("the batch norm weights are fixed. heavey weight re-assigning") - if self.checkpoint: print ("Checkpointing the model weigths after %d batch, based on the process id"%self.checkpoint) - if self._onepass: print ("Training in one pass") - if self._reversedorder: print ("will train generator first, then discriminator") - if self._heavycheck: print("running heavy check on weight sanity") - if self._show_values: print("showing the input values at each batch") - if self._show_loss: print("showing the loss at each batch") - if self._show_weights: print("showing weights statistics at each batch") + #if True: + # if self.with_fixed_disc: print ("the batch norm weights are fixed. heavey weight re-assigning") + # if self.checkpoint: print ("Checkpointing the model weigths after %d batch, based on the process id"%self.checkpoint) + # if self._onepass: print ("Training in one pass") + # if self._reversedorder: print ("will train generator first, then discriminator") + # if self._heavycheck: print("running heavy check on weight sanity") + # if self._show_values: print("showing the input values at each batch") + # if self._show_loss: print("showing the loss at each batch") + # if self._show_weights: print("showing weights statistics at each batch") MPIModel.__init__(self, models = [ self.discriminator, @@ -472,13 +453,10 @@ def big_assemble_models(self): def ext_assemble_models(self): - #print('[INFO] Building generator') self.generator = generator(self.latent_size, with_bn = self.gen_bn) - #print('[INFO] Building discriminator') self.discriminator = discriminator(discr_drop_out = self.discr_drop_out) if self.with_fixed_disc: self.fixed_discriminator = discriminator(discr_drop_out = self.discr_drop_out, fixed_bn=True) - #print('[INFO] Building combined') latent = Input(shape=(self.latent_size, ), name='combined_z') fake_image = self.generator(latent) if self.with_fixed_disc: @@ -494,7 +472,6 @@ def ext_assemble_models(self): def compile(self, **args): ## args are fully ignored here - #print('[INFO] IN GAN MODEL: COMPILE') if 'optimizer' in args and isinstance(args['optimizer'], OptimizerBuilder): opt_builder = args['optimizer'] else: @@ -512,7 +489,6 @@ def make_opt(**args): else: opt = SGD(lr=lr) - #print ("optimizer for compiling",opt) return opt self.generator.compile( @@ -536,14 +512,11 @@ def make_opt(**args): loss_weights=self.discr_loss_weights ) self.combined.metrics_names = self.discriminator.metrics_names - #print ("disc metrics",self.discriminator.metrics_names) - #print ("comb metrics",self.combined.metrics_names) if hasattr(self, 'calculate_fom'): self.energies, self.g4var = self.prepare_geant4_data() - #print ("compiled") def assemble_models(self): self.ext_assemble_models() 
@@ -554,44 +527,33 @@ def batch_transform(self, x, y): y_disc_real =y show_values = self._show_values def mm( label, t): - #print (label,np.min(t),np.max(t),np.mean(t),np.std(t),t.shape) pass if self.batch_size is None: ## fix me, maybe self.batch_size = x_disc_real.shape[0] - #print (hn(),"initializing sizes",x_disc_real.shape,[ yy.shape for yy in y]) noise = np.random.normal(0, 1, (self.batch_size, self.latent_size)) sampled_energies = np.random.uniform(0.1, 5,(self.batch_size,1)) generator_ip = np.multiply(sampled_energies, noise) - #if show_values: print ('energies',np.ravel(sampled_energies)[:10]) if show_values: mm('energies',sampled_energies) ratio = np.polyval(root_fit, sampled_energies) - #if show_values: print ('ratios',np.ravel(ratio)[:10]) if show_values: mm('ratios',ratio) ecal_ip = np.multiply(ratio, sampled_energies) - #if show_values: print ('estimated sum cells',np.ravel(ecal_ip)[:10]) if show_values: mm('estimated sum cells',ecal_ip) now = time.mktime(time.gmtime()) - #if self.p_cc>1 and len(self.p_t)%100==0: - # print ("prediction average",np.mean(self.p_t),"[s]' over",len(self.p_t)) generated_images = self.generator.predict(generator_ip) ecal_rip = np.squeeze(np.sum(generated_images, axis=(1, 2, 3))) - #if show_values: print ('generated sum cells',np.ravel(ecal_rip)[:10]) if show_values: mm('generated sum cells',ecal_rip) norm_overflow = False apply_identify = False ## False was intended originally if norm_overflow and np.max( ecal_rip ) > 1000.: - #if show_values: print ("normalizing back") - #ecal_ip = ecal_rip generated_images /= np.max( generated_images ) ecal_rip = np.squeeze(np.sum(generated_images, axis=(1, 2, 3))) - #if show_values: print ('generated sum cells',np.ravel(ecal_rip)[:10]) if show_values: mm('generated sum cells',ecal_rip) elif apply_identify: ecal_ip = ecal_rip @@ -625,7 +587,6 @@ def mm( label, t): c_noise = np.random.normal(0, 1, (2*self.batch_size, self.latent_size)) - ###print ('noise',np.ravel(noise)[:10]) c_sampled_energies = np.random.uniform(0.1, 5, (2*self.batch_size,1 )) c_generator_ip = np.multiply(c_sampled_energies, c_noise) c_ratio = np.polyval(root_fit, c_sampled_energies) @@ -651,9 +612,6 @@ def test_on_batch(self,x, y, sample_weight=None): (X_for_disc,Y_for_disc,X_for_combined,Y_for_combined) = self.batch_transform(x,y) epoch_disc_loss = self.discriminator.test_on_batch(X_for_disc,Y_for_disc) epoch_gen_loss = self.combined.test_on_batch(X_for_combined,Y_for_combined) - #if show_loss: - # print ("test discr loss",epoch_disc_loss) - # print ("test combined loss",epoch_gen_loss) else: ((x_disc_real,re_y),(generated_images, y_disc_fake),(x_comb1,y_comb1),(x_comb2,y_comb2)) = self.batch_transform(x,y) real_disc_loss = self.discriminator.test_on_batch( x_disc_real,re_y ) @@ -663,9 +621,6 @@ def test_on_batch(self,x, y, sample_weight=None): c_loss1= self.combined.test_on_batch( x_comb1,y_comb1 ) c_loss2= self.combined.test_on_batch(x_comb2,y_comb2 ) epoch_gen_loss = [(a + b) / 2 for a, b in zip(c_loss1,c_loss2)] - #if show_loss: - # print ("test discr loss",real_disc_loss,fake_disc_loss) - # print ("test combined loss",c_loss1, c_loss2) @@ -684,7 +639,7 @@ def train_on_batch(self, x, y, def _checkpoint(self): if self.checkpoint and (self.g_cc%self.checkpoint)==0: dest='%s/mpi_generator_%s_%s.h5'%(os.environ.get('GANCHECKPOINTLOC','.'),socket.gethostname(),os.getpid()) - print ("Saving generator to",dest,"at",self.g_cc) + logging.info("Saving generator to {} at {}".format(dest, self.g_cc)) self.generator.save_weights(dest) def 
_onepass_train_on_batch(self, x, y, @@ -708,8 +663,6 @@ def _train_disc(): self.discriminator.trainable = True now = time.mktime(time.gmtime()) epoch_disc_loss = self.discriminator.train_on_batch(X_for_disc,Y_for_disc) - #if show_loss: - # print (self.d_cc," discr loss",epoch_disc_loss) done = time.mktime(time.gmtime()) if self.d_cc: self.d_t.append( done - now ) @@ -725,13 +678,10 @@ def _train_comb(noT=False): self.discriminator.trainable = False now = time.mktime(time.gmtime()) if noT: - #print ("evaluating the combined model") epoch_gen_loss = self.combined.test_on_batch(X_for_combined,Y_for_combined) else: epoch_gen_loss = self.combined.train_on_batch(X_for_combined,Y_for_combined) - #if show_loss: - # print (self.g_cc,"combined loss",epoch_gen_loss) done = time.mktime(time.gmtime()) if self.g_cc: self.g_t.append( done - now ) @@ -766,12 +716,6 @@ def _train_comb(noT=False): weights( self.combined ) - #if len(self.g_t)>0 and len(self.g_t)%100==0: - # print ("generator average ",np.mean(self.g_t),"[s] over",len(self.g_t)) - - #if len(self.d_t)>0 and len(self.d_t)%100==0: - # print ("discriminator average",np.mean(self.d_t),"[s] over ",len(self.d_t)) - self._checkpoint() return np.asarray([epoch_disc_loss, epoch_gen_loss]) @@ -784,8 +728,6 @@ def _twopass_train_on_batch(self, x, y, show_loss = self._show_loss show_weights = self._show_weights - #if self.d_cc>1 and len(self.d_t)%100==0: - # print ("discriminator average",np.mean(self.d_t),"[s] over ",len(self.d_t)) self.discriminator.trainable = True if self._heavycheck: @@ -822,9 +764,6 @@ def _twopass_train_on_batch(self, x, y, weights_diff( on_weight , label='D-fake') - #if show_loss: - #print (self.discriminator.metrics_names) - #print (self.d_cc,"discr loss",real_batch_loss,fake_batch_loss) epoch_disc_loss = np.asarray([(a + b) / 2 for a, b in zip(real_batch_loss, fake_batch_loss)]) done = time.mktime(time.gmtime()) if self.d_cc: @@ -837,7 +776,6 @@ def _twopass_train_on_batch(self, x, y, weights( self.combined ) if self.g_cc>1 and len(self.g_t)%100==0: - #print ("generator average ",np.mean(self.g_t),"[s] over",len(self.g_t)) now = time.mktime(time.gmtime()) if self.g_cc: @@ -852,9 +790,6 @@ def _twopass_train_on_batch(self, x, y, if show_weights: weights( on_weight ) weights_diff( on_weight , label='C-2') - #if show_loss: - # #print(self.combined.metrics_names) - # print (self.g_cc,"combined loss",c_loss1,c_loss2) epoch_gen_loss = np.asarray([(a + b) / 2 for a, b in zip(c_loss1,c_loss2)]) done = time.mktime(time.gmtime()) if self.g_cc: @@ -871,18 +806,18 @@ def _twopass_train_on_batch(self, x, y, checks = [np.all(np.equal(a,b)) for (a,b) in zip(check_on_weight,and_check_on_weight)] weights_have_changed = not all(checks) weights_are_all_equal = all(checks) - print ('Weights are the same?',checks) + logging.info("Weights are the same? 
{}".format(str(checks))) if weights_have_changed: for iw,b in enumerate(checks): if not b: - print (iw,"This",check_on_weight[iw].shape) - print (np.ravel(check_on_weight[iw])[:10]) - print (iw,"And that",and_check_on_weight[iw].shape) - print (np.ravel(and_check_on_weight[iw])[:10]) + logging.info("{} This {}".format(iw,str(check_on_weight[iw].shape))) + logging.info("{}".format(np.ravel(check_on_weight[iw])[:10])) + logging.info("{} And that {}".format(iw,and_check_on_weight[iw].shape)) + logging.info("{}".format(np.ravel(and_check_on_weight[iw])[:10])) else: - print ("weights are all identical") - print (np.ravel(and_check_on_weight[1])[:10]) - print (np.ravel(check_on_weight[1])[:10]) + logging.info("weights are all identical") + logging.info("".format(str(np.ravel(and_check_on_weight[1])[:10]))) + logging.info("".format(str(np.ravel(check_on_weight[1])[:10]))) self._checkpoint() @@ -891,7 +826,7 @@ def _twopass_train_on_batch(self, x, y, switching_loss = (1.,1.) if False and not self.recompiled and epoch_disc_loss[0] 0.0001: - print ("#"*30) - print ("swithcing lr",lr,"to", nlr) + logging.info("{}".format("#"*30)) + logging.info("swithcing lr {} to {}".format(lr, nlr)) K.set_value( self.discriminator.optimizer.lr, nlr) - print (K.get_value( self.discriminator.optimizer.lr )) + logging.info("{}".format(K.get_value( self.discriminator.optimizer.lr ))) K.set_value( self.combined.optimizer.lr, nlr) - print (K.get_value( self.combined.optimizer.lr )) - print ("#"*30) + logging.info("{}".format(K.get_value( self.combined.optimizer.lr ))) return np.asarray([epoch_disc_loss, epoch_gen_loss]) @@ -947,7 +881,6 @@ def prepare_geant4_data(self, **args): return energies, var def figure_of_merit(self, **args): - #print (self.histories) delta_loss = np.abs(self.histories['discriminator_model']['val_classification_loss'][-1] - self.histories['combined_model']['val_classification_loss'][-1]) return delta_loss diff --git a/nnlo/train/model.py b/nnlo/train/model.py index 01e6754..1607eda 100644 --- a/nnlo/train/model.py +++ b/nnlo/train/model.py @@ -356,8 +356,10 @@ def test_on_batch(self, x=None, y=None, *args, **kwargs): if self.gpus > 0: x = x.cuda() target = target.cuda() - pred = self.model.forward(Variable(x, volatile=True)) - loss = self.loss(pred, Variable(target, volatile=True)) + import torch + with torch.no_grad(): + pred = self.model.forward(Variable(x)) + loss = self.loss(pred, Variable(target)) l_data = loss.data.numpy() if self.gpus == 0 else loss.data.cpu().numpy() self.metrics = [l_data] if l_data.shape==() else [l_data[0]] if 'acc' in self.metrics_names: # compute the accuracy @@ -434,7 +436,7 @@ def __init__(self, comm, source, custom_objects={}, weights=None): if isinstance(source, six.string_types): if source.endswith('.py'): - module = __import__(source.replace('.py','').replace('/', '.'), fromlist=[None]) + module = __import__('nnlo.'+source.replace('.py','').replace('/', '.'), fromlist=[None]) self.model = module.get_model() self.filename = None else: @@ -443,14 +445,13 @@ def __init__(self, comm, source, else: self.filename = None self.model = source + logging.debug("Get model {0} from file {1}".format(self.model, self.filename)) self.weights = weights self.custom_objects = custom_objects super(ModelTensorFlow, self).__init__(comm) def build_model_aux(self): - import keras.backend as K - if type(self.filename) == list: models = [] self.weights = self.weights.split(',') if self.weights else [None]*len(self.filename) @@ -464,27 +465,26 @@ def build_model_aux(self): def 
build_model(self, local_session = True): - import keras.backend as K + import tensorflow as tf if local_session: - graph = K.tf.Graph() - session = K.tf.Session(graph=graph, config=K.tf.ConfigProto( + graph = tf.Graph() + session = tf.compat.v1.Session(graph=graph, config=tf.compat.v1.ConfigProto( allow_soft_placement=True, log_device_placement=False, - gpu_options=K.tf.GPUOptions( + gpu_options=tf.compat.v1.GPUOptions( per_process_gpu_memory_fraction=1./self.comm.Get_size()) ) ) with graph.as_default(): with session.as_default(): - import keras.backend as K ret_model = self.build_model_aux() ret_model.session = session ret_model.graph = graph return ret_model else: - K.set_session( K.tf.Session( config=K.tf.ConfigProto( + tf.compat.v1.Session( config=tf.compat.v1.ConfigProto( allow_soft_placement=True, log_device_placement=False, - gpu_options=K.tf.GPUOptions( - per_process_gpu_memory_fraction=1./self.comm.Get_size()) ) ) ) + gpu_options=tf.compat.v1.GPUOptions( + per_process_gpu_memory_fraction=1./self.comm.Get_size()) ) ) return self.build_model_aux() def get_backend_name(self): @@ -497,7 +497,7 @@ def __init__(self, comm, source, super(ModelPytorch,self).__init__(comm) if isinstance(source, six.string_types): if source.endswith('.py'): - module = __import__(source.replace('.py','').replace('/', '.'), fromlist=[None]) + module = __import__('nnlo.'+source.replace('.py','').replace('/', '.'), fromlist=[None]) self.model = module.get_model() self.filename = None else: diff --git a/nnlo/train/optimizer.py b/nnlo/train/optimizer.py index 3053e92..11703c6 100644 --- a/nnlo/train/optimizer.py +++ b/nnlo/train/optimizer.py @@ -647,7 +647,7 @@ def __init__(self, name, config=None, horovod_wrapper=False): self.horovod_wrapper = horovod_wrapper def build(self): - from keras.optimizers import deserialize + from tensorflow.keras.optimizers import deserialize opt_config = {'class_name': self.name, 'config': self.config} opt = deserialize(opt_config) if self.horovod_wrapper: diff --git a/nnlo/util/count_epoch.py b/nnlo/util/count_epoch.py new file mode 100644 index 0000000..77782c9 --- /dev/null +++ b/nnlo/util/count_epoch.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Rui Zhang 7.2020 +# rui.zhang@cern.ch + +import json +import logging +import sys +import pandas as pd + +def main(): + logging.basicConfig(level = logging.INFO) + filename, rows_list = [], [] + try: + filenames = sys.argv[1:] + except: + logging.fatal('python count_epoch.py Usage [json file name]') + + for filename in filenames: + with open(filename) as f: + data = json.load(f) + name = filename.split('_')[1] + dic = { + 'file': filename, + 'ranks': int(name[name.find('n')+1:name.find('g')]), + 'trainTime': data["train_time"], + } + try: + dic['val_loss'] = data["history"][r"0:0:-"]["val_loss"][-10] + dic['val_accuracy'] = data["history"][r"0:0:-"]["val_accuracy"][-10] + dic['epochs'] = len(data["history"][r"0:0:-"]["val_loss"]) + except: + dic['val_loss'] = data["history"][r"0:-:-"]["val_loss"][-10] + dic['val_accuracy'] = data["history"][r"0:-:-"]["val_accuracy"][-10] + dic['epochs'] = len(data["history"][r"0:-:-"]["val_loss"]) + rows_list.append(dic) + + df = pd.DataFrame(rows_list).sort_values('ranks') + logging.info(f'\n{df}') + +if __name__ == '__main__': + main() diff --git a/nnlo/util/utils.py b/nnlo/util/utils.py index e7a669b..73dfe7f 100644 --- a/nnlo/util/utils.py +++ b/nnlo/util/utils.py @@ -35,7 +35,7 @@ def import_keras(tries=10): try: stderr = sys.stderr sys.stderr = open(os.devnull, 'w') - import keras + import 
tensorflow.keras as keras sys.stderr = stderr return except ValueError: @@ -51,14 +51,19 @@ def load_model(filename=None, model=None, weights_file=None, custom_objects={}): weights_file: path to HDF5 file containing model weights custom_objects: A Dictionary of custom classes used in the model keyed by name""" import_keras() - from keras.models import model_from_json, clone_model + from tensorflow.keras.models import model_from_json, clone_model if filename is not None: with open( filename ) as arch_f: json_str = arch_f.readline() new_model = model_from_json( json_str, custom_objects=custom_objects) - if model is not None: + logging.info(f"Load model from filename") + elif model is not None: new_model = clone_model(model) - if weights_file is not None: + logging.info(f"Load model from model") + elif weights_file is not None: new_model.load_weights( weights_file ) + logging.info(f"Load model from weights_file") + else: + logging.error(f"Cannot load model: filename, model and weights_file are None") return new_model diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f60a4d6 --- /dev/null +++ b/setup.py @@ -0,0 +1,35 @@ +from setuptools import setup, find_packages + +with open('README.md') as readme_file: + README = readme_file.read() + +with open('HISTORY.md') as history_file: + HISTORY = history_file.read() + +setup_args = dict( + name='nnlo', + version='0.0.7', + entry_points = { + 'console_scripts': ['TrainingDriver=nnlo.driver.TrainingDriver:main', + 'GetData=nnlo.data.getdata:main', + 'CountEpoch=nnlo.util.count_epoch:main', + ], + }, + description='Distributed Machine Learning tool for High Performance Computing', + long_description_content_type="text/markdown", + long_description=README + '\n\n' + HISTORY, + license='MIT', + packages=find_packages(), + author='NNLO team', + author_email='rui.zhang@cern.ch', + keywords=['Distributed Machine Learning', 'High Performance Computing', 'Hyperparameter optimisation'], + url='https://github.com/chnzhangrui/NNLO', + download_url='https://pypi.org/project/nnlo/' +) + +install_requires = [ + 'scikit-optimize', +] + +if __name__ == '__main__': + setup(**setup_args, install_requires=install_requires, include_package_data=True)
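The `get_model.parameter_range` attribute added to the torch example model exposes its tunable hyperparameters as `skopt` dimensions, which is consistent with `scikit-optimize` appearing in `install_requires`. The patch does not show the loop that consumes these dimensions; the sketch below is only a hypothetical illustration of how such an `Integer`/`Real` list can drive a scikit-optimize search, with a dummy objective standing in for NNLO's figure of merit.

```python
# Hypothetical sketch: consuming a parameter_range list with scikit-optimize.
# The objective is a placeholder; in NNLO the score would come from a training run.
from skopt import gp_minimize
from skopt.utils import use_named_args
from skopt.space import Integer, Real

parameter_range = [
    Integer(1, 6, name='conv_layers'),
    Integer(1, 6, name='dense_layers'),
    Real(0.0, 1.0, name='dropout'),
]

@use_named_args(parameter_range)
def objective(conv_layers, dense_layers, dropout):
    # Dummy score so the sketch runs end to end.
    return dropout + 0.01 * (conv_layers + dense_layers)

result = gp_minimize(objective, parameter_range, n_calls=12, random_state=0)
print(result.x)  # best [conv_layers, dense_layers, dropout]
```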
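In `nnlo/train/model.py` the deprecated `volatile=True` flag is replaced by a `torch.no_grad()` block for evaluation. The snippet below is a self-contained illustration of that pattern with a throwaway model; it is not NNLO code.

```python
# Minimal illustration of evaluating under torch.no_grad(), as adopted in test_on_batch.
# The model and data here are placeholders.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
loss_fn = nn.CrossEntropyLoss()
x = torch.randn(8, 4)
target = torch.randint(0, 2, (8,))

model.eval()
with torch.no_grad():  # no autograd graph is recorded during evaluation
    pred = model(x)
    loss = loss_fn(pred, target)
print(loss.item())
```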
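The new `CountEpoch` entry point expects JSON history files whose names encode the number of ranks (the digits between `n` and `g` in the second underscore-separated token) and whose payload carries `train_time` plus a `history` block keyed by `0:0:-` or `0:-:-`. The snippet below writes a made-up file in that shape; the file name and numbers are illustrative only.

```python
# Hypothetical history file that CountEpoch can parse; values and file name are invented.
import json

history = {
    "train_time": 123.4,
    "history": {
        "0:0:-": {
            # at least 10 entries, since count_epoch.py reads the value 10 entries from the end
            "val_loss":     [0.90, 0.52, 0.44, 0.39, 0.36, 0.34, 0.33, 0.32, 0.31, 0.30, 0.29],
            "val_accuracy": [0.60, 0.74, 0.79, 0.82, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.90],
        }
    },
}

with open("mnist_n4g1_history.json", "w") as f:
    json.dump(history, f)
# "CountEpoch mnist_n4g1_history.json" would then report ranks=4, the epoch count,
# and the validation metrics taken 10 entries from the end of the history.
```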