diff --git a/.idea/RobustInference.iml b/.idea/RobustInference.iml
index fa80a76..85c7612 100644
--- a/.idea/RobustInference.iml
+++ b/.idea/RobustInference.iml
@@ -4,7 +4,7 @@
     <content url="file://$MODULE_DIR$">
       <excludeFolder url="file://$MODULE_DIR$/venv" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.7" jdkType="Python SDK" />
+    <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index a2e120d..dc9ea49 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/RIFLE/RobustImputer.py b/RIFLE/RobustImputer.py
index f6467b4..6334bb4 100644
--- a/RIFLE/RobustImputer.py
+++ b/RIFLE/RobustImputer.py
@@ -2,6 +2,9 @@
 import numpy as np
 from sklearn.preprocessing import StandardScaler
 from math import sqrt
+import multiprocessing
+import time
+from preprocessing import PolyFeatures
 
 
 class RobustImputer:
@@ -14,73 +17,122 @@ def __init__(self):
         self.validation_data_proportion = 0.1
         self.data = None
         self.transformed_data = None
+        self.poly_transformed_data = None
         self.confidence_matrix = None
         self.imputed_data = None
+        self.cols = None
 
     def read_and_scale(self, filename):
+        """Read the CSV ``filename`` ('?' cells count as missing) and build both scaled tables."""
-        self.data = pd.read_csv(filename)
+        self.data = pd.read_csv(filename, na_values='?')
+        self.cols = self.data.columns
 
         sc = StandardScaler()
         sc.fit(self.data)
-
         transformed = sc.transform(self.data)
         self.transformed_data = pd.DataFrame(transformed, columns=self.data.columns, index=self.data.index)
 
+        # Degree-2 expansion with the NaN-preserving PolyFeatures, standardised on its own.
+        poly = PolyFeatures(2, include_bias=False)
+        poly.fit(self.data)
+        poly_data = poly.transform(self.data.to_numpy(dtype=float))
+        sc.fit(poly_data)
+        poly_transformed = sc.transform(poly_data)
+        self.poly_transformed_data = pd.DataFrame(
+            data=poly_transformed, index=self.data.index, columns=poly.get_feature_names_out(self.data.columns))
+
     def scale_data(self, data):
         self.data = data
         sc = StandardScaler()
         sc.fit(self.data)
-
         transformed = sc.transform(self.data)
         self.transformed_data = pd.DataFrame(transformed, columns=data.columns, index=data.index)
 
-    def estimate_confidence_intervals(self):
+        # Degree-2 expansion with the NaN-preserving PolyFeatures, re-fitting the same scaler object.
+        expander = PolyFeatures(2, include_bias=False)
+        expander.fit(self.data)
+        expanded = expander.transform(self.data.to_numpy(dtype=float))
+        sc.fit(expanded)
+        self.poly_transformed_data = pd.DataFrame(
+            data=sc.transform(expanded), index=self.data.index,
+            columns=expander.get_feature_names_out(data.columns))
 
-        data = self.transformed_data
-        dimension = data.shape[1]
-        confidence_matrix = np.zeros(shape=(dimension, dimension))
+    def find_confidence_interval(self, feature_index1):
+        """Bootstrap one row of the confidence matrix.
+
+        For every column ``j >= feature_index1`` the spread (standard deviation)
+        of the bootstrapped mean product of columns ``feature_index1`` and ``j``
+        is estimated from the rows in which both columns are observed.
+
+        Returns ``(feature_index1, row)``; the row is returned rather than only
+        stored on ``self`` because attribute writes made inside a
+        ``multiprocessing`` worker never reach the parent process.
+        """
 
-        cols = data.columns
+        data = self.poly_transformed_data
+        dimension = data.shape[1]
+        cols = data.columns
+        row = np.zeros(dimension, dtype="float")
 
-        for i in range(dimension):
-            for j in range(i, dimension):
-                feature_i = cols[i]
-                feature_j = cols[j]
-                columns = data[[feature_i, feature_j]]
-                intersections = columns[columns[[feature_i, feature_j]].notnull().all(axis=1)]
+        for feature_index2 in range(feature_index1, dimension):
+            feature_i = cols[feature_index1]
+            feature_j = cols[feature_index2]
+            columns = data[[feature_i, feature_j]]
+            intersections = columns[columns[[feature_i, feature_j]].notnull().all(axis=1)]
+            sample_size = int(len(intersections) * self.bootstrap_proportion)
 
-                intersection_num = len(intersections)
+            if sample_size < 2:
+                # Too few jointly observed rows to bootstrap: fall back to the
+                # product of the column maxima and skip the bootstrap loop.
+                maxima = columns.max()
+                row[feature_index2] = maxima.iloc[0] * maxima.iloc[1]
+                continue
 
-                sample_size = int(intersection_num * self.bootstrap_proportion)
+            estimation_array = []
+            for _ in range(self.number_of_bootstrap_estimations):
+                # No fixed random_state: identical resamples would make the std 0.
+                current_sample = np.array(
+                    intersections.sample(n=sample_size, replace=self.with_replacement))
+                f1 = current_sample[:, 0]
+                f2 = current_sample[:, 1]
+                estimation_array.append(np.inner(f1, f2) / sample_size)
+            row[feature_index2] = np.std(estimation_array)
 
-                if sample_size < 2:
-                    max_vals = columns.max()
-                    max1 = max_vals[feature_i]
-                    max2 = max_vals[feature_j]
-                    confidence_matrix[i][j] = max1 * max2
-                    continue
+        # Keep the in-place update for direct (single-process) callers.
+        if self.confidence_matrix is not None:
+            self.confidence_matrix[feature_index1, feature_index1:] = row[feature_index1:]
+        return feature_index1, row
 
-                estimation_array = []
-                for ind in range(self.number_of_bootstrap_estimations):
-                    current_sample = np.array(intersections.sample(n=sample_size, replace=self.with_replacement))
-                    f1 = current_sample[:, 0]
-                    f2 = current_sample[:, 1]
-                    inner_prod = np.inner(f1, f2) / sample_size
-                    estimation_array.append(inner_prod)
+    def estimate_confidence_intervals(self):
+        """Estimate the full, symmetric confidence matrix in parallel.
+
+        Each worker computes one upper-triangular row; the parent collects the
+        returned rows (worker-side writes to ``self`` are lost) and mirrors them
+        into the lower triangle.
+        """
+        dimension = self.poly_transformed_data.shape[1]
+        self.confidence_matrix = np.zeros(shape=(dimension, dimension), dtype="float")
 
-                confidence_matrix[i][j] = np.std(estimation_array)
+        start = time.time()
+        # Workers get a pickled copy of self, so results must come back via map().
+        with multiprocessing.Pool() as pool:
+            rows = pool.map(self.find_confidence_interval, range(dimension))
 
-        for j in range(dimension):
-            for i in range(j + 1, dimension):
-                confidence_matrix[i][j] = confidence_matrix[j][i]
+        for i, row in rows:
+            # upper triangle from the worker, mirrored into the lower triangle
+            self.confidence_matrix[i, i:] = row[i:]
+            self.confidence_matrix[i:, i] = row[i:]
 
-        self.confidence_matrix = confidence_matrix
+        print('Confidence done in {:.4f} seconds'.format(time.time() - start))
 
     def impute_data(self, column_index):
-        data = self.transformed_data
+        print(f'starting impute_data with {column_index}')
+        # data = self.transformed_data
+        data = self.poly_transformed_data
         confidence_intervals = self.confidence_matrix
 
-        data_columns = data.columns
+        # data_columns = data.columns
+        data_columns = self.cols
 
         y_column = data_columns[column_index]
         X = data.drop([y_column], axis=1)
@@ -208,22 +260,41 @@ def impute_data(self, column_index):
             y_predict = np.dot(data_i.T, theta)
             predicts.append(y_predict[0][0])
 
-        return predicts
+        res = (column_index, predicts)
+        return res
 
     def impute(self):
+        """Impute every column in parallel and store the result in ``imputed_data``.
+
+        Each column is predicted by ``impute_data`` in a worker process; the
+        predictions are rescaled with that column's standard deviation and mean
+        and written into ``self.data`` in place, which is then exposed as
+        ``self.imputed_data``.
+        The elapsed wall-clock time is printed at the end.
+        """
+        start = time.time()
+
         original_data = self.data
         standard_deviations = original_data.std()
         means = original_data.mean()
         data_cols = original_data.columns
 
-        for column_ind in range(original_data.shape[1]):
+        # impute_data returns (column_index, predictions), so every result is
+        # matched to its own column regardless of how the pool schedules tasks.
+        with multiprocessing.Pool() as pool:
+            predictions = pool.map(self.impute_data, range(original_data.shape[1]))
+
+        for column_ind, predicted in predictions:
             print(data_cols[column_ind] + " is imputed.")
-            predictions = self.impute_data(column_ind)
-            predictions = [x * standard_deviations[column_ind] + means[column_ind] for x in predictions]
+            # Undo the standardisation. .iloc is used because column_ind is a
+            # position, not a column label.
+            scale = standard_deviations.iloc[column_ind]
+            shift = means.iloc[column_ind]
 
-            original_data[data_cols[column_ind]] = predictions
+            original_data[data_cols[column_ind]] = [x * scale + shift for x in predicted]
 
         self.imputed_data = original_data
+        print('Impute done in {:.4f} seconds'.format(time.time() - start))
 
     def write_to_csv(self, output_filename):
         self.imputed_data.to_csv(output_filename, index=False)
diff --git a/RIFLE/__pycache__/RobustImputer.cpython-39.pyc b/RIFLE/__pycache__/RobustImputer.cpython-39.pyc
new file mode 100644
index 0000000..a24b721
Binary files /dev/null and b/RIFLE/__pycache__/RobustImputer.cpython-39.pyc differ
diff --git a/RIFLE/preprocessing/__init__.py b/RIFLE/preprocessing/__init__.py
new file mode 100644
index 0000000..3d7fe1d
--- /dev/null
+++ b/RIFLE/preprocessing/__init__.py
@@ -0,0 +1,3 @@
+from ._polynomial import PolyFeatures
+
+__all__ = ["PolyFeatures"]
\ No newline at end of file
diff --git a/RIFLE/preprocessing/__pycache__/__init__.cpython-39.pyc b/RIFLE/preprocessing/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..909dbc6
Binary files /dev/null and b/RIFLE/preprocessing/__pycache__/__init__.cpython-39.pyc differ
diff --git a/RIFLE/preprocessing/__pycache__/_polynomial.cpython-39.pyc b/RIFLE/preprocessing/__pycache__/_polynomial.cpython-39.pyc
new file mode 100644
index 0000000..dcc5f66
Binary files /dev/null and b/RIFLE/preprocessing/__pycache__/_polynomial.cpython-39.pyc differ
diff --git a/RIFLE/preprocessing/__pycache__/validation.cpython-39.pyc b/RIFLE/preprocessing/__pycache__/validation.cpython-39.pyc
new file mode 100644
index 0000000..741511b
Binary files /dev/null and b/RIFLE/preprocessing/__pycache__/validation.cpython-39.pyc differ
diff --git a/RIFLE/preprocessing/_polynomial.py b/RIFLE/preprocessing/_polynomial.py
new file mode 100644
index 0000000..85a54f4
--- /dev/null
+++ b/RIFLE/preprocessing/_polynomial.py
@@ -0,0 +1,254 @@
+import collections
+import numbers
+from itertools import chain
+from itertools import combinations_with_replacement as combinations_w_r
+import numpy as np
+from scipy.special import comb
+
+from .validation import _check_feature_names_in
+
+
+class PolyFeatures:
+    """ Generate interaction and polynomial features. Altered version of
+    sklearn.preprocessing.PolynomialFeatures to preserve NaN values.
+
+    Parameters
+    ----------
+    degree : int or tuple (min_degree, max_degree), default=2
+    Maximum degree, or (min_degree, max_degree) range, of the polynomial features.
+
+    include_bias : bool, default=True
+    If 'True', then include the bias column, the feature in which all
+    polynomial powers are zero (acts as an intercept term in a linear
+    model).
+    """
+
+    def __init__(self, degree=2, *, include_bias=True):
+        self.degree = degree
+        self.include_bias = include_bias
+
+    @staticmethod
+    def _combinations(n_features, min_degree, max_degree, include_bias):
+        """Iterate over the feature-index tuples (with repetition) that define each output term."""
+        start = max(1, min_degree)
+        terms = chain.from_iterable(
+            combinations_w_r(range(n_features), i) for i in range(start, max_degree + 1)
+        )
+        if include_bias:
+            terms = chain(combinations_w_r(range(n_features), 0), terms)
+        return terms
+
+    @staticmethod
+    def _num_combinations(n_features, min_degree, max_degree, include_bias):
+        """
+        Calculate number of terms in polynomial expansion.
+        Counts degrees max(1, min_degree)..max_degree, plus one for the bias if requested.
+        """
+        combinations = comb(n_features + max_degree, max_degree, exact=True) - 1
+        if min_degree > 0:
+            d = min_degree - 1
+            combinations -= comb(n_features + d, d, exact=True) - 1
+
+        if include_bias:
+            combinations += 1
+
+        return combinations
+
+    @property
+    def powers_(self):
+        """
+        Exponent for each of the inputs in the output.
+        Row i holds, per input feature, its exponent in the i-th term of `_combinations`.
+        """
+        combinations = self._combinations(
+            n_features=self.n_features_in_,
+            min_degree=self._min_degree,
+            max_degree=self._max_degree,
+            include_bias=self.include_bias,
+        )
+        return np.vstack(
+            [np.bincount(c, minlength=self.n_features_in_) for c in combinations]
+        )
+
+    def get_feature_names_out(self, input_features=None):
+        """
+        Get output feature names for transformation.
+
+        Parameters
+        ----------
+        input_features : array of str objects or None, default=None
+        Input features.
+
+        Returns
+        -------
+        feature_names : ndarray of str objects
+        Transformed feature names.
+        """
+        powers = self.powers_
+        input_features = _check_feature_names_in(self, input_features)
+        feature_names = []
+        for row in powers:
+            inds = np.where(row)[0]
+            if len(inds):
+                name = " ".join(
+                    "%s^%d" % (input_features[ind], exp)
+                    if exp != 1
+                    else input_features[ind]
+                    for ind, exp in zip(inds, row[inds])
+                )
+            else:
+                name = "1"
+            feature_names.append(name)
+        return np.asarray(feature_names, dtype=object)
+
+    def fit(self, X):
+        """
+        Validate `degree` and compute the number of output features.
+
+        Parameters
+        ----------
+        X : array-like matrix of shape (n_samples, n_features)
+        The data.
+
+        Returns
+        -------
+        self : object
+        Fitted transformer.
+        """
+        _, n_features = X.shape
+        self.n_features_in_ = n_features
+        if isinstance(self.degree, numbers.Integral):
+            if self.degree < 0:
+                raise ValueError(
+                    f"degree must be a non-negative integer, got {self.degree}."
+                )
+            elif self.degree == 0 and not self.include_bias:
+                raise ValueError(
+                    "Setting degree to zero and include_bias to False would result in"
+                    " an empty output array."
+                )
+
+            self._min_degree = 0
+            self._max_degree = self.degree
+        elif (
+                isinstance(self.degree, collections.abc.Iterable) and len(self.degree) == 2
+        ):
+            self._min_degree, self._max_degree = self.degree
+            if not (
+                    isinstance(self._min_degree, numbers.Integral)
+                    and isinstance(self._max_degree, numbers.Integral)
+                    and self._min_degree >= 0
+                    and self._min_degree <= self._max_degree
+            ):
+                raise ValueError(
+                    "degree=(min_degree, max_degree) must "
+                    "be non-negative integers that fulfil "
+                    "min_degree <= max_degree, got "
+                    f"{self.degree}."
+                )
+            elif self._max_degree == 0 and not self.include_bias:
+                raise ValueError(
+                    "Setting both min_degree and max_degree to zero and include_bias to"
+                    " False would result in an empty output array."
+                )
+        else:
+            raise ValueError(
+                "degree must be a non-negative int or tuple "
+                "(min_degree, max_degree), got "
+                f"{self.degree}."
+            )
+
+        self.n_output_features_ = self._num_combinations(
+            n_features=n_features,
+            min_degree=self._min_degree,
+            max_degree=self._max_degree,
+            include_bias=self.include_bias,
+        )
+        # We also record the number of output features for
+        # _min_degree = 0
+        self._n_out_full = self._num_combinations(
+            n_features=n_features,
+            min_degree=0,
+            max_degree=self._max_degree,
+            include_bias=self.include_bias,
+        )
+
+        return self
+
+    def transform(self, X):
+        """
+        Transform data to polynomial features.
+
+        Parameters
+        ----------
+        X : array-like matrix of shape (n_samples, n_features)
+        The data to transform.
+
+        Returns
+        -------
+        XP : ndarray matrix of shape (n_samples, NP)
+        The matrix of features, where NP is the number of polynomial features
+        generated from the combination of inputs.
+        """
+        n_samples, n_features = X.shape
+        # Do as if _min_degree = 0 and cut down array after the
+        # computation, i.e. use _n_out_full instead of n_output_features_.
+
+        XP = np.empty(shape=(n_samples, self._n_out_full),
+                      dtype=X.dtype)
+
+        # degree 0 term
+        if self.include_bias:
+            XP[:, 0] = 1
+            current_col = 1
+        else:
+            current_col = 0
+
+        if self._max_degree == 0:
+            return XP
+
+        # degree 1 term
+        XP[:, current_col: current_col + n_features] = X
+        index = list(range(current_col, current_col + n_features))
+        current_col += n_features
+        index.append(current_col)
+
+        # loop over degree >= 2 terms
+        for _ in range(2, self._max_degree + 1):
+            new_index = []
+            end = index[-1]
+            for feature_idx in range(n_features):
+                start = index[feature_idx]
+                new_index.append(current_col)
+                next_col = current_col + end - start
+                if next_col <= current_col:
+                    break
+                # XP[:, start:end] holds the previous-degree terms built from
+                # features feature_idx..n_features-1, so multiplying them by
+                # column feature_idx creates each new term exactly once.
+                # NaN entries propagate through the product, so a missing
+                # input stays missing in every term it takes part in.
+                np.multiply(
+                    XP[:, start:end],
+                    X[:, feature_idx: feature_idx + 1],
+                    out=XP[:, current_col:next_col],
+                    casting="no",
+                )
+                current_col = next_col
+
+            new_index.append(current_col)
+            index = new_index
+
+        if self._min_degree > 1:
+            n_XP, n_Xout = self._n_out_full, self.n_output_features_
+            if self.include_bias:
+                Xout = np.empty(
+                    shape=(n_samples, n_Xout), dtype=XP.dtype
+                )
+                Xout[:, 0] = 1
+                Xout[:, 1:] = XP[:, n_XP - n_Xout + 1:]
+            else:
+                Xout = XP[:, n_XP - n_Xout:].copy()
+            XP = Xout
+
+        return XP
diff --git a/RIFLE/preprocessing/validation.py b/RIFLE/preprocessing/validation.py
new file mode 100644
index 0000000..8d1541c
--- /dev/null
+++ b/RIFLE/preprocessing/validation.py
@@ -0,0 +1,50 @@
+import numpy as np
+
+
+def _check_feature_names_in(estimator, input_features=None, *, generate_names=True):
+    """
+    Check `input_features` and generate names if needed.
+
+    Parameters
+    ----------
+    estimator : object
+    Object whose `feature_names_in_` / `n_features_in_` attributes are read if present.
+    input_features : array-like of type str or None, default=None
+    Input features.
+    generate_names : bool, default=True
+    Whether to generate names when 'input_features' is 'None'.
+
+    Return
+    ------
+    feature_names_in : ndarray of str or 'None'
+    Feature names in; 'None' only when no names are available and generate_names is False.
+    """
+
+    feature_names_in_ = getattr(estimator, "feature_names_in_", None)
+    n_features_in_ = getattr(estimator, "n_features_in_", None)
+
+    if input_features is not None:
+        input_features = np.asarray(input_features, dtype=object)
+        if feature_names_in_ is not None and not np.array_equal(
+            feature_names_in_, input_features
+        ):
+            raise ValueError("input_features is not equal to feature_names_in_")
+
+        if n_features_in_ is not None and len(input_features) != n_features_in_:
+            raise ValueError(
+                "input_features should have length equal to number of "
+                f"features ({n_features_in_}), got {len(input_features)}"
+            )
+        return input_features
+
+    if feature_names_in_ is not None:
+        return feature_names_in_
+
+    if not generate_names:
+        return
+
+    # Generates feature names if `n_features_in_` is defined
+    if n_features_in_ is None:
+        raise ValueError("Unable to generate feature names without n_features_in_")
+
+    return np.asarray([f"x{i}" for i in range(n_features_in_)], dtype=object)
\ No newline at end of file
diff --git a/RIFLE/run.py b/RIFLE/run.py
index e904f70..6866245 100644
--- a/RIFLE/run.py
+++ b/RIFLE/run.py
@@ -1,12 +1,23 @@
 from RobustImputer import RobustImputer
 import sys
+import time
 
 
-missing, imputed = sys.argv[1:3]
-imputer = RobustImputer()
+def run():
+    missing, imputed = sys.argv[1:3]
+    imputer = RobustImputer()
 
-imputer.read_and_scale(missing)
-imputer.estimate_confidence_intervals()
-imputer.impute()
+    imputer.read_and_scale(missing)
+    imputer.estimate_confidence_intervals()
+    imputer.impute()
+    imputer.write_to_csv(imputed)
 
-imputer.write_to_csv(imputed)
+# This guard is required: multiprocessing may re-import this module in every worker process,
+# and without it the workers would re-run the pipeline recursively (Python raises a RuntimeError).
+
+
+if __name__ == '__main__':
+    start = time.time()
+    run()
+    end = time.time()
+    print('Done in {:.4f} seconds'.format(end - start))
\ No newline at end of file