16 changes: 8 additions & 8 deletions dataset/dataset.py
@@ -79,7 +79,7 @@ def load_data(self, name, fpath, na_values=None, entity_col=None, src_col=None):
specifies the column containing the source for each "mention" of an
entity.
"""
tic = time.clock()
tic = time.time()
try:
# Do not include TID and source column as trainable attributes
exclude_attr_cols = ['_tid_']
@@ -121,7 +121,7 @@ def load_data(self, name, fpath, na_values=None, entity_col=None, src_col=None):
except Exception:
logging.error('loading data for table %s', name)
raise
toc = time.clock()
toc = time.time()
load_time = toc - tic
return status, load_time

@@ -218,9 +218,9 @@ def get_statistics(self):
"""
if not self.stats_ready:
logging.debug('computing frequency and co-occurrence statistics from raw data...')
tic = time.clock()
tic = time.time()
self.collect_stats()
logging.debug('DONE computing statistics in %.2fs', time.clock() - tic)
logging.debug('DONE computing statistics in %.2fs', time.time() - tic)

stats = (self.total_tuples, self.single_attr_stats, self.pair_attr_stats)
self.stats_ready = True
@@ -287,7 +287,7 @@ def get_domain_info(self):
return total_vars, classes

def get_inferred_values(self):
tic = time.clock()
tic = time.time()
# index into domain with inferred_val_idx + 1 since SQL arrays begin at index 1.
query = "SELECT t1._tid_, t1.attribute, domain[inferred_val_idx + 1] as rv_value " \
"FROM " \
@@ -298,12 +298,12 @@ def get_inferred_values(self):
self.generate_aux_table_sql(AuxTables.inf_values_dom, query, index_attrs=['_tid_'])
self.aux_table[AuxTables.inf_values_dom].create_db_index(self.engine, ['attribute'])
status = "DONE collecting the inferred values."
toc = time.clock()
toc = time.time()
total_time = toc - tic
return status, total_time

def get_repaired_dataset(self):
tic = time.clock()
tic = time.time()
init_records = self.raw_data.df.sort_values(['_tid_']).to_records(index=False)
t = self.aux_table[AuxTables.inf_values_dom]
repaired_vals = dictify_df(t.df.reset_index())
@@ -315,6 +315,6 @@ def get_repaired_dataset(self):
self.repaired_data = Table(name, Source.DF, df=repaired_df)
self.repaired_data.store_to_db(self.engine.engine)
status = "DONE generating repaired dataset"
toc = time.clock()
toc = time.time()
total_time = toc - tic
return status, total_time
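A note on the change applied throughout this file and the rest of the patch: `time.clock()` was deprecated in Python 3.3 and removed in Python 3.8, so the timers are switched to `time.time()`, which returns wall-clock seconds. A minimal sketch of the pattern, with `time.perf_counter()` shown as the usual alternative for measuring durations (the helper function below is made up, and `perf_counter` is not part of this patch):

```python
import time

def do_work():
    # stand-in for the code being timed
    sum(range(1_000_000))

# On Python 3.8+ the old call no longer exists:
#   tic = time.clock()   ->  AttributeError

tic = time.time()                      # wall-clock timestamp, as used in this patch
do_work()
print('%.2f secs' % (time.time() - tic))

# time.perf_counter() is a monotonic, high-resolution clock and is unaffected
# by system clock adjustments, which makes it the usual choice for durations.
tic = time.perf_counter()
do_work()
print('%.2f secs' % (time.perf_counter() - tic))
```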
28 changes: 14 additions & 14 deletions dataset/dbengine.py
@@ -35,9 +35,9 @@ def execute_queries(self, queries):
:param queries: (list[str]) list of SQL queries to be executed
"""
logging.debug('Preparing to execute %d queries.', len(queries))
tic = time.clock()
tic = time.time()
results = self._apply_func(partial(_execute_query, conn_args=self.conn_args), [(idx, q) for idx, q in enumerate(queries)])
toc = time.clock()
toc = time.time()
logging.debug('Time to execute %d queries: %.2f secs', len(queries), toc-tic)
return results

@@ -48,11 +48,11 @@ def execute_queries_w_backup(self, queries):
:param queries: (list[str]) list of SQL queries to be executed
"""
logging.debug('Preparing to execute %d queries.', len(queries))
tic = time.clock()
tic = time.time()
results = self._apply_func(
partial(_execute_query_w_backup, conn_args=self.conn_args, timeout=self.timeout),
[(idx, q) for idx, q in enumerate(queries)])
toc = time.clock()
toc = time.time()
logging.debug('Time to execute %d queries: %.2f secs', len(queries), toc-tic)
return results

@@ -62,23 +62,23 @@ def execute_query(self, query):

:param query: (str) SQL query to be executed
"""
tic = time.clock()
tic = time.time()
conn = self.engine.connect()
result = conn.execute(query).fetchall()
conn.close()
toc = time.clock()
toc = time.time()
logging.debug('Time to execute query: %.2f secs', toc-tic)
return result

def create_db_table_from_query(self, name, query):
tic = time.clock()
tic = time.time()
drop = drop_table_template.substitute(table=name)
create = create_table_template.substitute(table=name, stmt=query)
conn = self.engine.connect()
conn.execute(drop)
conn.execute(create)
conn.close()
toc = time.clock()
toc = time.time()
logging.debug('Time to create table: %.2f secs', toc-tic)
return True

@@ -95,11 +95,11 @@ def create_db_index(self, name, table, attr_list):
# We need to quote each attribute since Postgres auto-downcases unquoted column references
quoted_attrs = map(lambda attr: '"{}"'.format(attr), attr_list)
stmt = index_template.substitute(idx_title=name, table=table, attrs=','.join(quoted_attrs))
tic = time.clock()
tic = time.time()
conn = self.engine.connect()
result = conn.execute(stmt)
conn.close()
toc = time.clock()
toc = time.time()
logging.debug('Time to create index: %.2f secs', toc-tic)
return result
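Regarding the quoting comment in `create_db_index` above: PostgreSQL folds unquoted identifiers to lower case, so mixed-case column names must be double-quoted when the index statement is built. A standalone illustration with made-up table and column names, not taken from the repository:

```python
# Hypothetical inputs, only for illustration.
attr_list = ['City', 'ZipCode']
quoted_attrs = ['"{}"'.format(attr) for attr in attr_list]
stmt = 'CREATE INDEX "idx_example" ON "example_table" ({})'.format(','.join(quoted_attrs))
print(stmt)
# CREATE INDEX "idx_example" ON "example_table" ("City","ZipCode")
# Without the quotes, Postgres would look for columns named city and zipcode.
```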

@@ -113,13 +113,13 @@ def _execute_query(args, conn_args):
query_id = args[0]
query = args[1]
logging.debug("Starting to execute query %s with id %s", query, query_id)
tic = time.clock()
tic = time.time()
con = psycopg2.connect(conn_args)
cur = con.cursor()
cur.execute(query)
res = cur.fetchall()
con.close()
toc = time.clock()
toc = time.time()
logging.debug('Time to execute query with id %d: %.2f secs', query_id, (toc - tic))
return res

@@ -129,7 +129,7 @@ def _execute_query_w_backup(args, conn_args, timeout):
query = args[1][0]
query_backup = args[1][1]
logging.debug("Starting to execute query %s with id %s", query, query_id)
tic = time.clock()
tic = time.time()
con = psycopg2.connect(conn_args)
cur = con.cursor()
cur.execute("SET statement_timeout to %d;"%timeout)
@@ -151,6 +151,6 @@ def _execute_query_w_backup(args, conn_args, timeout):
cur.execute(query_backup)
res = cur.fetchall()
con.close()
toc = time.clock()
toc = time.time()
logging.debug('Time to execute query with id %d: %.2f secs', query_id, toc - tic)
return res
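For context on `_execute_query_w_backup`: the cursor sets a server-side `statement_timeout`, runs the primary query, and re-runs with `query_backup` when the server cancels the first attempt (the except branch is collapsed in this diff). A rough sketch of that control flow with psycopg2; the exception handling and rollback are assumptions about the hidden branch, not a copy of the repository code:

```python
import logging
import psycopg2
from psycopg2.extensions import QueryCanceledError   # raised when statement_timeout fires

def execute_with_backup(conn_args, query, query_backup, timeout_ms):
    """Run query under a server-side timeout, falling back to query_backup on cancellation."""
    con = psycopg2.connect(conn_args)
    cur = con.cursor()
    cur.execute("SET statement_timeout to %d;" % timeout_ms)
    try:
        cur.execute(query)
        res = cur.fetchall()
    except QueryCanceledError:
        logging.warning('query timed out after %d ms, running backup query', timeout_ms)
        con.rollback()                                # clear the aborted transaction
        cur.execute(query_backup)
        res = cur.fetchall()
    finally:
        con.close()
    return res
```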
6 changes: 3 additions & 3 deletions dcparser/dcparser.py
@@ -26,10 +26,10 @@ def load_denial_constraints(self, fpath):

:param fpath: filepath to TXT file containing denial constraints
"""
tic = time.clock()
tic = time.time()
if not self.ds.raw_data:
status = 'No dataset specified'
toc = time.clock()
toc = time.time()
return status, toc - tic
attrs = self.ds.raw_data.get_attributes()
try:
@@ -47,7 +47,7 @@ def load_denial_constraints(self, fpath):
except Exception:
logging.error('FAILED to load constraints from file %s', os.path.basename(fpath))
raise
toc = time.clock()
toc = time.time()
return status, toc - tic

def get_dcs(self):
8 changes: 4 additions & 4 deletions detect/detect.py
@@ -17,17 +17,17 @@ def detect_errors(self, detectors):
:param detectors: (list) of ErrorDetector objects
"""
errors = []
tic_total = time.clock()
tic_total = time.time()

# Initialize all error detectors.
for detector in detectors:
detector.setup(self.ds, self.env)

# Run detection using each detector.
for detector in detectors:
tic = time.clock()
tic = time.time()
error_df = detector.detect_noisy_cells()
toc = time.clock()
toc = time.time()
logging.debug("DONE with Error Detector: %s in %.2f secs", detector.name, toc-tic)
errors.append(error_df)

@@ -39,7 +39,7 @@ def detect_errors(self, detectors):
# Store errors to db.
self.store_detected_errors(errors_df)
status = "DONE with error detection."
toc_total = time.clock()
toc_total = time.time()
detect_time = toc_total - tic_total
return status, detect_time

20 changes: 10 additions & 10 deletions domain/domain.py
@@ -134,9 +134,9 @@ def setup_attributes(self):
self.total = total
self.single_stats = single_stats
logging.debug("preparing pruned co-occurring statistics...")
tic = time.clock()
tic = time.time()
self.pair_stats = self._pruned_pair_stats(pair_stats)
logging.debug("DONE with pruned co-occurring statistics in %.2f secs", time.clock() - tic)
logging.debug("DONE with pruned co-occurring statistics in %.2f secs", time.time() - tic)
self.setup_complete = True

def _pruned_pair_stats(self, pair_stats):
@@ -231,7 +231,7 @@ def generate_domain(self):
"Call <setup_attributes> to setup active attributes. Error detection should be performed before setup.")

logging.debug('generating initial set of un-pruned domain values...')
tic = time.clock()
tic = time.time()
# Iterate over dataset rows.
cells = []
vid = 0
@@ -297,7 +297,7 @@ def generate_domain(self):
"fixed": cell_status})
vid += 1
domain_df = pd.DataFrame(data=cells).sort_values('_vid_')
logging.debug('DONE generating initial set of domain values in %.2f', time.clock() - tic)
logging.debug('DONE generating initial set of domain values in %.2f', time.time() - tic)

# Skip estimator model since we do not require any weak labelling or domain
# pruning based on posterior probabilities.
@@ -307,18 +307,18 @@ def generate_domain(self):
# Run pruned domain values from correlated attributes above through
# posterior model for a naive probability estimation.
logging.debug('training posterior model for estimating domain value probabilities...')
tic = time.clock()
tic = time.time()
estimator = NaiveBayes(self.env, self.ds, domain_df, self.correlations)
logging.debug('DONE training posterior model in %.2fs', time.clock() - tic)
logging.debug('DONE training posterior model in %.2fs', time.time() - tic)

# Predict probabilities for all pruned domain values.
logging.debug('predicting domain value probabilities from posterior model...')
tic = time.clock()
tic = time.time()
preds_by_cell = estimator.predict_pp_batch()
logging.debug('DONE predictions in %.2f secs, re-constructing cell domain...', time.clock() - tic)
logging.debug('DONE predictions in %.2f secs, re-constructing cell domain...', time.time() - tic)

logging.debug('re-assembling final cell domain table...')
tic = time.clock()
tic = time.time()
# iterate through raw/current data and generate posterior probabilities for
# weak labelling
num_weak_labels = 0
@@ -365,7 +365,7 @@ def generate_domain(self):

# update our cell domain df with our new updated domain
domain_df = pd.DataFrame.from_records(updated_domain_df, columns=updated_domain_df[0].dtype.names).drop('index', axis=1).sort_values('_vid_')
logging.debug('DONE assembling cell domain table in %.2fs', time.clock() - tic)
logging.debug('DONE assembling cell domain table in %.2fs', time.time() - tic)

logging.info('number of (additional) weak labels assigned from posterior model: %d', num_weak_labels)

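The changes in domain.py are the same timer swap, repeated around each stage (pruned statistics, initial domain generation, posterior training, prediction, final assembly). Not part of this patch, but if the `tic`/`toc` bookkeeping ever needs consolidating, a small context manager captures the pattern; the label and body below are made up:

```python
import logging
import time
from contextlib import contextmanager

@contextmanager
def timed(label):
    """Log the elapsed wall-clock time of the wrapped block."""
    tic = time.time()
    try:
        yield
    finally:
        logging.debug('DONE %s in %.2f secs', label, time.time() - tic)

# Hypothetical usage mirroring the logging calls in generate_domain():
with timed('generating initial set of domain values'):
    cells = [{'_vid_': i} for i in range(3)]    # stand-in for the real work
```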
4 changes: 2 additions & 2 deletions domain/estimators/logistic.py
@@ -71,7 +71,7 @@ def _gen_training_data(self):
used for training and prediction.
"""
logging.debug('Logistic: featurizing training data...')
tic = time.clock()
tic = time.time()
# Each row corresponds to a possible value for a given attribute
# and given TID
self._X = torch.zeros(self.n_samples, self.num_features)
@@ -120,7 +120,7 @@ def _gen_training_data(self):
# Convert this to a vector of indices rather than a vector mask.
self._train_idx = (self._train_idx == 1).nonzero()[:,0]

logging.debug('Logistic: DONE featurization in %.2fs', time.clock() - tic)
logging.debug('Logistic: DONE featurization in %.2fs', time.time() - tic)

def _gen_feat_tensor(self, init_row, attr, domain_vals):
"""
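On the mask-to-index conversion noted in `_gen_training_data` above: `(mask == 1).nonzero()` returns an N x 1 tensor of positions, and `[:, 0]` flattens it into a 1-D vector of indices. A standalone illustration, not taken from the repository:

```python
import torch

train_mask = torch.tensor([0., 1., 0., 1., 1.])     # 1.0 marks a labelled training cell
train_idx = (train_mask == 1).nonzero()[:, 0]        # tensor([1, 3, 4])
print(train_idx)

# Equivalent and a bit more explicit on recent PyTorch versions:
train_idx = torch.nonzero(train_mask == 1, as_tuple=True)[0]
```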
10 changes: 5 additions & 5 deletions evaluate/eval.py
@@ -48,7 +48,7 @@ def __init__(self, env, dataset):
self.ds = dataset

def load_data(self, name, fpath, tid_col, attr_col, val_col, na_values=None):
tic = time.clock()
tic = time.time()
try:
raw_data = pd.read_csv(fpath, na_values=na_values, encoding='utf-8')
# We drop any ground truth values that are NULLs since we follow
@@ -73,7 +73,7 @@ def load_data(self, name, fpath, tid_col, attr_col, val_col, na_values=None):
except Exception:
logging.error('load_data for table %s', name)
raise
toc = time.clock()
toc = time.time()
load_time = toc - tic
return status, load_time

@@ -98,7 +98,7 @@ def eval_report(self):
"""
Returns an EvalReport named tuple containing the experiment results.
"""
tic = time.clock()
tic = time.time()
try:
prec, rec, rep_recall, f1, rep_f1 = self.evaluate_repairs()
report = "Precision = %.2f, Recall = %.2f, Repairing Recall = %.2f, F1 = %.2f, Repairing F1 = %.2f, Detected Errors = %d, Total Errors = %d, Correct Repairs = %d, Total Repairs = %d, Total Repairs on correct cells (Grdth present) = %d, Total Repairs on incorrect cells (Grdth present) = %d" % (
@@ -112,7 +112,7 @@ def eval_report(self):
logging.error("ERROR generating evaluation report %s" % e)
raise

toc = time.clock()
toc = time.time()
report_time = toc - tic
return report, report_time, eval_report

@@ -315,7 +315,7 @@ def log_weak_label_stats(self):
logging.debug("weak label statistics:")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', len(df_stats))
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_colwidth', None)
logging.debug("%s", df_stats)
pd.reset_option('display.max_columns')
pd.reset_option('display.max_rows')
Expand Down
Loading