diff --git a/BioCompass/BioCompass.py b/BioCompass/BioCompass.py
index 4dcc3db..0507e6e 100644
--- a/BioCompass/BioCompass.py
+++ b/BioCompass/BioCompass.py
@@ -102,9 +102,9 @@ def parse_antiSMASH(content):
 
 
     QueryCluster = OrderedDict()
-    for k in re.search(
+    for k in list(re.search(
             rule_query_cluster, parsed['QueryCluster'],
-            re.VERBOSE).groupdict().keys():
+            re.VERBOSE).groupdict().keys()):
         QueryCluster[k] = []
     for row in re.finditer(
             rule_query_cluster, parsed['QueryCluster'], re.VERBOSE):
@@ -186,7 +186,7 @@ def __getitem__(self, item):
         return self.data[item]
 
     def keys(self):
-        return self.data.keys()
+        return list(self.data.keys())
 
     def load(self, filename):
         self.data = {}
@@ -221,8 +221,8 @@ def efetch_hit(term, seq_start, seq_stop):
             downloaded = True
         except:
             nap = ntry*3
-            print "Fail to download (term). I'll take a nap of %s seconds ", \
-                    " and try again."
+            print("Fail to download (term). I'll take a nap of %s seconds ", \
+                    " and try again.")
             time.sleep(ntry*3)
 
     return content
@@ -233,7 +233,7 @@ def download_hits(filename, output_path):
     """
     c = antiSMASH_file(filename)
 
-    for cs in c['SignificantHits'].keys():
+    for cs in list(c['SignificantHits'].keys()):
         locus = c['SignificantHits'][cs]['locus']
         table_genes = c['SignificantHits'][cs]['TableGenes']
 
@@ -244,25 +244,25 @@ def download_hits(filename, output_path):
                     max(table_genes['location_end'])))
 
         if os.path.isfile(filename_out):
-            print "Already downloaded %s" % filename_out
+            print("Already downloaded %s" % filename_out)
         else:
-            print "Requesting cluster_subject: %s, start: %s, end: %s" % (
+            print("Requesting cluster_subject: %s, start: %s, end: %s" % (
                     locus,
                     min(table_genes['location_start']),
-                    max(table_genes['location_end']))
+                    max(table_genes['location_end'])))
 
             content = efetch_hit(
                 term=locus,
                 seq_start=min(table_genes['location_start']),
                 seq_stop=max(table_genes['location_end']))
 
-            print "Saving %s" % filename_out
+            print("Saving %s" % filename_out)
             with open(filename_out, 'w') as f:
                 f.write(content)
 
 
-import urlparse
-import urllib2
+import urllib.parse
+import urllib.request, urllib.error, urllib.parse
 import tempfile
 import tarfile
 import os
@@ -274,10 +274,10 @@ def download_mibig(outputdir, version='1.3'):
 
     server = 'http://mibig.secondarymetabolites.org'
     filename = "mibig_gbk_%s.tar.gz" % version
-    url = urlparse.urljoin(server, filename)
+    url = urllib.parse.urljoin(server, filename)
 
     with tempfile.NamedTemporaryFile(delete=True) as f:
-        u = urllib2.urlopen(url)
+        u = urllib.request.urlopen(url)
         f.write(u.read())
         f.file.flush()
         tar = tarfile.open(f.name)
diff --git a/BioCompass/BioCompass.py.bak b/BioCompass/BioCompass.py.bak
new file mode 100644
index 0000000..4dcc3db
--- /dev/null
+++ b/BioCompass/BioCompass.py.bak
@@ -0,0 +1,353 @@
+
+import os
+import re
+import time
+import json
+from collections import OrderedDict
+import pkg_resources
+
+import pandas as pd
+from Bio import SeqIO
+from Bio import Entrez
+
+Entrez.email = "testing@ucsd.edu"
+
+
+def untag(rule):
+    return re.sub('\?P<.*?>', '', rule)
+
+def parse_antiSMASH(content):
+    """ Parse antiSMASH output
+    """
+
+    rule_table_genes = r"""
+        (?P<subject_gene> \w+ \"?) \t
+        \w+ \t
+        (?P<location_start> \d+) \t
+        (?P<location_end> \d+) \t
+        (?P<strands> [+|-]) \t
+        (?P<product> .*) \n
+        """
+
+    rule_table_blasthit = r"""
+        (?P<query_gene> \w+ )\"? \t
+        (?P<subject_gene> \w+ )\"? \t
+        (?P<identity> \d+) \t
+        (?P<blast_score> \d+) \t
+        (?P<coverage> \d+(?:\.\d+)?) \t
+        (?P<evalue> \d+\.\d+e[+|-]\d+) \t
+        \n
+        """
+
+    rule_query_cluster = r"""
+        (?P<query_gene> \w+) \s+
+        (?P<location_start> \d+) \s
+        (?P<location_end> \d+) \s
+        (?P<strands> [+|-]) \s
+        (?P<product> \w+ (?:\s \w+)?) \s* \n+
+        """
+
+    rule_detail = r"""
+        >>\n
+        (?P<id>\d+) \. \s+
+            (?P<cluster_subject> (?P<locus>\w+)_(?P<cluster>\w+)) \n
+        Source: \s+ (?P<source>.+?) \s* \n
+        Type: \s+ (?P<type>.+) \s* \n
+        Number\ of\ proteins\ with\ BLAST\ hits\ to\ this\ cluster:\ (?P<n_hits> \d+ ) \n
+        Cumulative\ BLAST\ score:\ (?P<cum_BLAST_score> \d+ )
+        \n \n
+        Table\ of\ genes,\ locations,\ strands\ and\ annotations\ of\ subject\ cluster:\n
+        (?P<TableGenes>
+            (
+            """ + untag(rule_table_genes) + r"""
+            )+
+        )
+        \n
+        Table\ of\ Blast\ hits\ \(query\ gene,\ subject\ gene,\ %identity,\ blast\ score,\ %coverage,\ e-value\): \n
+        (?P<BlastHit>
+            (\w+ \t \w+ \"? \t \d+ \t \d+ \t \d+\.\d+ \t \d+\.\d+e[+|-]\d+ \t \n)+
+        )
+        \n+
+        """
+
+    rule = r"""
+        ^
+        ClusterBlast\ scores\ for\ (?P<target>.*)\n+
+        Table\ of\ genes,\ locations,\ strands\ and\ annotations\ of\ query\ cluster:\n+
+        (?P<QueryCluster>
+            (
+            """ + untag(rule_query_cluster) + r"""
+            )+
+        )
+        \n \n+
+        Significant \  hits:\ \n
+        (?P<SignificantHits>
+          (\d+ \. \ \w+ \t .* \n+)+
+          )
+        \n \n
+        (?P<Details>
+          Details:\n\n
+          (
+            """ + untag(rule_detail) + r"""
+          )+
+        )
+        \n*
+        $
+        """
+    parsed = re.search(rule, content, re.VERBOSE).groupdict()
+
+    output = {}
+    for k in ['target', 'QueryCluster', 'SignificantHits']:
+        output[k] = parsed[k]
+
+
+    QueryCluster = OrderedDict()
+    for k in re.search(
+            rule_query_cluster, parsed['QueryCluster'],
+            re.VERBOSE).groupdict().keys():
+        QueryCluster[k] = []
+    for row in re.finditer(
+            rule_query_cluster, parsed['QueryCluster'], re.VERBOSE):
+        row = row.groupdict()
+        for k in row:
+            QueryCluster[k].append(row[k])
+    output['QueryCluster'] = QueryCluster
+
+
+    output['SignificantHits'] = OrderedDict()
+    for row in re.finditer(
+            r"""(?P<id>\d+) \. \ (?P<cluster_subject> (?P<locus>\w+)_(?P<locus_cluster>\w+)) \t (?P<description>.*) \n+""", parsed['SignificantHits'], re.VERBOSE):
+        hit = row.groupdict()
+        cs = hit['cluster_subject']
+
+        if cs not in output['SignificantHits']:
+            output['SignificantHits'][cs] = OrderedDict()
+
+        for v in ['id', 'description', 'locus', 'locus_cluster']:
+            output['SignificantHits'][cs][v] = hit[v]
+
+    for block in re.finditer(rule_detail, parsed['Details'], re.VERBOSE):
+        block = dict(block.groupdict())
+
+        content = block['TableGenes']
+        block['TableGenes'] = OrderedDict()
+        for k in re.findall('\(\?P<(.*?)>', rule_table_genes):
+            block['TableGenes'][k] = []
+        for row in re.finditer(rule_table_genes, content, re.VERBOSE):
+            row = row.groupdict()
+            for k in row:
+                block['TableGenes'][k].append(row[k])
+
+        content = block['BlastHit']
+        block['BlastHit'] = OrderedDict()
+        for k in re.findall('\(\?P<(.*?)>', rule_table_blasthit):
+            block['BlastHit'][k] = []
+        for row in re.finditer(rule_table_blasthit, content, re.VERBOSE):
+            row = row.groupdict()
+            for k in row:
+                block['BlastHit'][k].append(row[k])
+
+        for k in block:
+            output['SignificantHits'][block['cluster_subject']][k] = block[k]
+
+    return output
+
+
+def antiSMASH_to_dataFrame(content):
+    """ Extract an antiSMASH file as a pandas.DataFrame
+    """
+    parsed = parse_antiSMASH(content)
+    output = pd.DataFrame()
+    for cs in parsed['SignificantHits']:
+        clusterSubject = parsed['SignificantHits'][cs].copy()
+        df = pd.merge(
+                pd.DataFrame(clusterSubject['BlastHit']),
+                pd.DataFrame(clusterSubject['TableGenes']),
+                on='subject_gene', how='outer')
+
+        del(clusterSubject['BlastHit'])
+        del(clusterSubject['TableGenes'])
+
+        for v in clusterSubject:
+            df[v] = clusterSubject[v]
+        output = output.append(df, ignore_index=True)
+
+    return output
+
+
+class antiSMASH_file(object):
+    """ A class to handle antiSMASH file output.
+    """
+    def __init__(self, filename):
+        self.data = {}
+        self.load(filename)
+
+    def __getitem__(self, item):
+        return self.data[item]
+
+    def keys(self):
+        return self.data.keys()
+
+    def load(self, filename):
+        self.data = {}
+        with open(filename, 'r') as f:
+            parsed = parse_antiSMASH(f.read())
+            for v in parsed:
+                self.data[v] = parsed[v]
+
+
+def efetch_hit(term, seq_start, seq_stop):
+    """ Fetch the relevant part of a hit
+    """
+    db = "nucleotide"
+
+    maxtry = 3
+    ntry = -1
+    downloaded = False
+
+    while ~downloaded and (ntry <= maxtry):
+        ntry += 1
+        try:
+            handle = Entrez.esearch(db=db, term=term)
+            record = Entrez.read(handle)
+
+            assert len(record['IdList']) == 1, \
+                    "Sorry, I'm not ready to handle more than one record"
+
+            handle = Entrez.efetch(db=db, rettype="gb", retmode="text",
+                    id=record['IdList'][0],
+                    seq_start=seq_start, seq_stop=seq_stop)
+            content = handle.read()
+            downloaded = True
+        except:
+            nap = ntry*3
+            print "Fail to download (term). I'll take a nap of %s seconds ", \
+                    " and try again."
+            time.sleep(ntry*3)
+
+    return content
+
+
+def download_hits(filename, output_path):
+    """ Download the GenBank block for all hits by antiSMASH
+    """
+    c = antiSMASH_file(filename)
+
+    for cs in c['SignificantHits'].keys():
+        locus = c['SignificantHits'][cs]['locus']
+        table_genes = c['SignificantHits'][cs]['TableGenes']
+
+        filename_out = os.path.join(
+                output_path,
+                "%s_%s-%s.gbk" % (locus,
+                    min(table_genes['location_start']),
+                    max(table_genes['location_end'])))
+
+        if os.path.isfile(filename_out):
+            print "Already downloaded %s" % filename_out
+        else:
+            print "Requesting cluster_subject: %s, start: %s, end: %s" % (
+                    locus,
+                    min(table_genes['location_start']),
+                    max(table_genes['location_end']))
+
+            content = efetch_hit(
+                term=locus,
+                seq_start=min(table_genes['location_start']),
+                seq_stop=max(table_genes['location_end']))
+
+            print "Saving %s" % filename_out
+            with open(filename_out, 'w') as f:
+                f.write(content)
+
+
+import urlparse
+import urllib2
+import tempfile
+import tarfile
+import os
+def download_mibig(outputdir, version='1.3'):
+    """ Download and extract MIBiG files into outputdir
+    """
+    assert version in ['1.0', '1.1', '1.2', '1.3'], \
+            "Invalid version of MIBiG"
+
+    server = 'http://mibig.secondarymetabolites.org'
+    filename = "mibig_gbk_%s.tar.gz" % version
+    url = urlparse.urljoin(server, filename)
+
+    with tempfile.NamedTemporaryFile(delete=True) as f:
+        u = urllib2.urlopen(url)
+        f.write(u.read())
+        f.file.flush()
+        tar = tarfile.open(f.name)
+        tar.extractall(path=outputdir)
+        tar.close()
+
+    # MIBiG was packed with strange files ._*gbk. Let's remove it
+    for f in [f for f in os.listdir(outputdir) if f[:2] == '._']:
+        os.remove(os.path.join(outputdir, f))
+
+#def gbk2tablegen(gb_file, strain_id=None):
+#def cds_from_gbk(gb_file, strain_id=None):
+def cds_from_gbk(gb_file):
+    gb_record = SeqIO.read(open(gb_file,"rU"), "genbank")
+
+    #if strain_id is not None:
+    #    gb_record.id = strain_id
+
+    output = pd.DataFrame()
+    sign = lambda x: '+' if x > 0 else '-'
+    for feature in gb_record.features:
+        if feature.type == "CDS":
+            tmp = {}
+            tmp = {'BGC': gb_record.id,
+                    'locus_tag': feature.qualifiers['locus_tag'][0],
+                    'start': feature.location.start.position,
+                    'stop': feature.location.end.position,
+                    'strand': sign(feature.location.strand) }
+            if 'note' in feature.qualifiers:
+                for note in feature.qualifiers['note']:
+                    product = re.search( r"""smCOG: \s (?P<product>.*?) \s+ \(Score: \s* (?P<score>.*); \s* E-value: \s (?P<e_value>.*?)\);""", note, re.VERBOSE)
+                    if product is not None:
+                        product = product.groupdict()
+                        product['score'] = float(product['score'])
+                        product['e_value'] = float(product['e_value'])
+                        for p in product:
+                            tmp[p] = product[p]
+            output = output.append(pd.Series(tmp), ignore_index=True)
+    return output
+
+
+def find_category_from_product(df):
+    subcluster = json.loads(
+            pkg_resources.resource_string(
+                __name__, 'subcluster_dictionary.json'))
+    def get_category(product):
+        for s in subcluster:
+            if re.search(s, product):
+                return subcluster[s]
+        return 'hypothetical'
+
+    idx = df['product'].notnull()
+    df['category'] = df.loc[idx, 'product'].apply(get_category)
+    df['category'].fillna('hypothetical', inplace=True)
+    return df
+
+
+def get_hits(filename, criteria='cum_BLAST_score'):
+    """
+
+        Reproduces original Tiago's code: table_1_extender.py
+
+        In the future allow different criteria. Right now it takes
+          from the very first block, which has the highest Cumulative
+          BLAST.
+    """
+    with open(filename) as f:
+        df = antiSMASH_to_dataFrame(f.read())
+
+    df.dropna(subset=['query_gene'], inplace=True)
+    df.sort_values(by=criteria, ascending=False, na_position='last',
+            inplace=True)
+    return df.groupby('query_gene', as_index=False).first()
diff --git a/BioCompass/cli.py b/BioCompass/cli.py
index df4db02..02a28d5 100644
--- a/BioCompass/cli.py
+++ b/BioCompass/cli.py
@@ -21,7 +21,7 @@ def downloadHits(mgbfile, outputdir):
 @main.command(name="download-MIBiG")
 @click.option('--outputdir', default='./', type=click.Path(exists=True),
         help="Path to save the MIBig genbank files.")
-@click.option('--version', type=unicode, default='1.3',
+@click.option('--version', type=str, default='1.3',
         help="Version of MIBiG to download.")
 def downloadMIBiG(outputdir, version):
     """Download MIBiG gbk database."""
diff --git a/BioCompass/cli.py.bak b/BioCompass/cli.py.bak
new file mode 100644
index 0000000..df4db02
--- /dev/null
+++ b/BioCompass/cli.py.bak
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+import click
+
+from .BioCompass import download_hits
+from .BioCompass import download_mibig
+
+@click.group()
+def main():
+    pass
+
+@main.command(name="download-hits")
+@click.option('--outputdir', default='./', type=click.Path(exists=True),
+        help="Path to save the NCBI clusters.")
+@click.argument('mgbfile', type=click.Path(exists=True))
+        #help="Multigeneblast file containing NCBI references to be downloaded.")
+def downloadHits(mgbfile, outputdir):
+    """Download NCBI clusters listed t in multigeneblast file."""
+    download_hits(mgbfile, outputdir)
+
+@main.command(name="download-MIBiG")
+@click.option('--outputdir', default='./', type=click.Path(exists=True),
+        help="Path to save the MIBig genbank files.")
+@click.option('--version', type=unicode, default='1.3',
+        help="Version of MIBiG to download.")
+def downloadMIBiG(outputdir, version):
+    """Download MIBiG gbk database."""
+    download_mibig(outputdir, version=version)
+
+if __name__ == "__main__":
+    main()
diff --git a/BioCompass/edges_gen.py b/BioCompass/edges_gen.py
index e1c3e78..3d05ea7 100644
--- a/BioCompass/edges_gen.py
+++ b/BioCompass/edges_gen.py
@@ -21,7 +21,7 @@
 for index, row in table2_df.iterrows():
     file_name = './%s_%s_%s/clusterblast_output.txt'%(table2_df.BGC.loc[index],itineration,table2_df.subcluster.loc[index])
     if os.path.isfile(file_name):
-        with open(file_name).xreadlines() as f:
+        with open(file_name) as f:
             for line in f:
                 hit = re.search(r'^(\d*). (\S*)_(\S*)$',line)
                 matching_genes = re.search(r'^Number of proteins with BLAST hits to this cluster: (\d*)$', line)
diff --git a/BioCompass/edges_gen.py.bak b/BioCompass/edges_gen.py.bak
new file mode 100644
index 0000000..e1c3e78
--- /dev/null
+++ b/BioCompass/edges_gen.py.bak
@@ -0,0 +1,63 @@
+import pandas as pd
+import re
+import os.path
+import itertools
+from sys import argv
+
+script, cluster_name, itineration = argv
+
+table2_df = pd.read_csv('../tables/%s_table2_%s.csv'%(cluster_name,itineration), sep='\t')
+
+col1 = []
+col2 = []
+col3 = []
+col4 = []
+col5 = []
+col6 = []
+col7 = []
+col8 = []
+col9 = []
+
+for index, row in table2_df.iterrows():
+    file_name = './%s_%s_%s/clusterblast_output.txt'%(table2_df.BGC.loc[index],itineration,table2_df.subcluster.loc[index])
+    if os.path.isfile(file_name):
+        with open(file_name).xreadlines() as f:
+            for line in f:
+                hit = re.search(r'^(\d*). (\S*)_(\S*)$',line)
+                matching_genes = re.search(r'^Number of proteins with BLAST hits to this cluster: (\d*)$', line)
+                mgb_score = re.search(r'^MultiGeneBlast score: (\d*).(\d*)$', line)
+                blast_score = re.search(r'^Cumulative Blast bit score: (\d*)$',line)
+                if hit != None:
+                    col1.append('%s'%table2_df.BGC.loc[index])
+                    col2.append('%s'%table2_df.subcluster.loc[index])
+                    col3.append('%s'%table2_df.category.loc[index])
+                    col4.append('%s'%table2_df.CDSs.loc[index])
+                    col5.append(hit.group(2))
+                    col9.append('%s'%itineration)
+                if mgb_score != None:
+                    col6.append('%s.%s'%mgb_score.group(1,2))
+                if blast_score != None:
+                    col7.append(blast_score.group(1))
+                if matching_genes != None:
+                    perc_matching = int(matching_genes.group(1))*100/(table2_df.CDSs.loc[index])
+                    col8.append(perc_matching)
+                
+    else:
+        col1.append('%s'%table2_df.BGC.loc[index])
+        col2.append('%s'%table2_df.subcluster.loc[index])
+        col3.append('%s'%table2_df.category.loc[index])
+        col4.append('%s'%table2_df.CDSs.loc[index])
+        col5.append('%s'%table2_df.BGC.loc[index])
+        col6.append('NA')
+        col7.append('NA')
+        col8.append('NA')
+        col9.append('%s'%itineration)
+        
+frames = {'BGC':col1, 'BGC_subcluster':col2, 'BGC_subcluster_category':col3, 'BGC_subcluster_genes':col4, 'BLAST_hit':col5, 'MultiGeneBlast_score':col6, 'BLAST_score':col7, 'percent_matching_genes':col8, 'BGC_iteration':col9}
+
+edges_df = pd.DataFrame(frames)
+
+edges_handle = open('%s_edges_%s.txt'%(cluster_name,itineration), "w")
+edges_df.to_csv(edges_handle, sep='\t', index=False)
+edges_handle.close()
+
diff --git a/BioCompass/filter_edges.py b/BioCompass/filter_edges.py
index 42122fe..200976b 100644
--- a/BioCompass/filter_edges.py
+++ b/BioCompass/filter_edges.py
@@ -9,9 +9,9 @@
 
 while True:
     try:
-        per_genes = int(raw_input("\n What minimum percentage of matching genes would you like [from 0 to 100]? "))
-        blast_score = int(raw_input("\n What minimum cumulative BLAST score would you like [from %s to %s]? "%(edges_final.BLAST_score.min(),edges_final.BLAST_score.max())))
-        mgb_score = int(raw_input("\n What minimum multigeneBLAST score [from %s to %s]? "%(edges_final.MultiGeneBlast_score.min(),edges_final.MultiGeneBlast_score.max())))
+        per_genes = int(input("\n What minimum percentage of matching genes would you like [from 0 to 100]? "))
+        blast_score = int(input("\n What minimum cumulative BLAST score would you like [from %s to %s]? "%(edges_final.BLAST_score.min(),edges_final.BLAST_score.max())))
+        mgb_score = int(input("\n What minimum multigeneBLAST score [from %s to %s]? "%(edges_final.MultiGeneBlast_score.min(),edges_final.MultiGeneBlast_score.max())))
         break
     except ValueError:
         print("Sorry, I didn't understand that. Try again using INTERGERS ONLY.")
@@ -40,7 +40,7 @@
 edges_final = edges_final.drop(edges_final.index[indexes_to_drop])
             
 while True:
-    self_loop = raw_input("\n Would you like to include subclusters with no matches?[Yes or No only!] ")
+    self_loop = input("\n Would you like to include subclusters with no matches?[Yes or No only!] ")
     if self_loop == "Yes" or self_loop == "yes":
         edges_final.MultiGeneBlast_score.replace(to_replace='NaN', value=mgb_score, inplace=True)
         edges_final.BLAST_score.replace(to_replace='NaN', value=blast_score, inplace=True)
diff --git a/BioCompass/filter_edges.py.bak b/BioCompass/filter_edges.py.bak
new file mode 100644
index 0000000..42122fe
--- /dev/null
+++ b/BioCompass/filter_edges.py.bak
@@ -0,0 +1,57 @@
+import pandas as pd
+import itertools
+from sys import argv
+
+script, strain_name = argv
+
+edges_final = pd.read_csv('%s_edges_best_itineration.txt'%strain_name, sep='\t')
+
+
+while True:
+    try:
+        per_genes = int(raw_input("\n What minimum percentage of matching genes would you like [from 0 to 100]? "))
+        blast_score = int(raw_input("\n What minimum cumulative BLAST score would you like [from %s to %s]? "%(edges_final.BLAST_score.min(),edges_final.BLAST_score.max())))
+        mgb_score = int(raw_input("\n What minimum multigeneBLAST score [from %s to %s]? "%(edges_final.MultiGeneBlast_score.min(),edges_final.MultiGeneBlast_score.max())))
+        break
+    except ValueError:
+        print("Sorry, I didn't understand that. Try again using INTERGERS ONLY.")
+        continue
+    else:
+        break
+        
+seen = []
+indexes_to_drop = []
+        
+for index,row in edges_final.iterrows():
+    query = '%s_%s'%(edges_final.BGC.loc[index],edges_final.BGC_subcluster.loc[index])
+    if edges_final.percent_matching_genes.loc[index] >= per_genes and edges_final.BLAST_score.loc[index] >= blast_score and edges_final.MultiGeneBlast_score.loc[index] >= mgb_score:
+        seen.append(query)
+        continue
+    else:
+        if query in seen:
+            indexes_to_drop.append(index)
+        else:
+            seen.append(query)
+            edges_final.set_value(index,'BLAST_hit','%s'%edges_final.BGC.loc[index])
+            edges_final.set_value(index,'percent_matching_genes',None)
+            edges_final.set_value(index,'BLAST_score',None)
+            edges_final.set_value(index,'MultiGeneBlast_score',None)
+
+edges_final = edges_final.drop(edges_final.index[indexes_to_drop])
+            
+while True:
+    self_loop = raw_input("\n Would you like to include subclusters with no matches?[Yes or No only!] ")
+    if self_loop == "Yes" or self_loop == "yes":
+        edges_final.MultiGeneBlast_score.replace(to_replace='NaN', value=mgb_score, inplace=True)
+        edges_final.BLAST_score.replace(to_replace='NaN', value=blast_score, inplace=True)
+        edges_final.percent_matching_genes.replace(to_replace='NaN', value=100, inplace=True)
+        break
+    elif self_loop == "No" or self_loop == "no":
+        break
+    else:
+        print("Sorry, answer Yes or No only")
+        continue
+
+edges_final_handle = open('%s_edges_filtered.txt'%strain_name, "w")
+edges_final.to_csv(edges_final_handle, sep='\t', index=False)
+edges_final_handle.close()
\ No newline at end of file
diff --git a/BioCompass/subcluster_gen.py b/BioCompass/subcluster_gen.py
index 1ba90fa..5971273 100644
--- a/BioCompass/subcluster_gen.py
+++ b/BioCompass/subcluster_gen.py
@@ -67,7 +67,7 @@ def parse_db(db):
     D = defaultdict(list)
     for i,item in enumerate(db):
         D[item].append(i)
-    D = {k:v for k,v in D.items() if len(v)>1}
+    D = {k:v for k,v in list(D.items()) if len(v)>1}
     return D
 
 def find_category(categories,col5):
@@ -97,9 +97,9 @@ def find_category(categories,col5):
         col3 = []
         col4 = []
         col5 = []
-        for key, value in subcluster_dict.iteritems():
+        for key, value in subcluster_dict.items():
             col1.append(strain_name)
-            col2.append(string.ascii_uppercase[subcluster_dict.keys().index(key)])
+            col2.append(string.ascii_uppercase[list(subcluster_dict.keys()).index(key)])
             col3.append(len(value))
             categories = []
             genes = []
diff --git a/BioCompass/subcluster_gen.py.bak b/BioCompass/subcluster_gen.py.bak
new file mode 100644
index 0000000..1ba90fa
--- /dev/null
+++ b/BioCompass/subcluster_gen.py.bak
@@ -0,0 +1,115 @@
+from collections import defaultdict
+from sys import argv
+import pandas as pd
+import re
+import string
+import numpy as np
+from sklearn.cluster import DBSCAN
+from collections import Counter
+
+script, strain_name = argv
+
+table1_df = pd.read_csv('%s_table1.csv' % strain_name, sep='\t')
+table1_df['product'].fillna('None', inplace=True)
+
+#This first portion will create the distance matrix
+
+def score_match(table, index_gene1, index_gene2):
+    score = 0
+    gene1_category = table1_df.category.loc[index_gene1]
+    gene2_category = table1_df.category.loc[index_gene2]
+    if  gene1_category == 'hypothetical' or gene2_category == 'hypothetical':
+        if gene1_category == gene2_category:
+            score = score + 1
+        elif gene1_category != gene2_category:
+            score = score + 2
+    else:
+        if gene1_category == gene2_category:
+            score = score + 5
+    gene1_best_BGC = table1_df.best_hit_BGC.loc[index_gene1]
+    gene2_best_BGC = table1_df.best_hit_BGC.loc[index_gene2]
+    if gene1_best_BGC != 'None' and gene2_best_BGC != 'None':
+        if gene1_best_BGC == gene2_best_BGC:
+            score = score + 2
+            gene1_best_hit_pos = re.search(r'^\D*([0-9]*)',table1_df.best_hit_gene_loc.loc[index_gene1])
+            gene2_best_hit_pos = re.search(r'^\D*([0-9]*)',table1_df.best_hit_gene_loc.loc[index_gene2])
+            dif_best_hit_pos = abs(abs((int(gene2_best_hit_pos.group(1)) - int(gene1_best_hit_pos.group(1)))) - abs((index_gene2-index_gene1)))
+            if dif_best_hit_pos == 0:
+                score = score + 3
+            elif dif_best_hit_pos == 1:
+                score = score + 2
+            elif dif_best_hit_pos == 2:
+                score = score + 1
+    else:
+        score = score + 1
+    return score
+
+for index,row in table1_df.iterrows():
+    scores = []
+    for gene in range(0,len(table1_df)):
+        scores.append(score_match(table1_df,gene,index))
+    if index == 0:
+        A = np.vstack([scores])
+    else:
+        A = np.vstack([A,scores])
+
+#This second portion will run dbscan to create a subclusters possibilities
+
+def repeated(db_arrays,db):
+    for i in range(0,len(db_arrays)-1):
+        if np.array_equal(db_arrays[i],db) == False:
+            continue
+        else:
+            return True
+            break
+
+def parse_db(db):
+    D = defaultdict(list)
+    for i,item in enumerate(db):
+        D[item].append(i)
+    D = {k:v for k,v in D.items() if len(v)>1}
+    return D
+
+def find_category(categories,col5):
+    if 'biosynthetic' in categories:
+        col5.append('biosynthetic')
+    else:
+        if len(categories) > 1:
+            category = re.search(r'^\[\(\'(\S*)\'',str(Counter(categories).most_common(1))).group(1)
+            col5.append('%s'%category)
+        else:
+            col5.append('%s'%categories[0])
+    
+count = 0
+    
+for itn in range(1,len(A)):
+    db = DBSCAN(eps=itn, min_samples=2).fit_predict(A)
+    if itn == 1:
+        db_arrays = np.vstack([db])
+    else:
+        db_arrays = np.vstack([db_arrays,db])
+    if repeated(db_arrays,db) == True:
+        continue
+    else:
+        subcluster_dict = parse_db(db)
+        col1 = []
+        col2 = []
+        col3 = []
+        col4 = []
+        col5 = []
+        for key, value in subcluster_dict.iteritems():
+            col1.append(strain_name)
+            col2.append(string.ascii_uppercase[subcluster_dict.keys().index(key)])
+            col3.append(len(value))
+            categories = []
+            genes = []
+            for item in value:
+                categories.append(table1_df.category.loc[item])
+                genes.append(table1_df.locus_tag.loc[item])
+            genes = ','.join(genes)
+            col4.append(genes)
+            find_category(categories,col5)
+        frames = {'BGC':col1, 'subcluster':col2, 'CDSs':col3, 'loci':col4, 'category':col5}
+        count = count + 1
+        table2_df = pd.DataFrame(frames, index=None)
+        table2_df.to_csv('%s_table2_%d.csv' % (strain_name,count), sep='\t', index=False)
diff --git a/BioCompass/table_1_extender.py b/BioCompass/table_1_extender.py
index ef12199..13613b7 100644
--- a/BioCompass/table_1_extender.py
+++ b/BioCompass/table_1_extender.py
@@ -10,13 +10,13 @@
 table1_df = pd.read_csv('../outputs/tables/%s_%s_table1.csv'%(strain_name,cluster_number), sep='\t')
 
 def find_boarders(file_name):
-    with open(file_name).xreadlines() as f:
+    with open(file_name) as f:
         for num, line in enumerate(f):
             header = re.search(r'^(\d*). (\S*)_(\S*)$',line)
             if header != None and num not in numbers:
                 numbers.append(num)
                 headers.append(header.group())
-    temp_dict = dict(zip(numbers, headers))
+    temp_dict = dict(list(zip(numbers, headers)))
     return temp_dict
                 
 def itinerate_temp_file(file_name,temp_dict):
@@ -38,7 +38,7 @@ def find_best_hits(table1_df):
     """ For each ctg1_* @ table1_df['locus_tag'], get
     """
     for item in table1_df['locus_tag']:
-        with open("temp.txt", "r").xreadlines() as tf:
+        with open("temp.txt", "r") as tf:
             for line in tf:
                 # query gene, subject gene, %identity, blast score, %coverage, e-value
                 # e.g line <- 'ctg1_160\tSCATT_35940\t69\t169\t100.0\t2.3e-40\t\n'
@@ -70,11 +70,11 @@ def find_best_hits(table1_df):
     col9 = []
     col10 = []
     col11 = []
-    it = iter(sorted(temp_dict.iteritems()))
-    i = it.next()
+    it = iter(sorted(temp_dict.items()))
+    i = next(it)
     # e.g. i <- (139, '1. CP003219_c13')
     if len(temp_dict) > 1:
-        j = it.next()
+        j = next(it)
     while True:
         if i[0] == sorted(temp_dict)[-1]:
             itinerate_temp_file(file_name,temp_dict)
@@ -85,7 +85,7 @@ def find_best_hits(table1_df):
             find_best_hits(table1_df)
             i = j
             if j[0] != sorted(temp_dict)[-1]:
-                j = it.next()
+                j = next(it)
     """
     ctg1_160        SCATT_35940     69      169     100.0   2.3e-40
     hence:
@@ -94,7 +94,7 @@ def find_best_hits(table1_df):
     """
     col12 = []
     for item in col9:
-        with open(file_name, "r").xreadlines() as tf:
+        with open(file_name, "r") as tf:
             seen = []
             for line in tf:
                 best_hit_loc = re.search(r'^%s\t(\S*)\t(.*)'%item,line)
diff --git a/BioCompass/table_1_extender.py.bak b/BioCompass/table_1_extender.py.bak
new file mode 100644
index 0000000..ef12199
--- /dev/null
+++ b/BioCompass/table_1_extender.py.bak
@@ -0,0 +1,126 @@
+from sys import argv
+from Bio import SeqIO
+import pandas as pd
+import re
+import itertools
+import os.path
+
+script, strain_name, cluster_number = argv
+
+table1_df = pd.read_csv('../outputs/tables/%s_%s_table1.csv'%(strain_name,cluster_number), sep='\t')
+
+def find_boarders(file_name):
+    with open(file_name).xreadlines() as f:
+        for num, line in enumerate(f):
+            header = re.search(r'^(\d*). (\S*)_(\S*)$',line)
+            if header != None and num not in numbers:
+                numbers.append(num)
+                headers.append(header.group())
+    temp_dict = dict(zip(numbers, headers))
+    return temp_dict
+                
+def itinerate_temp_file(file_name,temp_dict):
+    """ Extract from file_name a section defined by i into temp.txt"""
+    with open(file_name) as fp:
+        temp_file = open("temp.txt", "w")
+        for num, line in enumerate(fp):
+            # Is not the last one
+            if i[0] != sorted(temp_dict)[-1]:
+                if num >= i[0] and num < j[0]:
+                    temp_file.writelines(line)
+            else:
+                if num >= i[0]:
+                    temp_file.writelines(line)
+        temp_file.close()
+
+    
+def find_best_hits(table1_df):
+    """ For each ctg1_* @ table1_df['locus_tag'], get
+    """
+    for item in table1_df['locus_tag']:
+        with open("temp.txt", "r").xreadlines() as tf:
+            for line in tf:
+                # query gene, subject gene, %identity, blast score, %coverage, e-value
+                # e.g line <- 'ctg1_160\tSCATT_35940\t69\t169\t100.0\t2.3e-40\t\n'
+                best_hit = re.search(r'^%s\t(\S*)\t(\d*)\t(\d*)\t(\d*)'%item,line)
+                if best_hit and item not in col7:
+                    # e.g. ctg1_160
+                    col7.append(item)
+                    # e.g i[1] <- '1. CP003219_c13'
+                    hit = re.search(r'^(\d*). (\S*)_(\S*)',i[1])
+                    # e.g. 'CP003219'
+                    col8.append(hit.group(2))
+                    # e.g. 'SCATT_35940'
+                    col9.append(best_hit.group(1))
+                    # e.g. '69'
+                    col10.append(best_hit.group(2))
+                    # e.g. '100'
+                    col11.append(best_hit.group(4))
+
+#find boarders for temp_file
+short_cluster_number = re.search(r'0*([0-9]*)',cluster_number).group(1)
+file_name = '../antiSMASH_input/%s/clusterblast/cluster%s.txt' % (strain_name,short_cluster_number)
+numbers = []
+headers = []
+temp_dict = []
+if os.path.isfile(file_name):
+    temp_dict = find_boarders(file_name)
+    col7 = []
+    col8 = []
+    col9 = []
+    col10 = []
+    col11 = []
+    it = iter(sorted(temp_dict.iteritems()))
+    i = it.next()
+    # e.g. i <- (139, '1. CP003219_c13')
+    if len(temp_dict) > 1:
+        j = it.next()
+    while True:
+        if i[0] == sorted(temp_dict)[-1]:
+            itinerate_temp_file(file_name,temp_dict)
+            find_best_hits(table1_df)
+            break
+        else:
+            itinerate_temp_file(file_name,temp_dict)
+            find_best_hits(table1_df)
+            i = j
+            if j[0] != sorted(temp_dict)[-1]:
+                j = it.next()
+    """
+    ctg1_160        SCATT_35940     69      169     100.0   2.3e-40
+    hence:
+    best_hit_loc ->
+      SCATT_35940     AEW95965        3878018 3878386 +       50S_ribosomal_protein_L14
+    """
+    col12 = []
+    for item in col9:
+        with open(file_name, "r").xreadlines() as tf:
+            seen = []
+            for line in tf:
+                best_hit_loc = re.search(r'^%s\t(\S*)\t(.*)'%item,line)
+                if best_hit_loc and item not in seen:
+                    col12.append(best_hit_loc.group(1))
+                    seen.append(item)
+    frames = {'locus_tag':col7,'best_hit_BGC':col8,'best_hit_gene':col9,'best_hit_%id':col10,'best_hit_%cov':col11,'best_hit_gene_loc':col12}
+    new_cols_df = pd.DataFrame(frames, index=None)
+    table1_df = pd.merge(table1_df, new_cols_df, on='locus_tag', how='outer')
+    table1_df.fillna('None', inplace=True)   
+else:
+    col7 = []
+    col8 = []
+    for item in table1_df['locus_tag']:
+        col7.append(item)
+        col8.append('None')
+    frames = {'locus_tag':col7,'best_hit_BGC':col8}
+    new_cols_df = pd.DataFrame(frames, index=None)
+    table1_df = pd.merge(table1_df, new_cols_df, on='locus_tag', how='outer')
+
+
+table1_handle = pd.HDFStore('../outputs/tables/table1.h5')
+table1_handle['%s_%s' % (strain_name,cluster_number)] = table1_df
+table1_handle.close()
+
+
+table1_handle = open('../outputs/tables/%s_%s_table1.csv'%(strain_name,cluster_number), "w")
+table1_df.to_csv(table1_handle, sep='\t', index=False)
+table1_handle.close()