From d1ddd83bd9ed0f84d7649acf01ef6f3b04ac41d1 Mon Sep 17 00:00:00 2001
From: "Ian M. B." <99409346+iPsychonaut@users.noreply.github.com>
Date: Thu, 13 Feb 2025 15:17:02 -0700
Subject: [PATCH 1/2] Create setup.py

Initial generation of setup.py for Python 3 packaging.
---
 setup.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 setup.py

diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..91f8ddd
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,44 @@
+from setuptools import setup
+import os
+
+# Read the long description from the README file.
+this_directory = os.path.abspath(os.path.dirname(__file__))
+with open(os.path.join(this_directory, "README.md"), encoding="utf-8") as fh:
+    long_description = fh.read()
+
+setup(
+    name="GFFtoolsGX",
+    version="0.0.1",
+    description="A collection of tools for converting genome annotation between GTF, BED, GenBank and GFF.",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    author="Vipin T Sreedharan",
+    author_email="vipin@cbio.mskcc.org",
+    url="https://github.com/iPsychonaut/GFFtools-GX",
+    # The importable code lives in top-level modules (there is no GFFtoolsGX
+    # package directory), so the modules are listed explicitly.
+    py_modules=[
+        "GFFParser",
+        "helper",
+    ],
+    # The converters expose their command line behaviour through __main__()
+    # functions or "if __name__" blocks, so they are installed as scripts.
+    scripts=[
+        "bed_to_gff.py",
+        "gbk_to_gff.py",
+        "gff_to_bed.py",
+        "gff_to_gtf.py",
+        "gtf_to_gff.py",
+    ],
+    install_requires=[
+        "biopython",
+        "numpy",
+    ],
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: BSD License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires=">=3.6",
+)

From da582942318e4d06155d1d5dac845468c2c5f0fa Mon Sep 17 00:00:00 2001
From: iPsychonaut
Date: Thu, 13 Feb 2025 15:25:49 -0700
Subject: [PATCH 2/2] Update code for Python 3 compatibility

---
 GFFParser.py      |  16 +-
 GFFParser.py.bak  | 498 ++++++++++++++++++++++++++++++++++++++++++++++
 bed_to_gff.py     |   2 +-
 bed_to_gff.py.bak |  71 +++++++
 gbk_to_gff.py     |   8 +-
 gbk_to_gff.py.bak | 216 ++++++++++++++++++++
 gff_to_bed.py     |   6 +-
 gff_to_bed.py.bak | 116 +++++++++++
 gff_to_gtf.py     |   2 +-
 gff_to_gtf.py.bak |  79 ++++++++
 gtf_to_gff.py     |   2 +-
 gtf_to_gff.py.bak |  78 ++++++++
 12 files changed, 1076 insertions(+), 18 deletions(-)
 create mode 100644 GFFParser.py.bak
 create mode 100644 bed_to_gff.py.bak
 create mode 100644 gbk_to_gff.py.bak
 create mode 100644 gff_to_bed.py.bak
 create mode 100644 gff_to_gtf.py.bak
 create mode 100644 gtf_to_gff.py.bak

diff --git a/GFFParser.py b/GFFParser.py
index f85b274..78b40e7 100644
--- a/GFFParser.py
+++ b/GFFParser.py
@@ -14,7 +14,7 @@
 import re
 import os
 import sys
-import urllib
+import urllib.request, urllib.parse, urllib.error
 import numpy as np
 import helper as utils
 from collections import defaultdict
@@ -62,7 +62,7 @@ def attribute_tags(col9):
         # replace the double qoutes from feature identifier
         val = re.sub('"', '', val)
         # replace the web formating place holders to plain text format
-        info[key].extend([urllib.unquote(v) for v in val.split(',') if v])
+        info[key].extend([urllib.parse.unquote(v) for v in val.split(',') if v])
 
     return is_gff, info
 
@@ -87,7 +87,7 @@ def spec_features_keywd(gff_parts):
             pass
     ## TODO key words
     for flat_name in ["Transcript", "CDS"]:
-        if gff_parts["info"].has_key(flat_name):
+        if flat_name in gff_parts["info"]:
             # parents
             if gff_parts['type'] in [flat_name] or re.search(r'transcript', gff_parts['type'], re.IGNORECASE):
                 if not gff_parts['id']:
@@ -158,7 +158,7 @@ def Parse(ga_file):
gff_info = spec_features_keywd(gff_info) # link the feature relationships - if gff_info['info'].has_key('Parent'): + if 'Parent' in gff_info['info']: for p in gff_info['info']['Parent']: if p == gff_info['id']: gff_info['id'] = '' @@ -217,7 +217,7 @@ def format_gene_models(parent_nf_map, child_nf_map): g_cnt = 0 gene = np.zeros((len(parent_nf_map),), dtype = utils.init_gene()) - for pkey, pdet in parent_nf_map.items(): + for pkey, pdet in list(parent_nf_map.items()): # considering only gene features #if not re.search(r'gene', pdet.get('type', '')): # continue @@ -284,7 +284,7 @@ def format_gene_models(parent_nf_map, child_nf_map): # make general ascending order of coordinates if orient == '-': - for etype, excod in child_feat.items(): + for etype, excod in list(child_feat.items()): if len(excod) > 1: if excod[0][0] > excod[-1][0]: excod.reverse() @@ -412,7 +412,7 @@ def format_gene_models(parent_nf_map, child_nf_map): break if XPFLG==1: - XQC = range(XP, len(gene)+1) + XQC = list(range(XP, len(gene)+1)) gene = np.delete(gene, XQC) return gene @@ -445,7 +445,7 @@ def create_missing_feature_type(p_feat, c_feat): """ child_n_map = defaultdict(list) - for fid, det in c_feat.items(): + for fid, det in list(c_feat.items()): # get the details from grand child GID = STRD = SCR = None SPOS, EPOS = [], [] diff --git a/GFFParser.py.bak b/GFFParser.py.bak new file mode 100644 index 0000000..f85b274 --- /dev/null +++ b/GFFParser.py.bak @@ -0,0 +1,498 @@ +#!/usr/bin/env python +""" +Extract genome annotation from a GFF (a tab delimited format for storing sequence features and annotations) file. + +Requirements: + Numpy :- http://numpy.org/ + +Copyright (C) + +2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. +2012-2015 Memorial Sloan Kettering Cancer Center, New York City, USA. 
+""" + +import re +import os +import sys +import urllib +import numpy as np +import helper as utils +from collections import defaultdict + +def attribute_tags(col9): + """ + Split the key-value tags from the attribute column, it takes column number 9 from GTF/GFF file + + @args col9: attribute column from GFF file + @type col9: str + """ + info = defaultdict(list) + is_gff = False + + if not col9: + return is_gff, info + + # trim the line ending semi-colon ucsc may have some white-space + col9 = col9.rstrip(';| ') + # attributes from 9th column + atbs = col9.split(" ; ") + if len(atbs) == 1: + atbs = col9.split("; ") + if len(atbs) == 1: + atbs = col9.split(";") + # check the GFF3 pattern which has key value pairs like: + gff3_pat = re.compile("\w+=") + # sometime GTF have: gene_id uc002zkg.1; + gtf_pat = re.compile("\s?\w+\s") + + key_vals = [] + + if gff3_pat.match(atbs[0]): # gff3 pattern + is_gff = True + key_vals = [at.split('=') for at in atbs] + elif gtf_pat.match(atbs[0]): # gtf pattern + for at in atbs: + key_vals.append(at.strip().split(" ",1)) + else: + # to handle attribute column has only single value + key_vals.append(['ID', atbs[0]]) + # get key, val items + for item in key_vals: + key, val = item + # replace the double qoutes from feature identifier + val = re.sub('"', '', val) + # replace the web formating place holders to plain text format + info[key].extend([urllib.unquote(v) for v in val.split(',') if v]) + + return is_gff, info + +def spec_features_keywd(gff_parts): + """ + Specify the feature key word according to the GFF specifications + + @args gff_parts: attribute field key + @type gff_parts: str + """ + for t_id in ["transcript_id", "transcriptId", "proteinId"]: + try: + gff_parts["info"]["Parent"] = gff_parts["info"][t_id] + break + except KeyError: + pass + for g_id in ["gene_id", "geneid", "geneId", "name", "gene_name", "genename"]: + try: + gff_parts["info"]["GParent"] = gff_parts["info"][g_id] + break + except KeyError: + pass + ## TODO key words + for flat_name in ["Transcript", "CDS"]: + if gff_parts["info"].has_key(flat_name): + # parents + if gff_parts['type'] in [flat_name] or re.search(r'transcript', gff_parts['type'], re.IGNORECASE): + if not gff_parts['id']: + gff_parts['id'] = gff_parts['info'][flat_name][0] + #gff_parts["info"]["ID"] = [gff_parts["id"]] + # children + elif gff_parts["type"] in ["intron", "exon", "three_prime_UTR", + "coding_exon", "five_prime_UTR", "CDS", "stop_codon", + "start_codon"]: + gff_parts["info"]["Parent"] = gff_parts["info"][flat_name] + break + return gff_parts + +def Parse(ga_file): + """ + Parsing GFF/GTF file based on feature relationship, it takes the input file. + + @args ga_file: input file name + @type ga_file: str + """ + child_map = defaultdict(list) + parent_map = dict() + + ga_handle = utils.open_file(ga_file) + + for rec in ga_handle: + rec = rec.strip('\n\r') + + # skip empty line fasta identifier and commented line + if not rec or rec[0] in ['#', '>']: + continue + # skip the genome sequence + if not re.search('\t', rec): + continue + + parts = rec.split('\t') + assert len(parts) >= 8, rec + + # process the attribute column (9th column) + ftype, tags = attribute_tags(parts[-1]) + if not tags: # skip the line if no attribute column. 
+ continue + + # extract fields + if parts[1]: + tags["source"] = parts[1] + if parts[7]: + tags["phase"] = parts[7] + + gff_info = dict() + gff_info['info'] = dict(tags) + gff_info["is_gff3"] = ftype + gff_info['chr'] = parts[0] + gff_info['score'] = parts[5] + + if parts[3] and parts[4]: + gff_info['location'] = [int(parts[3]) , + int(parts[4])] + gff_info['type'] = parts[2] + gff_info['id'] = tags.get('ID', [''])[0] + if parts[6] in ['?', '.']: + parts[6] = None + gff_info['strand'] = parts[6] + + # key word according to the GFF spec. + # is_gff3 flag is false check this condition and get the attribute fields + if not ftype: + gff_info = spec_features_keywd(gff_info) + + # link the feature relationships + if gff_info['info'].has_key('Parent'): + for p in gff_info['info']['Parent']: + if p == gff_info['id']: + gff_info['id'] = '' + break + rec_category = 'child' + elif gff_info['id']: + rec_category = 'parent' + else: + rec_category = 'record' + + # depends on the record category organize the features + if rec_category == 'child': + for p in gff_info['info']['Parent']: + # create the data structure based on source and feature id + child_map[(gff_info['chr'], gff_info['info']['source'], p)].append( + dict( type = gff_info['type'], + location = gff_info['location'], + strand = gff_info['strand'], + score = gff_info['score'], + ID = gff_info['id'], + gene_name = gff_info['info'].get('gene_name', [''])[0], + gene_id = gff_info['info'].get('GParent', [''])[0] + )) + elif rec_category == 'parent': + parent_map[(gff_info['chr'], gff_info['info']['source'], gff_info['id'])] = dict( + type = gff_info['type'], + location = gff_info['location'], + strand = gff_info['strand'], + score = gff_info['score'], + name = tags.get('Name', [''])[0]) + elif rec_category == 'record': + #TODO how to handle plain records? 
+ considering_soon = 1 + ga_handle.close() + + # depends on file type create parent feature + if not ftype: + parent_map, child_map = create_missing_feature_type(parent_map, child_map) + + # connecting parent child relations + # essentially the parent child features are here from any type of GTF/GFF2/GFF3 file + gene_mat = format_gene_models(parent_map, child_map) + + return gene_mat + +def format_gene_models(parent_nf_map, child_nf_map): + """ + Genarate GeneObject based on the parsed file contents + + @args parent_nf_map: parent features with source and chromosome information + @type parent_nf_map: collections defaultdict + @args child_nf_map: transctipt and exon information are encoded + @type child_nf_map: collections defaultdict + """ + + g_cnt = 0 + gene = np.zeros((len(parent_nf_map),), dtype = utils.init_gene()) + + for pkey, pdet in parent_nf_map.items(): + # considering only gene features + #if not re.search(r'gene', pdet.get('type', '')): + # continue + + # infer the gene start and stop if not there in the + if not pdet.get('location', []): + GNS, GNE = [], [] + # multiple number of transcripts + for L1 in child_nf_map[pkey]: + GNS.append(L1.get('location', [])[0]) + GNE.append(L1.get('location', [])[1]) + GNS.sort() + GNE.sort() + pdet['location'] = [GNS[0], GNE[-1]] + + orient = pdet.get('strand', '') + gene[g_cnt]['id'] = g_cnt +1 + gene[g_cnt]['chr'] = pkey[0] + gene[g_cnt]['source'] = pkey[1] + gene[g_cnt]['name'] = pkey[-1] + gene[g_cnt]['start'] = pdet.get('location', [])[0] + gene[g_cnt]['stop'] = pdet.get('location', [])[1] + gene[g_cnt]['strand'] = orient + gene[g_cnt]['score'] = pdet.get('score','') + + # default value + gene[g_cnt]['is_alt_spliced'] = gene[g_cnt]['is_alt'] = 0 + if len(child_nf_map[pkey]) > 1: + gene[g_cnt]['is_alt_spliced'] = gene[g_cnt]['is_alt'] = 1 + + # complete sub-feature for all transcripts + dim = len(child_nf_map[pkey]) + TRS = np.zeros((dim,), dtype=np.object) + TR_TYP = np.zeros((dim,), dtype=np.object) + EXON = np.zeros((dim,), dtype=np.object) + UTR5 = np.zeros((dim,), dtype=np.object) + UTR3 = np.zeros((dim,), dtype=np.object) + CDS = np.zeros((dim,), dtype=np.object) + TISc = np.zeros((dim,), dtype=np.object) + TSSc = np.zeros((dim,), dtype=np.object) + CLV = np.zeros((dim,), dtype=np.object) + CSTOP = np.zeros((dim,), dtype=np.object) + TSTAT = np.zeros((dim,), dtype=np.object) + TSCORE = np.zeros((dim,), dtype=np.object) + + # fetching corresponding transcripts + for xq, Lv1 in enumerate(child_nf_map[pkey]): + + TID = Lv1.get('ID', '') + TRS[xq]= np.array([TID]) + + TYPE = Lv1.get('type', '') + TR_TYP[xq] = np.array('') + TR_TYP[xq] = np.array(TYPE) if TYPE else TR_TYP[xq] + + orient = Lv1.get('strand', '') + tr_score = Lv1.get('score', '') + + # fetching different sub-features + child_feat = defaultdict(list) + for Lv2 in child_nf_map[(pkey[0], pkey[1], TID)]: + E_TYP = Lv2.get('type', '') + child_feat[E_TYP].append(Lv2.get('location')) + + # make general ascending order of coordinates + if orient == '-': + for etype, excod in child_feat.items(): + if len(excod) > 1: + if excod[0][0] > excod[-1][0]: + excod.reverse() + child_feat[etype] = excod + + # make exon coordinate from cds and utr regions + if not child_feat.get('exon'): + if child_feat.get('CDS'): + exon_cod = utils.make_Exon_cod( orient, + NonetoemptyList(child_feat.get('five_prime_UTR')), + NonetoemptyList(child_feat.get('CDS')), + NonetoemptyList(child_feat.get('three_prime_UTR'))) + child_feat['exon'] = exon_cod + else: + # TODO only UTR's + # searching through keys to 
find a pattern describing exon feature + ex_key_pattern = [k for k in child_feat if k.endswith("exon")] + if ex_key_pattern: + child_feat['exon'] = child_feat[ex_key_pattern[0]] + + # stop_codon are seperated from CDS, add the coordinates based on strand + if child_feat.get('stop_codon'): + if orient == '+': + if child_feat.get('stop_codon')[0][0] - child_feat.get('CDS')[-1][1] == 1: + child_feat['CDS'][-1] = [child_feat.get('CDS')[-1][0], child_feat.get('stop_codon')[0][1]] + else: + child_feat['CDS'].append(child_feat.get('stop_codon')[0]) + elif orient == '-': + if child_feat.get('CDS')[0][0] - child_feat.get('stop_codon')[0][1] == 1: + child_feat['CDS'][0] = [child_feat.get('stop_codon')[0][0], child_feat.get('CDS')[0][1]] + else: + child_feat['CDS'].insert(0, child_feat.get('stop_codon')[0]) + + # transcript signal sites + TIS, cdsStop, TSS, cleave = [], [], [], [] + cds_status, exon_status, utr_status = 0, 0, 0 + + if child_feat.get('exon'): + TSS = [child_feat.get('exon')[-1][1]] + TSS = [child_feat.get('exon')[0][0]] if orient == '+' else TSS + cleave = [child_feat.get('exon')[0][0]] + cleave = [child_feat.get('exon')[-1][1]] if orient == '+' else cleave + exon_status = 1 + + if child_feat.get('CDS'): + if orient == '+': + TIS = [child_feat.get('CDS')[0][0]] + cdsStop = [child_feat.get('CDS')[-1][1]-3] + else: + TIS = [child_feat.get('CDS')[-1][1]] + cdsStop = [child_feat.get('CDS')[0][0]+3] + cds_status = 1 + # cds phase calculation + child_feat['CDS'] = utils.add_CDS_phase(orient, child_feat.get('CDS')) + + # sub-feature status + if child_feat.get('three_prime_UTR') or child_feat.get('five_prime_UTR'): + utr_status =1 + + if utr_status == cds_status == exon_status == 1: + t_status = 1 + else: + t_status = 0 + + # add sub-feature # make array for export to different out + TSTAT[xq] = t_status + EXON[xq] = np.array(child_feat.get('exon'), np.float64) + UTR5[xq] = np.array(NonetoemptyList(child_feat.get('five_prime_UTR'))) + UTR3[xq] = np.array(NonetoemptyList(child_feat.get('three_prime_UTR'))) + CDS[xq] = np.array(NonetoemptyList(child_feat.get('CDS'))) + TISc[xq] = np.array(TIS) + CSTOP[xq] = np.array(cdsStop) + TSSc[xq] = np.array(TSS) + CLV[xq] = np.array(cleave) + TSCORE[xq] = tr_score + + # add sub-features to the parent gene feature + gene[g_cnt]['transcript_status'] = TSTAT + gene[g_cnt]['transcripts'] = TRS + gene[g_cnt]['exons'] = EXON + gene[g_cnt]['utr5_exons'] = UTR5 + gene[g_cnt]['cds_exons'] = CDS + gene[g_cnt]['utr3_exons'] = UTR3 + gene[g_cnt]['transcript_type'] = TR_TYP + gene[g_cnt]['tis'] = TISc + gene[g_cnt]['cdsStop'] = CSTOP + gene[g_cnt]['tss'] = TSSc + gene[g_cnt]['cleave'] = CLV + gene[g_cnt]['transcript_score'] = TSCORE + + gene[g_cnt]['gene_info'] = dict( ID = pkey[-1], + Name = pdet.get('name'), + Source = pkey[1]) + # few empty fields // TODO fill this: + gene[g_cnt]['anno_id'] = [] + gene[g_cnt]['confgenes_id'] = [] + gene[g_cnt]['alias'] = '' + gene[g_cnt]['name2'] = [] + gene[g_cnt]['chr_num'] = [] + gene[g_cnt]['paralogs'] = [] + gene[g_cnt]['transcript_valid'] = [] + gene[g_cnt]['exons_confirmed'] = [] + gene[g_cnt]['tis_conf'] = [] + gene[g_cnt]['tis_info'] = [] + gene[g_cnt]['cdsStop_conf'] = [] + gene[g_cnt]['cdsStop_info'] = [] + gene[g_cnt]['tss_info'] = [] + gene[g_cnt]['tss_conf'] = [] + gene[g_cnt]['cleave_info'] = [] + gene[g_cnt]['cleave_conf'] = [] + gene[g_cnt]['polya_info'] = [] + gene[g_cnt]['polya_conf'] = [] + gene[g_cnt]['is_valid'] = [] + gene[g_cnt]['transcript_complete'] = [] + gene[g_cnt]['is_complete'] = [] + 
gene[g_cnt]['is_correctly_gff3_referenced'] = '' + gene[g_cnt]['splicegraph'] = [] + g_cnt += 1 + + ## deleting empty gene records from the main array + XPFLG=0 + for XP, ens in enumerate(gene): + if ens[0]==0: + XPFLG=1 + break + + if XPFLG==1: + XQC = range(XP, len(gene)+1) + gene = np.delete(gene, XQC) + + return gene + +def NonetoemptyList(XS): + """ + Convert a None type to empty list + + @args XS: None type + @type XS: str + """ + return [] if XS is None else XS + +def create_missing_feature_type(p_feat, c_feat): + """ + GFF/GTF file defines only child features. This function tries to create + the parent feature from the information provided in the attribute column. + + example: + chr21 hg19_knownGene exon 9690071 9690100 0.000000 + . gene_id "uc002zkg.1"; transcript_id "uc002zkg.1"; + chr21 hg19_knownGene exon 9692178 9692207 0.000000 + . gene_id "uc021wgt.1"; transcript_id "uc021wgt.1"; + chr21 hg19_knownGene exon 9711935 9712038 0.000000 + . gene_id "uc011abu.2"; transcript_id "uc011abu.2"; + + This function gets the parsed feature annotations. + + @args p_feat: Parent feature map + @type p_feat: collections defaultdict + @args c_feat: Child feature map + @type c_feat: collections defaultdict + """ + + child_n_map = defaultdict(list) + for fid, det in c_feat.items(): + # get the details from grand child + GID = STRD = SCR = None + SPOS, EPOS = [], [] + TYP = dict() + for gchild in det: + GID = gchild.get('gene_id', '') + GNAME = gchild.get('gene_name', '') + SPOS.append(gchild.get('location', [])[0]) + EPOS.append(gchild.get('location', [])[1]) + STRD = gchild.get('strand', '') + SCR = gchild.get('score', '') + if gchild.get('type', '') == "gene": ## gencode GTF file has this problem + continue + TYP[gchild.get('type', '')] = 1 + SPOS.sort() + EPOS.sort() + + # infer transcript type + transcript_type = 'transcript' + transcript_type = 'mRNA' if TYP.get('CDS', '') or TYP.get('cds', '') else transcript_type + + # gene id and transcript id are same + transcript_id = fid[-1] + if GID == transcript_id: + transcript_id = 'Transcript:' + str(GID) + + # level -1 feature type + p_feat[(fid[0], fid[1], GID)] = dict( type = 'gene', + location = [], ## infer location based on multiple transcripts + strand = STRD, + name = GNAME ) + # level -2 feature type + child_n_map[(fid[0], fid[1], GID)].append( + dict( type = transcript_type, + location = [SPOS[0], EPOS[-1]], + strand = STRD, + score = SCR, + ID = transcript_id, + gene_id = '' )) + # reorganizing the grand child + for gchild in det: + child_n_map[(fid[0], fid[1], transcript_id)].append( + dict( type = gchild.get('type', ''), + location = gchild.get('location'), + strand = gchild.get('strand'), + ID = gchild.get('ID'), + score = gchild.get('score'), + gene_id = '' )) + return p_feat, child_n_map + diff --git a/bed_to_gff.py b/bed_to_gff.py index 461ae72..4c582d8 100644 --- a/bed_to_gff.py +++ b/bed_to_gff.py @@ -25,7 +25,7 @@ def __main__(): try: bed_fname = sys.argv[1] except: - print __doc__ + print(__doc__) sys.exit(-1) bed_fh = helper.open_file(bed_fname) diff --git a/bed_to_gff.py.bak b/bed_to_gff.py.bak new file mode 100644 index 0000000..461ae72 --- /dev/null +++ b/bed_to_gff.py.bak @@ -0,0 +1,71 @@ +#!/usr/bin/env python +""" +Convert genome annotation data in a 12 column BED format to GFF3. 
+ +Usage: + python bed_to_gff.py in.bed > out.gff + +Requirement: + helper.py : https://github.com/vipints/GFFtools-GX/blob/master/helper.py + +Copyright (C) + 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. + 2012-2015 Memorial Sloan Kettering Cancer Center New York City, USA. +""" + +import re +import sys +import helper + +def __main__(): + """ + main function + """ + + try: + bed_fname = sys.argv[1] + except: + print __doc__ + sys.exit(-1) + + bed_fh = helper.open_file(bed_fname) + + for line in bed_fh: + line = line.strip( '\n\r' ) + + if not line or line[0] in ['#']: + continue + + parts = line.split('\t') + assert len(parts) >= 12, line + + rstarts = parts[-1].split(',') + rstarts.pop() if rstarts[-1] == '' else rstarts + + exon_lens = parts[-2].split(',') + exon_lens.pop() if exon_lens[-1] == '' else exon_lens + + if len(rstarts) != len(exon_lens): + continue # checking the consistency col 11 and col 12 + + if len(rstarts) != int(parts[-3]): + continue # checking the number of exons and block count are same + + if not parts[5] in ['+', '-']: + parts[5] = '.' # replace the unknown strand with '.' + + # bed2gff result line + sys.stdout.write('%s\tbed2gff\tgene\t%d\t%s\t%s\t%s\t.\tID=Gene:%s;Name=Gene:%s\n' % (parts[0], int(parts[1])+1, parts[2], parts[4], parts[5], parts[3], parts[3])) + sys.stdout.write('%s\tbed2gff\ttranscript\t%d\t%s\t%s\t%s\t.\tID=%s;Name=%s;Parent=Gene:%s\n' % (parts[0], int(parts[1])+1, parts[2], parts[4], parts[5], parts[3], parts[3], parts[3])) + + st = int(parts[1]) + for ex_cnt in range(int(parts[-3])): + start = st + int(rstarts[ex_cnt]) + 1 + stop = start + int(exon_lens[ex_cnt]) - 1 + sys.stdout.write('%s\tbed2gff\texon\t%d\t%d\t%s\t%s\t.\tParent=%s\n' % (parts[0], start, stop, parts[4], parts[5], parts[3])) + + bed_fh.close() + + +if __name__ == "__main__": + __main__() diff --git a/gbk_to_gff.py b/gbk_to_gff.py index 5220676..067e2bc 100644 --- a/gbk_to_gff.py +++ b/gbk_to_gff.py @@ -25,7 +25,7 @@ def feature_table(chr_id, source, orient, genes, transcripts, cds, exons, unk): """ Write the feature information """ - for gname, ginfo in genes.items(): + for gname, ginfo in list(genes.items()): line = [str(chr_id), 'gbk2gff', ginfo[3], @@ -84,11 +84,11 @@ def feature_table(chr_id, source, orient, genes, transcripts, cds, exons, unk): line = [str(chr_id), 'gbk2gff', source, 0, 1, '.', orient, '.'] fStart = fStop = None - for eid, ex in cds.items(): + for eid, ex in list(cds.items()): fStart = ex[0][0] fStop = ex[0][-1] - for eid, ex in exons.items(): + for eid, ex in list(exons.items()): fStart = ex[0][0] fStop = ex[0][-1] @@ -209,7 +209,7 @@ def gbk_parse(fname): try: gbkfname = sys.argv[1] except: - print __doc__ + print(__doc__) sys.exit(-1) ## extract gbk records diff --git a/gbk_to_gff.py.bak b/gbk_to_gff.py.bak new file mode 100644 index 0000000..5220676 --- /dev/null +++ b/gbk_to_gff.py.bak @@ -0,0 +1,216 @@ +#!/usr/bin/env python +""" +Convert data from Genbank format to GFF. + +Usage: +python gbk_to_gff.py in.gbk > out.gff + +Requirements: + BioPython:- http://biopython.org/ + helper.py:- https://github.com/vipints/GFFtools-GX/blob/master/helper.py + +Copyright (C) + 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. + 2012-2015 Memorial Sloan Kettering Cancer Center New York City, USA. 
+""" + +import os +import re +import sys +import helper +import collections +from Bio import SeqIO + +def feature_table(chr_id, source, orient, genes, transcripts, cds, exons, unk): + """ + Write the feature information + """ + for gname, ginfo in genes.items(): + line = [str(chr_id), + 'gbk2gff', + ginfo[3], + str(ginfo[0]), + str(ginfo[1]), + '.', + ginfo[2], + '.', + 'ID=%s;Name=%s' % (str(gname), str(gname))] + sys.stdout.write('\t'.join(line)+"\n") + ## construct the transcript line is not defined in the original file + t_line = [str(chr_id), 'gbk2gff', source, '0', '1', '.', ginfo[2], '.'] + + if not transcripts: + t_line.append('ID=Transcript:%s;Parent=%s' % (str(gname), str(gname))) + + if exons: ## get the entire transcript region from the defined feature + t_line[3] = str(exons[gname][0][0]) + t_line[4] = str(exons[gname][0][-1]) + elif cds: + t_line[3] = str(cds[gname][0][0]) + t_line[4] = str(cds[gname][0][-1]) + + if not cds: + t_line[2] = 'transcript' + else: + t_line[2] = 'mRNA' + sys.stdout.write('\t'.join(t_line)+"\n") + + if exons: + exon_line_print(t_line, exons[gname], 'Transcript:'+str(gname), 'exon') + + if cds: + exon_line_print(t_line, cds[gname], 'Transcript:'+str(gname), 'CDS') + if not exons: + exon_line_print(t_line, cds[gname], 'Transcript:'+str(gname), 'exon') + + else: ## transcript is defined + for idx in transcripts[gname]: + t_line[2] = idx[3] + t_line[3] = str(idx[0]) + t_line[4] = str(idx[1]) + t_line.append('ID='+str(idx[2])+';Parent='+str(gname)) + sys.stdout.write('\t'.join(t_line)+"\n") + + ## feature line print call + if exons: + exon_line_print(t_line, exons[gname], str(idx[2]), 'exon') + if cds: + exon_line_print(t_line, cds[gname], str(idx[2]), 'CDS') + if not exons: + exon_line_print(t_line, cds[gname], str(idx[2]), 'exon') + + if len(genes) == 0: ## feature entry with fragment information + + line = [str(chr_id), 'gbk2gff', source, 0, 1, '.', orient, '.'] + fStart = fStop = None + + for eid, ex in cds.items(): + fStart = ex[0][0] + fStop = ex[0][-1] + + for eid, ex in exons.items(): + fStart = ex[0][0] + fStop = ex[0][-1] + + if fStart or fStart: + + line[2] = 'gene' + line[3] = str(fStart) + line[4] = str(fStop) + line.append('ID=Unknown_Gene_' + str(unk) + ';Name=Unknown_Gene_' + str(unk)) + sys.stdout.write('\t'.join(line)+"\n") + + if not cds: + line[2] = 'transcript' + else: + line[2] = 'mRNA' + + line[8] = 'ID=Unknown_Transcript_' + str(unk) + ';Parent=Unknown_Gene_' + str(unk) + sys.stdout.write('\t'.join(line)+"\n") + + if exons: + exon_line_print(line, cds[None], 'Unknown_Transcript_' + str(unk), 'exon') + + if cds: + exon_line_print(line, cds[None], 'Unknown_Transcript_' + str(unk), 'CDS') + if not exons: + exon_line_print(line, cds[None], 'Unknown_Transcript_' + str(unk), 'exon') + + unk +=1 + + return unk + + +def exon_line_print(temp_line, trx_exons, parent, ftype): + """ + Print the EXON feature line + """ + for ex in trx_exons: + temp_line[2] = ftype + temp_line[3] = str(ex[0]) + temp_line[4] = str(ex[1]) + temp_line[8] = 'Parent=%s' % parent + sys.stdout.write('\t'.join(temp_line)+"\n") + + +def gbk_parse(fname): + """ + Extract genome annotation recods from genbank format + + @args fname: gbk file name + @type fname: str + """ + fhand = helper.open_file(gbkfname) + unk = 1 + + for record in SeqIO.parse(fhand, "genbank"): + gene_tags = dict() + tx_tags = collections.defaultdict(list) + exon = collections.defaultdict(list) + cds = collections.defaultdict(list) + mol_type, chr_id = None, None + + for rec in record.features: + + if 
rec.type == 'source': + try: + mol_type = rec.qualifiers['mol_type'][0] + except: + mol_type = '.' + pass + try: + chr_id = rec.qualifiers['chromosome'][0] + except: + chr_id = record.name + continue + + strand='-' + strand='+' if rec.strand>0 else strand + + fid = None + try: + fid = rec.qualifiers['gene'][0] + except: + pass + + transcript_id = None + try: + transcript_id = rec.qualifiers['transcript_id'][0] + except: + pass + + if re.search(r'gene', rec.type): + gene_tags[fid] = (rec.location._start.position+1, + rec.location._end.position, + strand, + rec.type + ) + elif rec.type == 'exon': + exon[fid].append((rec.location._start.position+1, + rec.location._end.position)) + elif rec.type=='CDS': + cds[fid].append((rec.location._start.position+1, + rec.location._end.position)) + else: + # get all transcripts + if transcript_id: + tx_tags[fid].append((rec.location._start.position+1, + rec.location._end.position, + transcript_id, + rec.type)) + # record extracted, generate feature table + unk = feature_table(chr_id, mol_type, strand, gene_tags, tx_tags, cds, exon, unk) + + fhand.close() + + +if __name__=='__main__': + + try: + gbkfname = sys.argv[1] + except: + print __doc__ + sys.exit(-1) + + ## extract gbk records + gbk_parse(gbkfname) diff --git a/gff_to_bed.py b/gff_to_bed.py index b5cc912..bdc24bc 100644 --- a/gff_to_bed.py +++ b/gff_to_bed.py @@ -25,9 +25,9 @@ def limitBEDWrite(tinfo): @type tinfo: numpy object """ - for contig_id, feature in tinfo.items(): + for contig_id, feature in list(tinfo.items()): uns_line = dict() - for tid, tloc in feature.items(): + for tid, tloc in list(feature.items()): uns_line[(int(tloc[0])-1, int(tloc[1]))]=1 for ele in sorted(uns_line): pline = [contig_id, @@ -106,7 +106,7 @@ def __main__(): try: query_file = sys.argv[1] except: - print __doc__ + print(__doc__) sys.exit(-1) Transcriptdb = GFFParser.Parse(query_file) diff --git a/gff_to_bed.py.bak b/gff_to_bed.py.bak new file mode 100644 index 0000000..b5cc912 --- /dev/null +++ b/gff_to_bed.py.bak @@ -0,0 +1,116 @@ +#!/usr/bin/env python +""" +Convert genome annotation data in GFF/GTF to a 12 column BED format. +BED format typically represents the transcript models. + +Usage: python gff_to_bed.py in.gff > out.bed + +Requirement: + GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py + +Copyright (C) + 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. + 2012-2015 Memorial Sloan Kettering Cancer Center New York City, USA. 
+""" + +import re +import sys +import GFFParser + +def limitBEDWrite(tinfo): + """ + Write a three column BED file + + @args tinfo: list of genes + @type tinfo: numpy object + """ + + for contig_id, feature in tinfo.items(): + uns_line = dict() + for tid, tloc in feature.items(): + uns_line[(int(tloc[0])-1, int(tloc[1]))]=1 + for ele in sorted(uns_line): + pline = [contig_id, + str(ele[0]-1), + str(ele[1])] + + sys.stdout.write('\t'.join(pline)+"\n") + + +def writeBED(tinfo): + """ + writing result files in bed format + + @args tinfo: list of genes + @type tinfo: numpy object + """ + + for ent1 in tinfo: + child_flag = False + + for idx, tid in enumerate(ent1['transcripts']): + child_flag = True + exon_cnt = len(ent1['exons'][idx]) + exon_len = '' + exon_cod = '' + rel_start = None + rel_stop = None + for idz, ex_cod in enumerate(ent1['exons'][idx]):#check for exons of corresponding transcript + exon_len += '%d,' % (ex_cod[1]-ex_cod[0]+1) + if idz == 0: #calculate the relative start position + exon_cod += '0,' + rel_start = int(ex_cod[0])-1 + rel_stop = int(ex_cod[1]) + else: + exon_cod += '%d,' % (ex_cod[0]-1-rel_start) ## shifting the coordinates to zero + rel_stop = int(ex_cod[1]) + + if exon_len: + score = 0 + score = ent1['transcript_score'][idx] if ent1['transcript_score'].any() else score ## getting the transcript score + out_print = [ent1['chr'], + str(rel_start), + str(rel_stop), + tid[0], + str(score), + ent1['strand'], + str(rel_start), + str(rel_stop), + '0', + str(exon_cnt), + exon_len, + exon_cod] + sys.stdout.write('\t'.join(out_print)+"\n") + + if not child_flag: # file just contains only a single parent type i.e, gff3 defines only one feature type + score = 0 + score = ent1['transcript_score'][0] if ent1['transcript_score'].any() else score + + out_print = [ent1['chr'], + '%d' % (int(ent1['start'])-1), + '%d' % int(ent1['stop']), + ent1['name'], + str(score), + ent1['strand'], + '%d' % int(ent1['start']), + '%d' % int(ent1['stop']), + '0', + '1', + '%d,' % (int(ent1['stop'])-int(ent1['start'])+1), + '0,'] + + sys.stdout.write('\t'.join(out_print)+"\n") + + +def __main__(): + try: + query_file = sys.argv[1] + except: + print __doc__ + sys.exit(-1) + + Transcriptdb = GFFParser.Parse(query_file) + writeBED(Transcriptdb) + +if __name__ == "__main__": + __main__() diff --git a/gff_to_gtf.py b/gff_to_gtf.py index d1423c8..1ad57ac 100644 --- a/gff_to_gtf.py +++ b/gff_to_gtf.py @@ -71,7 +71,7 @@ def printGTF(tinfo): try: gff_fname = sys.argv[1] except: - print __doc__ + print(__doc__) sys.exit(-1) Transcriptdb = GFFParser.Parse(gff_fname) diff --git a/gff_to_gtf.py.bak b/gff_to_gtf.py.bak new file mode 100644 index 0000000..d1423c8 --- /dev/null +++ b/gff_to_gtf.py.bak @@ -0,0 +1,79 @@ +#!/usr/bin/env python +""" +Program to convert data from GFF to GTF + +Usage: python gff_to_gtf.py in.gff > out.gtf + +Requirement: + GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py + +Copyright (C) + 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. + 2012-2015 Memorial Sloan Kettering Cancer Center New York City, USA. 
+""" + +import re +import sys +import numpy +import GFFParser + +def printGTF(tinfo): + """ + writing result file in GTF format + + @args tinfo: parsed object from gff file + @type tinfo: numpy array + """ + + for ent1 in tinfo: + for idx, tid in enumerate(ent1['transcripts']): + + exons = ent1['exons'][idx] + if numpy.isnan(numpy.min(exons)): + continue + cds_exons = ent1['cds_exons'][idx] + + stop_codon = start_codon = () + + if ent1['strand'] == '+': + if cds_exons.any(): + start_codon = (cds_exons[0][0], cds_exons[0][0]+2) + stop_codon = (cds_exons[-1][1]-2, cds_exons[-1][1]) + elif ent1['strand'] == '-': + if cds_exons.any(): + start_codon = (cds_exons[-1][1]-2, cds_exons[-1][1]) + stop_codon = (cds_exons[0][0], cds_exons[0][0]+2) + else: + sys.stdout.write('STRAND information missing - %s, skip the transcript - %s\n' % (ent1['strand'], tid[0])) + pass + + last_cds_cod = 0 + for idz, ex_cod in enumerate(exons): + + sys.stdout.write('%s\t%s\texon\t%d\t%d\t.\t%s\t.\tgene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; \n' % (ent1['chr'], ent1['source'], ex_cod[0], ex_cod[1], ent1['strand'], ent1['name'], tid[0], idz+1, ent1['gene_info']['Name'])) + + if cds_exons.any(): + try: + sys.stdout.write('%s\t%s\tCDS\t%d\t%d\t.\t%s\t%d\tgene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; \n' % (ent1['chr'], ent1['source'], cds_exons[idz][0], cds_exons[idz][1], ent1['strand'], cds_exons[idz][2], ent1['name'], tid[0], idz+1, ent1['gene_info']['Name'])) + last_cds_cod = idz + except: + pass + + if idz == 0: + sys.stdout.write('%s\t%s\tstart_codon\t%d\t%d\t.\t%s\t%d\tgene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; \n' % (ent1['chr'], ent1['source'], start_codon[0], start_codon[1], ent1['strand'], cds_exons[idz][2], ent1['name'], tid[0], idz+1, ent1['gene_info']['Name'])) + + if stop_codon: + sys.stdout.write('%s\t%s\tstop_codon\t%d\t%d\t.\t%s\t%d\tgene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; \n' % (ent1['chr'], ent1['source'], stop_codon[0], stop_codon[1], ent1['strand'], cds_exons[last_cds_cod][2], ent1['name'], tid[0], idz+1, ent1['gene_info']['Name'])) + + +if __name__ == "__main__": + + try: + gff_fname = sys.argv[1] + except: + print __doc__ + sys.exit(-1) + + Transcriptdb = GFFParser.Parse(gff_fname) + + printGTF(Transcriptdb) diff --git a/gtf_to_gff.py b/gtf_to_gff.py index 6d78669..5e6c7e8 100644 --- a/gtf_to_gff.py +++ b/gtf_to_gff.py @@ -67,7 +67,7 @@ def __main__(): try: gtf_fname = sys.argv[1] except: - print __doc__ + print(__doc__) sys.exit(-1) gtf_file_content = GFFParser.Parse(gtf_fname) diff --git a/gtf_to_gff.py.bak b/gtf_to_gff.py.bak new file mode 100644 index 0000000..6d78669 --- /dev/null +++ b/gtf_to_gff.py.bak @@ -0,0 +1,78 @@ +#!/usr/bin/env python +""" +Convert Gene Transfer Format [GTF] to Generic Feature Format Version 3 [GFF3]. + +Usage: python gtf_to_gff.py in.gtf > out.gff3 + +Requirement: + GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py + helper.py: https://github.com/vipints/GFFtools-GX/blob/master/helper.py + +Copyright (C) + 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. + 2012-2015 Memorial Sloan Kettering Cancer Center New York City, USA. 
+""" + +import re +import sys +import helper +import GFFParser + +def GFFWriter(gtf_content): + """ + write the feature information to GFF format + + @args gtf_content: Parsed object from gtf file + @type gtf_content: numpy array + """ + + sys.stdout.write('##gff-version 3\n') + for ent1 in gtf_content: + chr_name = ent1['chr'] + strand = ent1['strand'] + start = ent1['start'] + stop = ent1['stop'] + source = ent1['source'] + ID = ent1['name'] + Name = ent1['gene_info']['Name'] + Name = ID if not Name else Name + + sys.stdout.write('%s\t%s\tgene\t%d\t%d\t.\t%s\t.\tID=%s;Name=%s\n' % (chr_name, source, start, stop, strand, ID, Name)) + for idx, tid in enumerate(ent1['transcripts']): + + t_start = ent1['exons'][idx][0][0] + t_stop = ent1['exons'][idx][-1][-1] + t_type = ent1['transcript_type'][idx] + + utr5_exons, utr3_exons = [], [] + if ent1['exons'][idx].any() and ent1['cds_exons'][idx].any(): + utr5_exons, utr3_exons = helper.buildUTR(ent1['cds_exons'][idx], ent1['exons'][idx], strand) + + sys.stdout.write('%s\t%s\t%s\t%d\t%d\t.\t%s\t.\tID=%s;Parent=%s\n' % (chr_name, source, t_type, t_start, t_stop, strand, tid[0], ID)) + for ex_cod in utr5_exons: + sys.stdout.write('%s\t%s\tfive_prime_UTR\t%d\t%d\t.\t%s\t.\tParent=%s\n' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0])) + + for ex_cod in ent1['cds_exons'][idx]: + sys.stdout.write('%s\t%s\tCDS\t%d\t%d\t.\t%s\t%d\tParent=%s\n' % (chr_name, source, ex_cod[0], ex_cod[1], strand, ex_cod[2], tid[0])) + + for ex_cod in utr3_exons: + sys.stdout.write('%s\t%s\tthree_prime_UTR\t%d\t%d\t.\t%s\t.\tParent=%s\n' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0])) + + for ex_cod in ent1['exons'][idx]: + sys.stdout.write('%s\t%s\texon\t%d\t%d\t.\t%s\t.\tParent=%s\n' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0])) + + +def __main__(): + + try: + gtf_fname = sys.argv[1] + except: + print __doc__ + sys.exit(-1) + + gtf_file_content = GFFParser.Parse(gtf_fname) + + GFFWriter(gtf_file_content) + +if __name__ == "__main__": + __main__()