From edfc8c62745c5dc4570d8abf03b9b75f83fa1c35 Mon Sep 17 00:00:00 2001
From: Manu-chroma <manvendra0310@gmail.com>
Date: Sun, 2 Apr 2017 17:30:49 +0530
Subject: [PATCH 1/3] working towards py3 compatibility - fixed imports - print
 statements

---
 .travis.yml                       |   4 +-
 setup.cfg                         |   2 +-
 test/test_chat.py                 |   3 +-
 wp_parser/ChatFeatures.py         | 169 +++++++++++++++---------------
 wp_parser/datelib.py              |  15 +--
 wp_parser/parsers/facebook.py     |   9 +-
 wp_parser/parsers/facebook.py.bak |  29 +++++
 wp_parser/parsers/message.py      |  15 ++-
 wp_parser/parsers/whatsapp.py     |   8 +-
 wp_parser/parsers/whatsapp.py.bak |  49 +++++++++
 wp_parser/wp_chat.py              | 127 +++++++++++-----------
 11 files changed, 257 insertions(+), 173 deletions(-)
 create mode 100644 wp_parser/parsers/facebook.py.bak
 create mode 100644 wp_parser/parsers/whatsapp.py.bak

diff --git a/.travis.yml b/.travis.yml
index b0c75b7..5738124 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,8 +1,8 @@
 language: python
 python:
   - "2.7"
-#  - "3.4"
-#  - "3.5"
+  - "3.4"
+  - "3.5"
 #  - "3.6"
 #install:
 #   - pip install .
diff --git a/setup.cfg b/setup.cfg
index 4364fa9..5490153 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,2 +1,2 @@
 [tool:pytest]
-norecursedirs= venv
+norecursedirs = venv
diff --git a/test/test_chat.py b/test/test_chat.py
index 8e5ce24..7f5221d 100644
--- a/test/test_chat.py
+++ b/test/test_chat.py
@@ -1,10 +1,11 @@
 import os
 
 class TestChat:
+
     def test_chat(self, tmpdir):
         out_filename = str(tmpdir.join("abc"))
         for case in ['One', 'Two']:
-            cmd = 'python wp_parser/wp_chat.py -f test/testChat2.txt -n Username{} > {}'.format(case, out_filename)
+            cmd = 'python -m wp_parser.wp_chat -f test/testChat2.txt -n Username{} > {}'.format(case, out_filename)
             os.system(cmd)
             with open(out_filename) as fh:
                 result = fh.read()
diff --git a/wp_parser/ChatFeatures.py b/wp_parser/ChatFeatures.py
index 1e754b4..ef0acb3 100644
--- a/wp_parser/ChatFeatures.py
+++ b/wp_parser/ChatFeatures.py
@@ -1,24 +1,28 @@
 # -*- coding: utf-8 -*-
 from __future__ import division
-import datelib
-import re
+from __future__ import absolute_import
+
 import operator
 
-class ChatFeatures():
+import re
+import wp_parser.datelib as datelib
+
 
+class ChatFeatures:
     def __init__(self):
-        self.root_response_time    = []
+        self.root_response_time = []
         self.contact_response_time = []
-        self.root_burst            = []
-        self.contact_burst         = []
-        self.initiations           = {}
-        self.weekday               = {}
-        self.shifts                = {}
-        self.patterns              = {}
-        self.proportions           = {}
-        self.most_used_words       = {}
-
-    def compute_response_time_and_burst(self, list_of_messages, root_name, senders, initiation_thrs=(60*60*8), burst_thrs=3, response_thrs=(60*60*3)):
+        self.root_burst = []
+        self.contact_burst = []
+        self.initiations = {}
+        self.weekday = {}
+        self.shifts = {}
+        self.patterns = {}
+        self.proportions = {}
+        self.most_used_words = {}
+
+    def compute_response_time_and_burst(self, list_of_messages, root_name, senders, initiation_thrs=(60 * 60 * 8),
+                                        burst_thrs=3, response_thrs=(60 * 60 * 3)):
         # perform the operations that are dependant on multiple messages
         # (response time, bursts)
         self.initiations = {}
@@ -27,7 +31,7 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
         t0 = list_of_messages[0].datetime_obj
         burst_count = 1
         for index, message in enumerate(list_of_messages):
-            #skip the first message since we are looking at differences; note this means we don't count first msg as init
+            # skip the first message since we are looking at differences; note this means we don't count first msg as init
             if index == 0:
                 continue
             t1 = message.datetime_obj
@@ -35,22 +39,22 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
             dt.total_seconds()
 
             # print "sender %s delta %s" % ( message.sender, dt.total_seconds() )
-            if (dt.total_seconds() > initiation_thrs):
+            if dt.total_seconds() > initiation_thrs:
                 self.initiations[message.sender] += 1
 
             # is sender the same as the last message?
-            if message.sender != list_of_messages[index-1].sender:
+            if message.sender != list_of_messages[index - 1].sender:
                 # sender changed, store the burst count and reset
-                #print "sender changed: %s" % ( message.sender )
-                #print "burst count: %s" % ( burst_count )
+                # print "sender changed: %s" % ( message.sender )
+                # print "burst count: %s" % ( burst_count )
 
-                #print("response time: %d\n" %(dt.total_seconds()) )
+                # print("response time: %d\n" %(dt.total_seconds()) )
                 # is sender the root?
                 if message.sender == root_name:
                     # store the burst count for the last sender, which is the
                     # opposite of current
                     if burst_count > burst_thrs:
-                        #print "BURST CONTACT ENDED: %s IN A ROW" % ( burst_count )
+                        # print "BURST CONTACT ENDED: %s IN A ROW" % ( burst_count )
                         self.contact_burst.append(burst_count)
                     if dt.total_seconds() < response_thrs:
                         self.root_response_time.append(dt.total_seconds())
@@ -59,11 +63,11 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
                     # store the burst count for the last sender, which is the
                     # opposite of current
                     if burst_count > burst_thrs:
-                        #print "BURST ROOT ENDED: %s IN A ROW" % ( burst_count )
+                        # print "BURST ROOT ENDED: %s IN A ROW" % ( burst_count )
                         self.root_burst.append(burst_count)
                     if dt.total_seconds() < response_thrs:
                         self.contact_response_time.append(dt.total_seconds())
-                
+
                 # End of the first burst, restart the counter
                 burst_count = 1
 
@@ -71,12 +75,12 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
                 # accumulate the number of messages sent in a row
                 burst_count += 1
             t0 = t1
-        if burst_count > burst_thrs: #catch a burst if at end of chat
-            #print "final burst: %s" % ( burst_count )
-            if  message.sender == root_name:
+        if burst_count > burst_thrs:  # catch a burst if at end of chat
+            # print "final burst: %s" % ( burst_count )
+            if message.sender == root_name:
                 self.root_burst.append(burst_count)
             else:
-                self.contact_burst.append(burst_count)                
+                self.contact_burst.append(burst_count)
 
     def compute_messages_per_weekday(self, list_of_messages):
         self.weekday = {
@@ -105,16 +109,16 @@ def compute_messages_per_shift(self, list_of_messages):
         }
         for msg in list_of_messages:
             hour = int(msg.time.split(":")[0])
-            if hour >= 0 and hour <= 6:
+            if 0 <= hour <= 6:
                 self.shifts["latenight"] += 1
 
-            elif hour > 6 and hour <= 11:
+            elif 6 < hour <= 11:
                 self.shifts["morning"] += 1
 
-            elif hour > 11 and hour <= 17:
+            elif 11 < hour <= 17:
                 self.shifts["afternoon"] += 1
 
-            elif hour > 17 and hour <= 23:
+            elif 17 < hour <= 23:
                 self.shifts["evening"] += 1
         return self.shifts
 
@@ -134,7 +138,7 @@ def compute_messages_pattern(self, list_of_messages, senders, pattern_list):
                 if length > 0:
                     if pattern not in self.patterns:
                         self.patterns[pattern][msg.sender] = length
-                        print "This should never happen"
+                        print("This should never happen")
                     else:
                         self.patterns[pattern][msg.sender] += length
         return self.patterns
@@ -149,10 +153,10 @@ def compute_message_proportions(self, list_of_messages, senders, root, contact):
                 self.proportions[i][s] = 0
         for msg in list_of_messages:
             self.proportions["messages"][msg.sender] += 1
-            self.proportions["words"][msg.sender]    += len(msg.content.split(" "))
-            self.proportions["chars"][msg.sender]    += len(msg.content.strip())
-            self.proportions["qmarks"][msg.sender]   += msg.content.count('?')
-            self.proportions["exclams"][msg.sender]  += msg.content.count('!')
+            self.proportions["words"][msg.sender] += len(msg.content.split(" "))
+            self.proportions["chars"][msg.sender] += len(msg.content.strip())
+            self.proportions["qmarks"][msg.sender] += msg.content.count('?')
+            self.proportions["exclams"][msg.sender] += msg.content.count('!')
             self.proportions["media"][msg.sender] += (
                 msg.content.count('<media omitted>') +
                 msg.content.count('<image omitted>') +
@@ -170,24 +174,24 @@ def compute_message_proportions(self, list_of_messages, senders, root, contact):
         self.proportions["avg_words"] = {}
         for s in senders:
             self.proportions["avg_words"][s] = self.proportions["words"][s] / self.proportions["messages"][s]
-        self.proportions["avg_words"]["ratio"] = self.proportions["avg_words"][root] / self.proportions["avg_words"][contact]
+        self.proportions["avg_words"]["ratio"] = self.proportions["avg_words"][root] / self.proportions["avg_words"][
+            contact]
 
         for c in categories:
             self.proportions[c]["total"] = 0
             for s in senders:
                 self.proportions[c]["total"] += self.proportions[c][s]
-        
+
         for c in categories:
-         
-            #if a value is 0, replace with a 1 to avoid zero erros in ratio calcs.
+
+            # if a value is 0, replace with a 1 to avoid division-by-zero errors in ratio calcs.
             if self.proportions[c][contact] == 0:
                 self.proportions[c][contact] = 1
             if self.proportions[c][root] == 0:
-                self.proportions[c][root] = 1                
+                self.proportions[c][root] = 1
 
             self.proportions[c]["ratio"] = self.proportions[c][root] / self.proportions[c][contact]
 
-
         return self.proportions
 
     def compute_most_used_words(self, list_of_messages, top=10, threshold=3):
@@ -204,37 +208,37 @@ def compute_most_used_words(self, list_of_messages, top=10, threshold=3):
                         words_counter[w] = 1
                     else:
                         words_counter[w] += 1
-        sorted_words = sorted(words_counter.iteritems(), key=operator.itemgetter(1), reverse=True)
+        sorted_words = sorted(words_counter.items(), key=operator.itemgetter(1), reverse=True)
         self.most_used_words = sorted_words[:top]
         return self.most_used_words
 
     def compute_avg_root_response_time(self):
-        if (len(self.root_response_time) != 0):
-            return sum(self.root_response_time)/len(self.root_response_time)
+        if len(self.root_response_time) != 0:
+            return sum(self.root_response_time) / len(self.root_response_time)
         return 0
 
     def compute_avg_contact_response_time(self):
-        if (len(self.contact_response_time) != 0):
-            return sum(self.contact_response_time)/len(self.contact_response_time)
+        if len(self.contact_response_time) != 0:
+            return sum(self.contact_response_time) / len(self.contact_response_time)
         return 0
 
     def compute_response_time_ratio(self, root, contact):
         avg_root = self.compute_avg_root_response_time()
         avg_contact = self.compute_avg_contact_response_time()
-        if (avg_contact != 0):
+        if avg_contact != 0:
             return avg_root / avg_contact
         return 0
 
     def compute_bursts_ratio(self, root, contact):
         if (len(self.contact_burst)) == 0:
             return len(self.root_burst) / 1
-        if (len(self.root_burst) == 0):
-            return ( 1/len(self.contact_burst))
-        return len(self.root_burst)/len(self.contact_burst)
+        if len(self.root_burst) == 0:
+            return 1 / len(self.contact_burst)
+        return len(self.root_burst) / len(self.contact_burst)
 
     def compute_nbr_root_burst(self):
         return len(self.root_burst)
-    
+
     def compute_nbr_contact_burst(self):
         return len(self.contact_burst)
 
@@ -244,48 +248,41 @@ def compute_nbr_contact_burst(self):
     #     return 0
 
     def compute_avg_contact_burst(self):
-        if (len(self.contact_burst) != 0):
-            return sum(self.contact_burst)/len(self.contact_burst)
+        if len(self.contact_burst) != 0:
+            return sum(self.contact_burst) / len(self.contact_burst)
         return 0
 
     def compute_root_initation_ratio(self, root, contact):
-        if (self.initiations[contact] == 0):
-            return self.initiations[root]/1
-        if (self.initiations[root] == 0):
-            return 1/self.initiations[contact] 
+        if self.initiations[contact] == 0:
+            return self.initiations[root] / 1
+        if self.initiations[root] == 0:
+            return 1 / self.initiations[contact]
         return self.initiations[root] / self.initiations[contact]
-        
+
     def generate_outcome(self, root, contact, methodology):
-        outcome = 99;
+        outcome = 99
         if methodology == 0:
-            if (self.compute_root_initation_ratio(root, contact) > 0.867):
-                outcome = 0 #"just not that into you"
-                #print "DOESNT INITIATE"
-            elif (self.proportions["qmarks"]["ratio"] > 0.87): #flipped the non-intutitive direction of inequality
-                outcome = 0 #"just not that into you"
-                #print "QUESTIONS FAIL"
+            if self.compute_root_initation_ratio(root, contact) > 0.867:
+                outcome = 0  # "just not that into you"
+                # print "DOESNT INITIATE"
+            elif self.proportions["qmarks"]["ratio"] > 0.87:  # flipped the non-intuitive direction of inequality
+                outcome = 0  # "just not that into you"
+                # print "QUESTIONS FAIL"
             else:
-                outcome = 1 #"definitely into you"
-                #print "ELSE" 
+                outcome = 1  # "definitely into you"
+                # print "ELSE"
         elif methodology == 1:
-            if (self.compute_root_initation_ratio(root, contact) > 0.83):
-                outcome = 0 #"just not that into you"
-                #print "DOESNT INITIATE"
-            elif (self.features.compute_avg_root_response_time() < 0.92): #flipped non-intuitive direction of inequality
-                outcome = 0 #"just not that into you"
-                #print "QUESTIONS FAIL"
+            if self.compute_root_initation_ratio(root, contact) > 0.83:
+                outcome = 0  # "just not that into you"
+                # print "DOESNT INITIATE"
+            elif self.compute_avg_root_response_time() < 0.92:  # flipped non-intuitive direction of inequality
+                outcome = 0  # "just not that into you"
+                # print "QUESTIONS FAIL"
             else:
-                outcome = 1 #"definitely into you"
-                #print "ELSE"
+                outcome = 1  # "definitely into you"
+                # print "ELSE"
 
         else:
-            outcome = 99;
-
-        return outcome         
-                        
-#        qMarksPerRoot = qmarksRoot/messagesRoot
- #       qMarksPerContact = qmarksContact/messagesContact
-        
-        
-        
-        
\ No newline at end of file
+            outcome = 99
+
+        return outcome
diff --git a/wp_parser/datelib.py b/wp_parser/datelib.py
index 33f69d6..a540fd3 100644
--- a/wp_parser/datelib.py
+++ b/wp_parser/datelib.py
@@ -1,7 +1,7 @@
+import time
 from datetime import date
 from datetime import datetime
 from datetime import timedelta
-import time
 
 
 # get current ymd
@@ -37,11 +37,13 @@ def valid_date(date_str):
 
     return valid
 
+
 def date_diff(dateobj1, dateobj2):
     import math
     delta = dateobj2 - dateobj1
     return int(math.fabs(delta.days))
 
+
 def datecmp(date1, date2):
     year, month, day = date_split(date1)
     year_t, month_t, day_t = date_split(date2)
@@ -53,8 +55,8 @@ def datecmp(date1, date2):
         else:
             return 1
     except ValueError:
-        #misc.error("Fix me! Invalid date", "datecmp")
-        print "Fix me! Invalid date"
+        # misc.error("Fix me! Invalid date", "datecmp")
+        print("Fix me! Invalid date")
         return False
 
 
@@ -65,7 +67,7 @@ def date_operation(date_str, num):
     return end_date
 
 
-def date_to_str(date_str):
-    return date.strftime('%Y-%m-%d')
+def date_to_str(date_obj):
+    return date_obj.strftime('%Y-%m-%d')
 
 
@@ -119,5 +121,6 @@ def weekday_portuguese_to_english(string):
     elif string == "sab" or string == "sabado":
         return "Saturday"
 
+
 if __name__ == "__main__":
-    print date_diff(datetime(2015, 6, 4), datetime(2015, 07, 7))
\ No newline at end of file
+    print(date_diff(datetime(2015, 6, 4), datetime(2015, 7, 7)))
diff --git a/wp_parser/parsers/facebook.py b/wp_parser/parsers/facebook.py
index 2d93c9a..c6fea13 100644
--- a/wp_parser/parsers/facebook.py
+++ b/wp_parser/parsers/facebook.py
@@ -1,12 +1,13 @@
 from datetime import datetime
-import message
 
-class ParserFacebook():
+from . import message
 
-    ''' A line is a dict object in this format:
+
+class ParserFacebook:
+    """ A line is a dict object in this format:
     {u'message': u'text text', u'from': u'Username One', u'id':
         u'3294659605566648_1432085429', u'datetime': u'2015-05-20T01:30:29+0000'}
-    '''
+    """
 
     def __init__(self, raw_messages):
         self.raw_messages = raw_messages
diff --git a/wp_parser/parsers/facebook.py.bak b/wp_parser/parsers/facebook.py.bak
new file mode 100644
index 0000000..2d93c9a
--- /dev/null
+++ b/wp_parser/parsers/facebook.py.bak
@@ -0,0 +1,29 @@
+from datetime import datetime
+import message
+
+class ParserFacebook():
+
+    ''' A line is a dict object in this format:
+    {u'message': u'text text', u'from': u'Username One', u'id':
+        u'3294659605566648_1432085429', u'datetime': u'2015-05-20T01:30:29+0000'}
+    '''
+
+    def __init__(self, raw_messages):
+        self.raw_messages = raw_messages
+
+    def parse(self):
+        list_of_messages = []
+        set_of_senders = set()
+        for l in self.raw_messages:
+            content = l["message"].encode("utf-8")
+            sender = l["from"].encode("utf-8")
+            datetime_str = l["datetime"].encode("utf-8")
+            date, time = datetime_str.split("T")
+            time = time.replace("+0000", "")
+            msg_date = date + " " + time
+            datetime_obj = datetime.strptime(msg_date, "%Y-%m-%d %H:%M:%S")
+
+            set_of_senders.add(sender)
+            list_of_messages.append(message.Message(sender, content, date, time, datetime_obj))
+
+        return list(set_of_senders), list_of_messages
diff --git a/wp_parser/parsers/message.py b/wp_parser/parsers/message.py
index b69afd8..53975a1 100644
--- a/wp_parser/parsers/message.py
+++ b/wp_parser/parsers/message.py
@@ -1,11 +1,10 @@
-class Message():
-
+class Message:
     def __init__(self, sender, content, date, time, datetime_obj):
-        self.sender                = sender
-        self.content               = content
-        self.date                  = date
-        self.time                  = time
-        self.datetime_obj          = datetime_obj
+        self.sender = sender
+        self.content = content
+        self.date = date
+        self.time = time
+        self.datetime_obj = datetime_obj
 
     def __repr__(self):
-        return " ".join(str(v) for v in [self.datetime_obj, self.sender, self.content])
\ No newline at end of file
+        return " ".join(str(v) for v in [self.datetime_obj, self.sender, self.content])
diff --git a/wp_parser/parsers/whatsapp.py b/wp_parser/parsers/whatsapp.py
index 8eddc43..7d25998 100644
--- a/wp_parser/parsers/whatsapp.py
+++ b/wp_parser/parsers/whatsapp.py
@@ -1,13 +1,15 @@
 from datetime import datetime
-import message
 
-''' A line can be either: 
+from . import message
+
+''' A line can be either:
         09/12/2012 17:03:48: Sender Name: Message
         3/24/14, 1:59:59 PM: Sender Name: Message
         24/3/14, 13:59:59: Sender Name: Message
 '''
 
-class ParserWhatsapp():
+
+class ParserWhatsapp:
 
     def __init__(self, raw_messages):
         self.raw_messages = raw_messages
diff --git a/wp_parser/parsers/whatsapp.py.bak b/wp_parser/parsers/whatsapp.py.bak
new file mode 100644
index 0000000..8eddc43
--- /dev/null
+++ b/wp_parser/parsers/whatsapp.py.bak
@@ -0,0 +1,49 @@
+from datetime import datetime
+import message
+
+''' A line can be either: 
+        09/12/2012 17:03:48: Sender Name: Message
+        3/24/14, 1:59:59 PM: Sender Name: Message
+        24/3/14, 13:59:59: Sender Name: Message
+'''
+
+class ParserWhatsapp():
+
+    def __init__(self, raw_messages):
+        self.raw_messages = raw_messages
+
+    def parse(self):
+        list_of_messages = []
+        set_of_senders = set()
+        for l in self.raw_messages:
+            msg_date, sep, msg = l.partition(": ")
+            raw_date, sep, time = msg_date.partition(" ")
+            sender, sep, content = msg.partition(": ")
+            raw_date = raw_date.replace(",", "")
+            year = raw_date.split(" ")[0].split("/")[-1]
+            # The following lines treats:
+            # 3/24/14 1:59:59 PM
+            # 24/3/14 13:59:59 PM
+            # Couldn't we use msg_date instead of chatTimeString here?
+
+            # colonIndex = [x.start() for x in re.finditer(':', l)]
+            # print l, colonIndex
+            # chatTimeString = l[0:colonIndex[2]]
+            # This ignores a minority of bad formatted lines using try/except block. 
+            # an execption is raised when the datetime_obj is not created due to date parsing error
+            try:
+                if "AM" in msg_date or "PM" in msg_date:
+                    datetime_obj = datetime.strptime(
+                        msg_date, "%m/%d/%y, %I:%M:%S %p")
+                else:
+                    if len(year) == 2:
+                        datetime_obj = datetime.strptime(msg_date, "%m/%d/%y %H:%M:%S")
+                    else:
+                        datetime_obj = datetime.strptime(msg_date, "%m/%d/%Y %H:%M:%S")
+            except ValueError: 
+                continue
+                
+            set_of_senders.add(sender)
+            list_of_messages.append(message.Message(sender, content, raw_date, time, datetime_obj))
+
+        return list(set_of_senders), list_of_messages
diff --git a/wp_parser/wp_chat.py b/wp_parser/wp_chat.py
index 2b76194..c9adea6 100644
--- a/wp_parser/wp_chat.py
+++ b/wp_parser/wp_chat.py
@@ -2,45 +2,42 @@
 # -*- coding: utf-8 -*-
 
 from __future__ import division
+from __future__ import absolute_import
 
-from parsers import whatsapp, facebook
-from ChatFeatures import ChatFeatures
-
+import argparse
+import json
+import operator
 
-from datetime import datetime
 import codecs
-import operator
-import sys
-import json
-import csv
-import argparse
-import os 
+import os
+from wp_parser.ChatFeatures import ChatFeatures
+from wp_parser.parsers import whatsapp, facebook
 
 
 def pretty_print(dic, parent, depth):
-    tup = sorted(dic.iteritems(), key=operator.itemgetter(1))
+    tup = sorted(dic.items(), key=lambda kv: (isinstance(kv[1], dict), kv[0] if isinstance(kv[1], dict) else kv[1]))
     isLeaf = True
     for key in tup:
         if isinstance(dic[key[0]], dict):
             isLeaf = False
     if isLeaf and depth != 0:
-        print " " * (depth - 1) * 2, parent
+        print(" " * (depth - 1) * 2, parent)
     for key in tup:
         if isinstance(dic[key[0]], dict):
             pretty_print(dic[key[0]], key[0], depth + 1)
         else:
-            print " " * depth * 2, str(key[0]), "->", dic[key[0]]
+            print(" " * depth * 2, str(key[0]), "->", dic[key[0]])
 
-class Chat():
 
+class Chat:
     def __init__(self, filename, platform="WhatsApp"):
-        self.filename     = filename
-        self.platform     = platform
+        self.filename = filename
+        self.platform = platform
         self.raw_messages = []
-        self.messages     = []     # List of Messages objects
-        self.features     = ChatFeatures() # Chat Features object
-        self.senders      = []
-        self.root         = ''
+        self.messages = []  # List of Messages objects
+        self.features = ChatFeatures()  # Chat Features object
+        self.senders = []
+        self.root = ''
 
         if platform == "WhatsApp":
             self.open_file = self.open_file_whatsapp
@@ -98,72 +95,72 @@ def message_proportions(self):
 
     def most_used_words(self):
         return self.features.compute_most_used_words(self.messages, 10, 3)
-    
+
     def all_features(self, **kargs):
         burst_thrs = kargs.get("burst_thrs", 3)
-        initiation_thrs = kargs.get("initiation_thrs", 60*60*8)
-        response_thrs = kargs.get("response_thrs", 60*60*3)
+        initiation_thrs = kargs.get("initiation_thrs", 60 * 60 * 8)
+        response_thrs = kargs.get("response_thrs", 60 * 60 * 3)
         pattern_list = kargs.get("pattern_list", [])
         top = kargs.get("top", 10)
         word_length_threshold = kargs.get("word_length_threshold", 3)
 
-        self.features.compute_response_time_and_burst(self.messages, self.root, self.senders, initiation_thrs, burst_thrs, response_thrs)
-        self.features.compute_messages_per_weekday(self.messages) 
+        self.features.compute_response_time_and_burst(self.messages, self.root, self.senders, initiation_thrs,
+                                                      burst_thrs, response_thrs)
+        self.features.compute_messages_per_weekday(self.messages)
         self.features.compute_messages_per_shift(self.messages)
         self.features.compute_messages_pattern(self.messages, self.senders, pattern_list)
         self.features.compute_message_proportions(self.messages, self.senders, self.root, self.get_contact())
         self.features.compute_most_used_words(self.messages, top, word_length_threshold)
 
     def print_features(self):
-        print "Root: %s" % (self.senders[0])
-        print ""
+        print("Root: %s" % (self.senders[0]))
+        print("")
 
-        print "Average root response time (s): %.2f" % (self.features.compute_avg_root_response_time())
-        print "Average contact response time (s): %.2f" % (self.features.compute_avg_contact_response_time())
-        print "Ratio: %.2f" % (self.features.compute_response_time_ratio(self.root, self.get_contact()))
-        print ""
+        print("Average root response time (s): %.2f" % (self.features.compute_avg_root_response_time()))
+        print("Average contact response time (s): %.2f" % (self.features.compute_avg_contact_response_time()))
+        print("Ratio: %.2f" % (self.features.compute_response_time_ratio(self.root, self.get_contact())))
+        print("")
 
         # print "Number of root bursts: %d" % (self.features.compute_nbr_root_burst())
         # print "Average burst length: %.2ff" % (self.features.compute_avg_root_burst())
         # print ""
 
-        print "Number of contact bursts: %d" % (self.features.compute_nbr_contact_burst())
-        print "Average burst length: %.2ff" % (self.features.compute_avg_contact_burst())
-        print "Ratio: %.2f" % (self.features.compute_bursts_ratio(self.root, self.get_contact()))
-        print ""
+        print("Number of contact bursts: %d" % (self.features.compute_nbr_contact_burst()))
+        print("Average burst length: %.2ff" % (self.features.compute_avg_contact_burst()))
+        print("Ratio: %.2f" % (self.features.compute_bursts_ratio(self.root, self.get_contact())))
+        print("")
 
         for s in self.senders:
             if s == self.root:
-                print "Root initiations: %d" % (self.features.initiations[s])
+                print("Root initiations: %d" % (self.features.initiations[s]))
             else:
-                print "Contact initiations: %d" % (self.features.initiations[s])
+                print("Contact initiations: %d" % (self.features.initiations[s]))
 
-        print "Root initiation ratio: %.2f" % (self.features.compute_root_initation_ratio(self.root, self.get_contact()))
-        print ""
+        print(
+            "Root initiation ratio: %.2f" % (self.features.compute_root_initation_ratio(self.root, self.get_contact())))
+        print("")
 
-        print "Proportions:"
+        print("Proportions:")
         pretty_print(self.features.proportions, self.features.proportions.keys()[0], 1)
-        print ""
-        print "Weekdays:"
+        print("")
+        print("Weekdays:")
         pretty_print(self.features.weekday, "Weekday", 0)
-        print ""
-        print "Shifts:"
+        print("")
+        print("Shifts:")
         pretty_print(self.features.shifts, "Shifts", 0)
-        print ""
-        print "Patterns:"
+        print("")
+        print("Patterns:")
         pretty_print(self.features.patterns, "Patterns", 0)
-        print ""
-        print "Most used words:"
+        print("")
+        print("Most used words:")
         for muw in self.features.most_used_words:
-            try: 
-                print muw[0]
+            try:
+                print(muw[0])
             except UnicodeEncodeError:
                 self.features.most_used_words.remove(muw)
 
     def save_features(self, output_name):
-        output = {}
-        output["root"] = self.root
-        output["avg_response_time"] = {}
+        output = {"root": self.root, "avg_response_time": {}}
         for s in self.senders:
             if s == self.root:
                 output["avg_response_time"][s] = self.features.compute_avg_root_response_time()
@@ -188,14 +185,16 @@ def save_features(self, output_name):
 
 
         output["initiations"] = self.features.initiations
-        output["initiations"]["root_initiation_ratio"] = self.features.compute_root_initation_ratio(self.root, self.get_contact())
+        output["initiations"]["root_initiation_ratio"] = self.features.compute_root_initation_ratio(self.root,
+                                                                                                    self.get_contact())
         output["proportions"] = self.features.proportions
         output["weekdays"] = self.features.weekday
         output["shifts"] = self.features.shifts
         output["patterns"] = self.features.patterns
         output["senders"] = self.senders
         output["muw"] = self.features.most_used_words
-        output["outcome"] = self.features.generate_outcome(self.root, self.get_contact(), 0) #TODO: make macros for outcome methodology 
+        output["outcome"] = self.features.generate_outcome(self.root, self.get_contact(),
+                                                           0)  # TODO: make macros for outcome methodology
         # if fallback to default path, make sure the hardcoded folder `log` is present in the folder
         if output_name == "./logs/basic_stats.json":
             if not os.path.exists('logs'):
@@ -204,21 +203,25 @@ def save_features(self, output_name):
             if output_name.endswith(".json"):
                 arq = open(output_name, "w")
             else:
-                arq = open(output_name+".json", "w")
+                arq = open(output_name + ".json", "w")
             arq.write(json.dumps(output, indent=4, sort_keys=True))
             arq.close()
         # In case path (directory) mentioned by the user doesn't exist
         except IOError:
-            print "\nI/O Error: Following path doesn't exist:", output_name, "\n"
+            print("\nI/O Error: Following path doesn't exist:", output_name, "\n")
             exit(1)
 
+
 def main():
     parser = argparse.ArgumentParser(description='Chatlog Feature Extractor')
     parser.add_argument('-f', '--file', help='Chatlog file', required=True)
     parser.add_argument('-n', '--root', help='Root name', required=False)
-    parser.add_argument('-p', '--platform', help='Platform', choices=["WhatsApp", "Facebook"], default="WhatsApp", required=False)
-    parser.add_argument('-r', '--regexes', help='Regex patterns to compute frequency', nargs="+", required=False, default=[])
-    parser.add_argument('-o', '--output', help='JSON output file name', required=False, default="./logs/basic_stats.json")
+    parser.add_argument('-p', '--platform', help='Platform', choices=["WhatsApp", "Facebook"], default="WhatsApp",
+                        required=False)
+    parser.add_argument('-r', '--regexes', help='Regex patterns to compute frequency', nargs="+", required=False,
+                        default=[])
+    parser.add_argument('-o', '--output', help='JSON output file name', required=False,
+                        default="./logs/basic_stats.json")
 
     args = vars(parser.parse_args())
 
@@ -227,7 +230,7 @@ def main():
     c.parse_messages()
     if args.get("root") is None:
         for i, s in enumerate(c.senders):
-            print str(i), s
+            print(str(i), s)
         c.set_root(c.senders[int(raw_input("Please choose one person to be the root: "))])
     else:
         c.set_root(args["root"])
@@ -235,6 +238,6 @@ def main():
     c.print_features()
     c.save_features(args["output"])
 
+
 if __name__ == "__main__":
     main()
-    
\ No newline at end of file

From d93bb47909ca332d6f3f31524bc20b214f5e97c3 Mon Sep 17 00:00:00 2001
From: Manvendra Singh <manvendra0310@gmail.com>
Date: Fri, 7 Apr 2017 23:09:32 +0530
Subject: [PATCH 2/3] use unittest class, better tests.

---
 test/out/testChat2_UsernameOne.out | 92 +++++++++++++++---------------
 test/out/testChat2_UsernameTwo.out | 92 +++++++++++++++---------------
 test/test_chat.py                  | 44 +++++++++++---
 wp_parser/wp_chat.py               |  9 ++-
 4 files changed, 133 insertions(+), 104 deletions(-)

diff --git a/test/out/testChat2_UsernameOne.out b/test/out/testChat2_UsernameOne.out
index 3ddd321..d2bc681 100644
--- a/test/out/testChat2_UsernameOne.out
+++ b/test/out/testChat2_UsernameOne.out
@@ -13,55 +13,55 @@ Root initiations: 0
 Root initiation ratio: 1.00
 
 Proportions:
-   avg_words
-     ratio -> 0.625
-     UsernameOne -> 2.75
-     UsernameTwo -> 4.4
-   media
-     total -> 1
-     ratio -> 1.0
-     UsernameTwo -> 1
-     UsernameOne -> 1
-   exclams
-     ratio -> 0.333333333333
-     UsernameOne -> 1
-     total -> 3
-     UsernameTwo -> 3
-   qmarks
-     UsernameTwo -> 1
-     total -> 4
-     ratio -> 4.0
-     UsernameOne -> 4
-   messages
-     ratio -> 0.8
-     UsernameOne -> 4
-     UsernameTwo -> 5
-     total -> 9
-   words
-     ratio -> 0.5
-     UsernameOne -> 11
-     UsernameTwo -> 22
-     total -> 33
-   chars
-     ratio -> 0.528
-     UsernameOne -> 66
-     UsernameTwo -> 125
-     total -> 191
+('  ', 'avg_words')
+('    ', 'ratio', '->', 0.625)
+('    ', 'UsernameOne', '->', 2.75)
+('    ', 'UsernameTwo', '->', 4.4)
+('  ', 'media')
+('    ', 'total', '->', 1)
+('    ', 'ratio', '->', 1.0)
+('    ', 'UsernameTwo', '->', 1)
+('    ', 'UsernameOne', '->', 1)
+('  ', 'exclams')
+('    ', 'ratio', '->', 0.3333333333333333)
+('    ', 'UsernameOne', '->', 1)
+('    ', 'total', '->', 3)
+('    ', 'UsernameTwo', '->', 3)
+('  ', 'qmarks')
+('    ', 'UsernameTwo', '->', 1)
+('    ', 'total', '->', 4)
+('    ', 'ratio', '->', 4.0)
+('    ', 'UsernameOne', '->', 4)
+('  ', 'messages')
+('    ', 'ratio', '->', 0.8)
+('    ', 'UsernameOne', '->', 4)
+('    ', 'UsernameTwo', '->', 5)
+('    ', 'total', '->', 9)
+('  ', 'words')
+('    ', 'ratio', '->', 0.5)
+('    ', 'UsernameOne', '->', 11)
+('    ', 'UsernameTwo', '->', 22)
+('    ', 'total', '->', 33)
+('  ', 'chars')
+('    ', 'ratio', '->', 0.528)
+('    ', 'UsernameOne', '->', 66)
+('    ', 'UsernameTwo', '->', 125)
+('    ', 'total', '->', 191)
 
 Weekdays:
- Tuesday -> 0
- Friday -> 0
- Thursday -> 0
- Sunday -> 0
- Saturday -> 0
- Monday -> 1
- Wednesday -> 8
+('', 'Tuesday', '->', 0)
+('', 'Friday', '->', 0)
+('', 'Thursday', '->', 0)
+('', 'Sunday', '->', 0)
+('', 'Saturday', '->', 0)
+('', 'Monday', '->', 1)
+('', 'Wednesday', '->', 8)
 
 Shifts:
- evening -> 0
- latenight -> 0
- morning -> 1
- afternoon -> 8
+('', 'evening', '->', 0)
+('', 'latenight', '->', 0)
+('', 'morning', '->', 1)
+('', 'afternoon', '->', 8)
 
 Patterns:
 
@@ -75,4 +75,4 @@ time
 hello,
 media
 exclam!!!
-<media
+<media
\ No newline at end of file
diff --git a/test/out/testChat2_UsernameTwo.out b/test/out/testChat2_UsernameTwo.out
index 0497ef5..0aba6b5 100644
--- a/test/out/testChat2_UsernameTwo.out
+++ b/test/out/testChat2_UsernameTwo.out
@@ -13,55 +13,55 @@ Contact initiations: 0
 Root initiation ratio: 1.00
 
 Proportions:
-   avg_words
-     ratio -> 1.0
-     UsernameOne -> 2.75
-     UsernameTwo -> 4.4
-   media
-     UsernameOne -> 0
-     total -> 1
-     ratio -> 1.0
-     UsernameTwo -> 1
-   exclams
-     UsernameOne -> 0
-     ratio -> 1.0
-     total -> 3
-     UsernameTwo -> 3
-   qmarks
-     ratio -> 1.0
-     UsernameTwo -> 1
-     total -> 4
-     UsernameOne -> 4
-   messages
-     ratio -> 1.0
-     UsernameOne -> 4
-     UsernameTwo -> 5
-     total -> 9
-   words
-     ratio -> 1.0
-     UsernameOne -> 11
-     UsernameTwo -> 22
-     total -> 33
-   chars
-     ratio -> 1.0
-     UsernameOne -> 66
-     UsernameTwo -> 125
-     total -> 191
+('  ', 'avg_words')
+('    ', 'ratio', '->', 1.0)
+('    ', 'UsernameOne', '->', 2.75)
+('    ', 'UsernameTwo', '->', 4.4)
+('  ', 'media')
+('    ', 'UsernameOne', '->', 0)
+('    ', 'total', '->', 1)
+('    ', 'ratio', '->', 1.0)
+('    ', 'UsernameTwo', '->', 1)
+('  ', 'exclams')
+('    ', 'UsernameOne', '->', 0)
+('    ', 'ratio', '->', 1.0)
+('    ', 'total', '->', 3)
+('    ', 'UsernameTwo', '->', 3)
+('  ', 'qmarks')
+('    ', 'ratio', '->', 1.0)
+('    ', 'UsernameTwo', '->', 1)
+('    ', 'total', '->', 4)
+('    ', 'UsernameOne', '->', 4)
+('  ', 'messages')
+('    ', 'ratio', '->', 1.0)
+('    ', 'UsernameOne', '->', 4)
+('    ', 'UsernameTwo', '->', 5)
+('    ', 'total', '->', 9)
+('  ', 'words')
+('    ', 'ratio', '->', 1.0)
+('    ', 'UsernameOne', '->', 11)
+('    ', 'UsernameTwo', '->', 22)
+('    ', 'total', '->', 33)
+('  ', 'chars')
+('    ', 'ratio', '->', 1.0)
+('    ', 'UsernameOne', '->', 66)
+('    ', 'UsernameTwo', '->', 125)
+('    ', 'total', '->', 191)
 
 Weekdays:
- Tuesday -> 0
- Friday -> 0
- Thursday -> 0
- Sunday -> 0
- Saturday -> 0
- Monday -> 1
- Wednesday -> 8
+('', 'Tuesday', '->', 0)
+('', 'Friday', '->', 0)
+('', 'Thursday', '->', 0)
+('', 'Sunday', '->', 0)
+('', 'Saturday', '->', 0)
+('', 'Monday', '->', 1)
+('', 'Wednesday', '->', 8)
 
 Shifts:
- evening -> 0
- latenight -> 0
- morning -> 1
- afternoon -> 8
+('', 'evening', '->', 0)
+('', 'latenight', '->', 0)
+('', 'morning', '->', 1)
+('', 'afternoon', '->', 8)
 
 Patterns:
 
@@ -75,4 +75,4 @@ time
 hello,
 media
 exclam!!!
-<media
+<media
\ No newline at end of file
diff --git a/test/test_chat.py b/test/test_chat.py
index 7f5221d..5a1d01b 100644
--- a/test/test_chat.py
+++ b/test/test_chat.py
@@ -1,15 +1,41 @@
-import os
+import sys
+import unittest
 
-class TestChat:
+from wp_parser.wp_chat import main as parser
+
+# for capturing print() output
+from contextlib import contextmanager
+try:
+    from StringIO import StringIO
+except ImportError:  # Python 3: StringIO lives in the io module
+    from io import StringIO
+
+
+@contextmanager
+def captured_output():
+    new_out, new_err = StringIO(), StringIO()
+    old_out, old_err = sys.stdout, sys.stderr
+    try:
+        sys.stdout, sys.stderr = new_out, new_err
+        yield sys.stdout, sys.stderr
+    finally:
+        sys.stdout, sys.stderr = old_out, old_err
+
+
+class TestChat(unittest.TestCase):
+    def test_chat_1(self):
+        # out_filename = str(tmpdir.join("abc"))
+        # out_filename = ""
 
-    def test_chat(self, tmpdir):
-        out_filename = str(tmpdir.join("abc"))
         for case in ['One', 'Two']:
-            cmd = 'python -p wp_parser/wp_chat.py -f test/testChat2.txt -n Username{} > {}'.format(case, out_filename)
-            os.system(cmd)
-            with open(out_filename) as fh:
-                result = fh.read()
+            # creating mock args
+            args = "-f test/testChat2.txt --root Username{}".format(case).split()
+
+            with captured_output() as (result, err):
+                parser(args)
+
             expected_file = 'test/out/testChat2_Username{}.out'.format(case)
             with open(expected_file) as fh:
                 expected = fh.read()
-            assert result == expected
+
+            self.assertEqual(result.getvalue().strip(), expected)
diff --git a/wp_parser/wp_chat.py b/wp_parser/wp_chat.py
index c9adea6..2ae9331 100644
--- a/wp_parser/wp_chat.py
+++ b/wp_parser/wp_chat.py
@@ -212,7 +212,7 @@ def save_features(self, output_name):
             exit(1)
 
 
-def main():
+def main(explicit_args=False):
     parser = argparse.ArgumentParser(description='Chatlog Feature Extractor')
     parser.add_argument('-f', '--file', help='Chatlog file', required=True)
     parser.add_argument('-n', '--root', help='Root name', required=False)
@@ -223,7 +223,10 @@ def main():
     parser.add_argument('-o', '--output', help='JSON output file name', required=False,
                         default="./logs/basic_stats.json")
 
-    args = vars(parser.parse_args())
+    if not explicit_args:
+        args = vars(parser.parse_args())
+    else:
+        args = vars(parser.parse_args(explicit_args))
 
     c = Chat(args["file"], args["platform"])
     c.open_file()
@@ -240,4 +243,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file

From ada0d0b5bf415d37f315e6500552a8e1cfc771ec Mon Sep 17 00:00:00 2001
From: Manvendra Singh <manvendra0310@gmail.com>
Date: Fri, 7 Apr 2017 23:15:03 +0530
Subject: [PATCH 3/3] travis config: use nose instead of py.test

---
 .travis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 5738124..afd16d1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,6 @@ python:
   - "3.4"
   - "3.5"
 #  - "3.6"
-#install:
-#   - pip install .
-script: py.test
+install:
+   - pip install .
+script: nosetests