Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,11 @@ ENV/
# mypy
.mypy_cache/

venv/

.pytest_cache

downloaded_xml_data.xml
output.csv


3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions .idea/booj-code-challenge.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <http://unlicense.org>
8 changes: 8 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import parseXmlSaveCsv


def main():
    """Download the challenge XML feed and convert it to a local CSV."""
    # set target xml feed
    challenge_url = 'http://syndication.enterprise.websiteidx.com/feeds/BoojCodeTest.xml'
    # download xml feed to local file
    local_file = parseXmlSaveCsv.download_file(challenge_url)
    # parse xml and save as csv
    parseXmlSaveCsv.parse_and_save(local_file)


# Guard the entry point so importing this module does not trigger a
# network download as a side effect
if __name__ == '__main__':
    main()
169 changes: 169 additions & 0 deletions parseXmlSaveCsv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import os
import csv
import urllib2
import datetime
import collections
import xml.etree.ElementTree as eTree


def download_file(url, file_name='downloaded_xml_data.xml'):
    """
    Download a file from an external source and save locally
    :param url: str
        Valid url for XML
    :param file_name: str
        Set location for file to be downloaded
        Default CWD/downloaded_xml_data.xml
    :return: str : file location
    """
    source = urllib2.urlopen(url)
    try:
        contents = source.read()
    finally:
        # close the HTTP response even if read() fails
        source.close()
    # 'wb' writes the raw feed bytes unmodified; the context manager
    # guarantees the handle is closed even if write() raises
    with open(file_name, 'wb') as file_handler:
        file_handler.write(contents)
    return os.path.realpath(file_name)


def check_valid_year(check_date, target_year=2016):
    """
    Check whether a timestamp string falls within the target calendar year.
    :param check_date: str
        Timestamp formatted as '%Y-%m-%d %H:%M:%S'
    :param target_year: int
        Calendar year to test against (default 2016)
    :return: bool
    :raises ValueError: if check_date does not match the expected format
    """
    start = datetime.datetime(target_year, 1, 1)
    # inclusive upper bound: the last whole second of the year
    end = datetime.datetime(target_year, 12, 31, 23, 59, 59)
    date = datetime.datetime.strptime(check_date, '%Y-%m-%d %H:%M:%S')
    return start <= date <= end


def check_description(description, check_term=' and '):
    """
    Report whether check_term occurs anywhere within description.
    :param description: str
    :param check_term: str
        Substring to search for (default ' and ', with surrounding spaces)
    :return: bool
    """
    return check_term in description


def listing_valid(listing_iterator_item):
    """
    Decide whether a <Listing> element qualifies for export.
    A listing passes when ListingDetails/DateListed falls in the default
    target year AND BasicDetails/Description contains the default search
    term (see check_valid_year and check_description).
    :param listing_iterator_item: xml.etree.ElementTree.Element
    :return: bool
    """
    details = listing_iterator_item.find('ListingDetails')
    if not check_valid_year(details.find('DateListed').text):
        return False
    basics = listing_iterator_item.find('BasicDetails')
    return check_description(basics.find('Description').text)


def join_sub_nodes(main_node, term):
    """
    Outputs a comma separated string of the "term" node values
    :param main_node: xml.etree.ElementTree.Element or None
    :param term: str
        Tag name of the child elements to collect
    :return: str
        Comma-joined text of each matching child ('' when there are none)
    """
    if main_node is None:
        return ''
    # findall always returns a list (possibly empty), never None, so no
    # further guard is needed; `or ''` protects against children with
    # empty bodies whose .text is None
    return ','.join(node.text or '' for node in main_node.findall(term))


def get_get_fields(listing_iterator_item):
    """
    Collect desired node values from one <Listing> xml element.
    NOTE(review): the doubled "get_get" in the name looks like a typo,
    but it is left unchanged because parse_and_save calls it by this name.
    :param listing_iterator_item: xml.etree.ElementTree.Element
    :return: collections.OrderedDict
        Insertion order here defines the CSV column order, so the order
        of the assignments below must not change.
    """
    output = collections.OrderedDict()
    listing_details = listing_iterator_item.find('ListingDetails')
    location_details = listing_iterator_item.find('Location')
    basic_details = listing_iterator_item.find('BasicDetails')
    rich_details = listing_iterator_item.find('RichDetails')

    output['MlsId'] = listing_details.find('MlsId').text
    output['MlsName'] = listing_details.find('MlsName').text

    output['DateListed'] = listing_details.find('DateListed').text

    output['StreetAddress'] = location_details.find('StreetAddress').text

    output['Price'] = listing_details.find('Price').text
    output['Bedrooms'] = basic_details.find('Bedrooms').text

    # this is producing no values
    # at this point I would talk with stake holders for clarifications on which nodes are important
    # lets talk about the process for getting more information on issues like this
    output['Bathrooms'] = basic_details.find('Bathrooms').text

    # this would be code to switch to use FullBathrooms, HalfBathrooms, ThreeQuarterBathrooms nodes
    # bathrooms = 0
    # bathrooms += int(0 if basic_details.find('FullBathrooms').text is None else basic_details.find('FullBathrooms').text)
    # bathrooms += int(0 if basic_details.find('HalfBathrooms').text is None else basic_details.find('HalfBathrooms').text)
    # bathrooms += int(0 if basic_details.find('ThreeQuarterBathrooms').text is None else basic_details.find('ThreeQuarterBathrooms').text)
    # output['Bathrooms'] = bathrooms
    # Humorous result:
    # the listing for 1110 Felbar Avenue has 102 bathrooms
    # due to the xml having <HalfBathrooms>99</HalfBathrooms>

    output['Appliances'] = join_sub_nodes(rich_details.find('Appliances'), 'Appliance')
    # NOTE(review): trailing space in the 'Rooms ' key is preserved because
    # it becomes a CSV header cell — confirm whether it is intentional
    output['Rooms '] = join_sub_nodes(rich_details.find('Rooms'), 'Room')

    # truncate to the 200th character
    # NOTE(review): this raises TypeError if <Description> is empty
    # (.text is None); listing_valid appears to require the node earlier,
    # but that is worth confirming
    description = basic_details.find('Description').text
    output['Description'] = description[0:200]
    return output


def write_listing_to_csv(listing_order_dict, file_name='output'):
    """
    Append one listing as a single row to <file_name>.csv
    A header row of the dict's keys is written only when the file does not
    already exist, so repeated calls accumulate rows under one header.
    :param listing_order_dict: collections.OrderedDict
        Field name -> value, in the desired column order
    :param file_name: str
        Output path without the '.csv' extension (default 'output')
    :return: void
    """
    file_location = file_name + ".csv"
    # emit the header only on the very first write
    write_header = not os.path.isfile(file_location)

    # 'ab' (binary append) keeps the Python 2 csv module from inserting
    # blank lines between rows on Windows
    with open(file_location, 'ab') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(listing_order_dict.keys())
        writer.writerow(listing_order_dict.values())


def parse_and_save(local_file_location):
    """
    Stream the given local xml file and append every qualifying listing
    to the default csv output location.
    :param local_file_location: str
    :return: void
    """
    # iterparse defaults to 'end' events, so each <Listings> element is
    # complete (all children parsed) when we see it
    for event, element in eTree.iterparse(local_file_location):
        if event != "end" or element.tag != 'Listings':
            continue
        for listing in element:
            if listing_valid(listing):
                write_listing_to_csv(get_get_fields(listing))

19 changes: 19 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
atomicwrites==1.3.0
attrs==19.3.0
colorama==0.4.1
configparser==4.0.2
contextlib2==0.6.0.post1
funcsigs==1.0.2
importlib-metadata==0.23
mock==3.0.5
more-itertools==5.0.0
packaging==19.2
pathlib2==2.3.5
pluggy==0.13.1
py==1.8.0
pyparsing==2.4.5
pytest==4.6.6
scandir==1.10.0
six==1.13.0
wcwidth==0.1.7
zipp==0.6.0
Loading