Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,11 @@ ENV/
# mypy
.mypy_cache/

venv/

.pytest_cache

downloaded_xml_data.xml
output.csv


3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions .idea/booj-code-challenge.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <http://unlicense.org>
8 changes: 8 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import parseXmlSaveCsv


def main():
    """Download the challenge XML feed and convert it to a local CSV."""
    # set target xml feed
    challenge_url = 'http://syndication.enterprise.websiteidx.com/feeds/BoojCodeTest.xml'
    # download xml feed to local file
    local_file = parseXmlSaveCsv.download_file(challenge_url)
    # parse xml and save as csv
    parseXmlSaveCsv.parse_and_save(local_file)


# Guard the entry point so importing this module does not trigger a
# network download as a side effect
if __name__ == '__main__':
    main()
169 changes: 169 additions & 0 deletions parseXmlSaveCsv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import os
import csv
import urllib2
import datetime
import collections
import xml.etree.ElementTree as eTree


def download_file(url, file_name='downloaded_xml_data.xml'):
    """
    Download a file from an external source and save locally
    :param url: str
        Valid url for XML
    :param file_name: str
        Set location for file to be downloaded
        Default CWD/downloaded_xml_data.xml
    :return: str : file location
    """
    source = urllib2.urlopen(url)
    try:
        contents = source.read()
    finally:
        # close the HTTP response even if read() fails
        source.close()
    # 'wb' writes the raw feed bytes unmodified; the context manager
    # guarantees the handle is closed even if write() raises
    with open(file_name, 'wb') as file_handler:
        file_handler.write(contents)
    return os.path.realpath(file_name)


def check_valid_year(check_date, target_year=2016):
    """
    Check whether a timestamp string falls within the target calendar year.
    :param check_date: str
        Timestamp formatted as '%Y-%m-%d %H:%M:%S'
    :param target_year: int
        Calendar year to test against (default 2016)
    :return: bool
    :raises ValueError: if check_date does not match the expected format
    """
    start = datetime.datetime(target_year, 1, 1)
    # inclusive upper bound: the last whole second of the year
    end = datetime.datetime(target_year, 12, 31, 23, 59, 59)
    date = datetime.datetime.strptime(check_date, '%Y-%m-%d %H:%M:%S')
    return start <= date <= end


def check_description(description, check_term=' and '):
    """
    Report whether check_term occurs anywhere within description.
    :param description: str
    :param check_term: str
        Substring to search for (default ' and ', with surrounding spaces)
    :return: bool
    """
    return check_term in description


def listing_valid(listing_iterator_item):
    """
    Decide whether a <Listing> element qualifies for export.
    A listing passes when ListingDetails/DateListed falls in the default
    target year AND BasicDetails/Description contains the default search
    term (see check_valid_year and check_description).
    :param listing_iterator_item: xml.etree.ElementTree.Element
    :return: bool
    """
    details = listing_iterator_item.find('ListingDetails')
    if not check_valid_year(details.find('DateListed').text):
        return False
    basics = listing_iterator_item.find('BasicDetails')
    return check_description(basics.find('Description').text)


def join_sub_nodes(main_node, term):
    """
    Outputs a comma separated string of the "term" node values
    :param main_node: xml.etree.ElementTree.Element or None
    :param term: str
        Tag name of the child elements to collect
    :return: str
        Comma-joined text of each matching child ('' when there are none)
    """
    if main_node is None:
        return ''
    # findall always returns a list (possibly empty), never None, so no
    # further guard is needed; `or ''` protects against children with
    # empty bodies whose .text is None
    return ','.join(node.text or '' for node in main_node.findall(term))


def get_get_fields(listing_iterator_item):
    """
    Collect desired node values from one <Listing> xml element.
    NOTE(review): the doubled "get_get" in the name looks like a typo,
    but it is left unchanged because parse_and_save calls it by this name.
    :param listing_iterator_item: xml.etree.ElementTree.Element
    :return: collections.OrderedDict
        Insertion order here defines the CSV column order, so the order
        of the assignments below must not change.
    """
    output = collections.OrderedDict()
    listing_details = listing_iterator_item.find('ListingDetails')
    location_details = listing_iterator_item.find('Location')
    basic_details = listing_iterator_item.find('BasicDetails')
    rich_details = listing_iterator_item.find('RichDetails')

    output['MlsId'] = listing_details.find('MlsId').text
    output['MlsName'] = listing_details.find('MlsName').text

    output['DateListed'] = listing_details.find('DateListed').text

    output['StreetAddress'] = location_details.find('StreetAddress').text

    output['Price'] = listing_details.find('Price').text
    output['Bedrooms'] = basic_details.find('Bedrooms').text

    # this is producing no values
    # at this point I would talk with stake holders for clarifications on which nodes are important
    # lets talk about the process for getting more information on issues like this
    output['Bathrooms'] = basic_details.find('Bathrooms').text

    # this would be code to switch to use FullBathrooms, HalfBathrooms, ThreeQuarterBathrooms nodes
    # bathrooms = 0
    # bathrooms += int(0 if basic_details.find('FullBathrooms').text is None else basic_details.find('FullBathrooms').text)
    # bathrooms += int(0 if basic_details.find('HalfBathrooms').text is None else basic_details.find('HalfBathrooms').text)
    # bathrooms += int(0 if basic_details.find('ThreeQuarterBathrooms').text is None else basic_details.find('ThreeQuarterBathrooms').text)
    # output['Bathrooms'] = bathrooms
    # Humorous result:
    # the listing for 1110 Felbar Avenue has 102 bathrooms
    # due to the xml having <HalfBathrooms>99</HalfBathrooms>

    output['Appliances'] = join_sub_nodes(rich_details.find('Appliances'), 'Appliance')
    # NOTE(review): trailing space in the 'Rooms ' key is preserved because
    # it becomes a CSV header cell — confirm whether it is intentional
    output['Rooms '] = join_sub_nodes(rich_details.find('Rooms'), 'Room')

    # truncate to the 200th character
    # NOTE(review): this raises TypeError if <Description> is empty
    # (.text is None); listing_valid appears to require the node earlier,
    # but that is worth confirming
    description = basic_details.find('Description').text
    output['Description'] = description[0:200]
    return output


def write_listing_to_csv(listing_order_dict, file_name='output'):
    """
    Append one listing as a single row to <file_name>.csv
    A header row of the dict's keys is written only when the file does not
    already exist, so repeated calls accumulate rows under one header.
    :param listing_order_dict: collections.OrderedDict
        Field name -> value, in the desired column order
    :param file_name: str
        Output path without the '.csv' extension (default 'output')
    :return: void
    """
    file_location = file_name + ".csv"
    # emit the header only on the very first write
    write_header = not os.path.isfile(file_location)

    # 'ab' (binary append) keeps the Python 2 csv module from inserting
    # blank lines between rows on Windows
    with open(file_location, 'ab') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(listing_order_dict.keys())
        writer.writerow(listing_order_dict.values())


def parse_and_save(local_file_location):
    """
    Stream the given local xml file and append every qualifying listing
    to the default csv output location.
    :param local_file_location: str
    :return: void
    """
    # iterparse defaults to 'end' events, so each <Listings> element is
    # complete (all children parsed) when we see it
    for event, element in eTree.iterparse(local_file_location):
        if event != "end" or element.tag != 'Listings':
            continue
        for listing in element:
            if listing_valid(listing):
                write_listing_to_csv(get_get_fields(listing))

19 changes: 19 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
atomicwrites==1.3.0
attrs==19.3.0
colorama==0.4.1
configparser==4.0.2
contextlib2==0.6.0.post1
funcsigs==1.0.2
importlib-metadata==0.23
mock==3.0.5
more-itertools==5.0.0
packaging==19.2
pathlib2==2.3.5
pluggy==0.13.1
py==1.8.0
pyparsing==2.4.5
pytest==4.6.6
scandir==1.10.0
six==1.13.0
wcwidth==0.1.7
zipp==0.6.0
Loading