From 6defbf65620f4185591d5f729c135073e2b74cb9 Mon Sep 17 00:00:00 2001
From: Bradley Irving <30394346+beirving@users.noreply.github.com>
Date: Mon, 25 Nov 2019 15:31:01 -0700
Subject: [PATCH] File commit
Code challenge on forked repo
---
.gitattributes | 2 +
.gitignore | 8 +
.idea/.gitignore | 3 +
.idea/booj-code-challenge.iml | 14 ++
.../inspectionProfiles/profiles_settings.xml | 6 +
.idea/misc.xml | 4 +
.idea/modules.xml | 8 +
.idea/vcs.xml | 6 +
LICENSE | 24 +++
main.py | 8 +
parseXmlSaveCsv.py | 169 ++++++++++++++++++
requirements.txt | 19 ++
test_parseXmlSaveCsv.py | 123 +++++++++++++
13 files changed, 394 insertions(+)
create mode 100644 .gitattributes
create mode 100644 .idea/.gitignore
create mode 100644 .idea/booj-code-challenge.iml
create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
create mode 100644 .idea/misc.xml
create mode 100644 .idea/modules.xml
create mode 100644 .idea/vcs.xml
create mode 100644 LICENSE
create mode 100644 main.py
create mode 100644 parseXmlSaveCsv.py
create mode 100644 requirements.txt
create mode 100644 test_parseXmlSaveCsv.py
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..dfe0770
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Auto detect text files and perform LF normalization
+* text=auto
diff --git a/.gitignore b/.gitignore
index db1f598..b385c4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -98,3 +98,11 @@ ENV/
# mypy
.mypy_cache/
+venv/.
+
+.pytest_cache
+
+downloaded_xml_data.xml
+output.csv
+
+
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..0e40fe8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+
+# Default ignored files
+/workspace.xml
\ No newline at end of file
diff --git a/.idea/booj-code-challenge.iml b/.idea/booj-code-challenge.iml
new file mode 100644
index 0000000..b6c198f
--- /dev/null
+++ b/.idea/booj-code-challenge.iml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..ae8e745
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..683be98
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..6bb8a29
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <https://unlicense.org>
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..b79c8ca
--- /dev/null
+++ b/main.py
@@ -0,0 +1,8 @@
+import parseXmlSaveCsv
+
+# URL of the XML listings feed to process
+challenge_url = 'http://syndication.enterprise.websiteidx.com/feeds/BoojCodeTest.xml'
+# download the feed; download_file returns the path of the local copy
+local_file = parseXmlSaveCsv.download_file(challenge_url)
+# filter the listings of the local copy and write the kept ones to the csv
+parseXmlSaveCsv.parse_and_save(local_file)
diff --git a/parseXmlSaveCsv.py b/parseXmlSaveCsv.py
new file mode 100644
index 0000000..bc8aa67
--- /dev/null
+++ b/parseXmlSaveCsv.py
@@ -0,0 +1,169 @@
+import os
+import csv
+import urllib2
+import datetime
+import collections
+import xml.etree.ElementTree as eTree
+
+
+def download_file(url, file_name='downloaded_xml_data.xml'):
+    """
+    Download a file from an external source and save it locally, unaltered
+    :param url: str - valid url for XML
+    :param file_name: str - destination, default CWD/downloaded_xml_data.xml
+    :return: str : real path of the saved file
+    """
+    source = urllib2.urlopen(url)
+    try:
+        contents = source.read()
+    finally:
+        # release the connection even when the read fails
+        source.close()
+    with open(file_name, 'wb') as file_handler:  # 'wb': no newline translation
+        file_handler.write(contents)
+    return os.path.realpath(file_name)
+
+
+def check_valid_year(check_date, target_year=2016):
+    """
+    Tell whether a timestamp falls inside the target calendar year
+    :param check_date: str
+        Timestamp formatted as '%Y-%m-%d %H:%M:%S';
+        any other format makes strptime raise ValueError
+    :param target_year: int
+        Calendar year the timestamp must belong to
+    :return: bool
+    """
+    # inclusive bounds: first and last whole second of the target year
+    year_opens = datetime.datetime(target_year, 1, 1)
+    year_closes = datetime.datetime(target_year, 12, 31, 23, 59, 59)
+    parsed = datetime.datetime.strptime(check_date, '%Y-%m-%d %H:%M:%S')
+    return year_opens <= parsed <= year_closes
+
+
+def check_description(description, check_term=' and '):
+    """
+    Check if the first argument contains the 2nd argument (case-sensitive)
+    :param description: str or None
+        None, the text of an empty XML node, never matches
+    :param check_term: str
+    :return: bool
+    """
+    # guard first: None would otherwise raise instead of answering False
+    return description is not None and check_term in description
+
+
+def listing_valid(listing_iterator_item):
+    """
+    Decide whether a listing should be exported
+    A listing qualifies when check_valid_year accepts its DateListed and
+    check_description accepts its Description, both under default params
+    :param listing_iterator_item: xml.etree.ElementTree.Element
+    :return: bool
+    """
+    listed_on = listing_iterator_item.find('ListingDetails').find('DateListed').text
+    if check_valid_year(listed_on) is False:
+        return False
+    summary = listing_iterator_item.find('BasicDetails').find('Description').text
+    return check_description(summary)
+
+
+def join_sub_nodes(main_node, term):
+    """
+    Outputs a comma separated string of the "term" node values
+    Child nodes without text (e.g. empty tags, whose .text is None) are
+    skipped, since str.join would raise TypeError on a None item
+    :param main_node: xml.etree.ElementTree.Element or None
+        Parent node; None (parent not found) yields ''
+    :param term: str
+        Tag name of the children to collect
+    :return: str
+        '' when there is no parent, no matching child or no text at all
+    """
+    if main_node is None:
+        return ''
+    node_items = main_node.findall(term)
+    if not node_items:
+        return ''
+    return ','.join(item.text for item in node_items if item.text is not None)
+
+
+def _child_text(parent_node, tag):
+    """Return the text of the first "tag" child, None if either is missing."""
+    child = None if parent_node is None else parent_node.find(tag)
+    return None if child is None else child.text
+
+
+def get_get_fields(listing_iterator_item):
+    """
+    Collect desired node values from xml
+    A missing node or an empty text yields None, which csv.writer renders
+    as an empty cell, instead of raising AttributeError/TypeError
+    :param listing_iterator_item: xml.etree.ElementTree.Element
+    :return: collections.OrderedDict
+        Column name -> value, in csv column order
+    """
+    listing_details = listing_iterator_item.find('ListingDetails')
+    location_details = listing_iterator_item.find('Location')
+    basic_details = listing_iterator_item.find('BasicDetails')
+    rich_details = listing_iterator_item.find('RichDetails')
+
+    output = collections.OrderedDict()
+    output['MlsId'] = _child_text(listing_details, 'MlsId')
+    output['MlsName'] = _child_text(listing_details, 'MlsName')
+    output['DateListed'] = _child_text(listing_details, 'DateListed')
+    output['StreetAddress'] = _child_text(location_details, 'StreetAddress')
+    output['Price'] = _child_text(listing_details, 'Price')
+    output['Bedrooms'] = _child_text(basic_details, 'Bedrooms')
+
+    # NOTE(original author): the Bathrooms node is producing no values.
+    # Summing FullBathrooms, HalfBathrooms and ThreeQuarterBathrooms was tried
+    # instead, but 1110 Felbar Avenue then gets 102 bathrooms due to the xml
+    # having 99. TODO: ask stakeholders which nodes are the important ones.
+    output['Bathrooms'] = _child_text(basic_details, 'Bathrooms')
+
+    # 'Rooms' used to be 'Rooms ': the stray space ended up in the csv header
+    appliances = None if rich_details is None else rich_details.find('Appliances')
+    rooms = None if rich_details is None else rich_details.find('Rooms')
+    output['Appliances'] = join_sub_nodes(appliances, 'Appliance')
+    output['Rooms'] = join_sub_nodes(rooms, 'Room')
+
+    # keep only the first 200 characters of the description
+    description = _child_text(basic_details, 'Description')
+    output['Description'] = None if description is None else description[:200]
+    return output
+
+
+def write_listing_to_csv(listing_order_dict, file_name='output'):
+    """
+    Append one listing to <file_name>.csv, with a header row on first write
+    :param listing_order_dict: collections.OrderedDict
+    :param file_name: str, target path without the ".csv" extension
+    :return: void
+    """
+    file_location = file_name + ".csv"
+    write_header = not os.path.isfile(file_location)
+    # Python 2's csv module cannot write non-ASCII unicode objects (values may
+    # be unicode when parsed from XML text): hand it UTF-8 byte strings instead
+    row = [v.encode('utf-8') if isinstance(v, unicode) else v for v in listing_order_dict.values()]
+    with open(file_location, 'ab') as f:
+        writer = csv.writer(f)
+        if write_header:
+            writer.writerow(listing_order_dict.keys())
+        writer.writerow(row)
+
+
+def parse_and_save(local_file_location):
+    """
+    Write every valid listing of the given local xml file to the default csv
+    Listings are handled once the closing "Listings" tag has been parsed
+    :param local_file_location: str
+    :return: void
+    """
+    for event, element in eTree.iterparse(local_file_location):
+        if event != "end" or element.tag != 'Listings':
+            continue
+        for listing in element:
+            if listing_valid(listing):
+                write_listing_to_csv(get_get_fields(listing))
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..dabec0c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,19 @@
+atomicwrites==1.3.0
+attrs==19.3.0
+colorama==0.4.1
+configparser==4.0.2
+contextlib2==0.6.0.post1
+funcsigs==1.0.2
+importlib-metadata==0.23
+mock==3.0.5
+more-itertools==5.0.0
+packaging==19.2
+pathlib2==2.3.5
+pluggy==0.13.1
+py==1.8.0
+pyparsing==2.4.5
+pytest==4.6.6
+scandir==1.10.0
+six==1.13.0
+wcwidth==0.1.7
+zipp==0.6.0
diff --git a/test_parseXmlSaveCsv.py b/test_parseXmlSaveCsv.py
new file mode 100644
index 0000000..047b32f
--- /dev/null
+++ b/test_parseXmlSaveCsv.py
@@ -0,0 +1,123 @@
+import os
+import xml
+import pytest
+import collections
+import parseXmlSaveCsv
+from mock import Mock, patch
+
+
+@pytest.fixture(scope='function')
+def mock_xml_element():
+    """Element mock whose find() yields a node with text 'string'."""
+    mock_element = Mock(spec=xml.etree.ElementTree.Element)
+    mock_element.find.return_value = Mock(text='string')
+    # tag is a plain attribute, not a method: assign the value directly
+    mock_element.tag = 'Listings'
+    return mock_element
+
+
+@patch('parseXmlSaveCsv.urllib2.urlopen')
+def test_download_file_will_pass(mock_urlopen, tmpdir):
+    """download_file stores the payload at the given path and returns it."""
+    target = str(tmpdir.join('test.xml'))
+    mock_urlopen.return_value.read.return_value = 'mock_contents1'
+    result = parseXmlSaveCsv.download_file('http://test.com', target)
+    assert isinstance(result, str)
+    assert result == os.path.realpath(target)
+    with open(result) as saved:
+        assert saved.read() == 'mock_contents1'
+
+
+@patch('parseXmlSaveCsv.urllib2.urlopen')
+def test_download_file_will_fail(mock_urlopen, tmpdir):
+    """The returned path is the requested target, not some other file."""
+    other_path = os.path.realpath(str(tmpdir.join('test_fail.xml')))
+    mock_urlopen.return_value.read.return_value = 'mock_contents1'
+    result = parseXmlSaveCsv.download_file('http://test.com', str(tmpdir.join('test.xml')))
+    # compare by value: `is not` on two distinct str objects is always true
+    assert result != other_path
+    assert not os.path.exists(other_path)
+
+
+def test_check_description_will_pass():
+    """A term contained in the description is reported as found."""
+    assert parseXmlSaveCsv.check_description('this test will pass', 'pass')
+
+
+def test_check_description_will_fail():
+    """A term absent from the description yields exactly False."""
+    assert parseXmlSaveCsv.check_description('this test will pass', 'fail') is False
+
+
+def test_check_valid_year_will_pass():
+    """A timestamp inside the target year is accepted."""
+    assert parseXmlSaveCsv.check_valid_year('2019-05-15 13:29:45', 2019)
+
+
+def test_check_valid_year_will_fail():
+    """A timestamp outside the target year yields exactly False."""
+    assert parseXmlSaveCsv.check_valid_year('2019-05-15 13:29:45', 2018) is False
+
+
+@patch("parseXmlSaveCsv.check_valid_year", return_value=True)
+@patch("parseXmlSaveCsv.check_description", return_value=True)
+def test_listing_valid_will_pass(mock_check_description, mock_check_year, mock_xml_element):
+    # stacked patches are injected bottom-up: check_description comes first
+    assert parseXmlSaveCsv.listing_valid(mock_xml_element)
+    assert mock_check_year.called
+    assert mock_check_description.called
+
+
+@patch("parseXmlSaveCsv.check_valid_year", return_value=False)
+def test_listing_valid_will_fail_check_year(mock_check_year, mock_xml_element):
+    """A listing rejected by the year check is reported as invalid."""
+    assert parseXmlSaveCsv.listing_valid(mock_xml_element) is False
+    assert mock_check_year.called
+
+
+@patch("parseXmlSaveCsv.check_valid_year", return_value=True)
+@patch("parseXmlSaveCsv.check_description", return_value=False)
+def test_listing_valid_will_fail_check_description(mock_check_description, mock_check_year, mock_xml_element):
+    # stacked patches are injected bottom-up: check_description comes first
+    assert parseXmlSaveCsv.listing_valid(mock_xml_element) is False
+    assert mock_check_year.called
+    assert mock_check_description.called
+
+
+def test_join_sub_nodes_will_pass(mock_xml_element):
+    """Texts of all matching child nodes are joined with commas."""
+    child = Mock()
+    child.text = 'string'
+    mock_xml_element.findall.return_value = [child, child]
+
+    joined = parseXmlSaveCsv.join_sub_nodes(mock_xml_element, 'test')
+    assert joined == 'string,string'
+
+
+def test_join_sub_nodes_will_fail_main_node_is_none():
+    """A parent node of None yields an empty string."""
+    assert parseXmlSaveCsv.join_sub_nodes(None, 'test') == ''
+
+
+def test_join_sub_nodes_will_fail_main_node_find_all_is_none(mock_xml_element):
+    """A findall() result of None is tolerated and yields ''."""
+    mock_xml_element.findall.return_value = None
+    assert parseXmlSaveCsv.join_sub_nodes(mock_xml_element, 'test') == ''
+
+
+@patch("parseXmlSaveCsv.join_sub_nodes", return_value='string')
+def test_get_fields_will_pass(mock_join_sub_nodes, mock_xml_element):
+    # every find() on the listing yields a node whose children have text 'string'
+    listing = Mock(spec=xml.etree.ElementTree.Element)
+    listing.find.return_value = mock_xml_element
+    result = parseXmlSaveCsv.get_get_fields(listing)
+    assert mock_join_sub_nodes.called
+    assert isinstance(result, collections.OrderedDict) and result['MlsId'] == 'string'
+
+
+def test_write_listing_to_csv_will_pass(tmpdir):
+    """The first write creates <file_name>.csv with a header and one row."""
+    listing = collections.OrderedDict([('header', 'value')])
+    parseXmlSaveCsv.write_listing_to_csv(listing, str(tmpdir.join('test')))
+    csv_file = tmpdir.join('test.csv')
+    assert os.path.isfile(str(csv_file))
+    assert csv_file.read().split() == ['header', 'value']