From 6defbf65620f4185591d5f729c135073e2b74cb9 Mon Sep 17 00:00:00 2001
From: Bradley Irving <30394346+beirving@users.noreply.github.com>
Date: Mon, 25 Nov 2019 15:31:01 -0700
Subject: [PATCH] File commit

Code challenge on forked repo
---
 .gitattributes                                |   2 +
 .gitignore                                    |   8 +
 .idea/.gitignore                              |   3 +
 .idea/booj-code-challenge.iml                 |  14 ++
 .../inspectionProfiles/profiles_settings.xml  |   6 +
 .idea/misc.xml                                |   4 +
 .idea/modules.xml                             |   8 +
 .idea/vcs.xml                                 |   6 +
 LICENSE                                       |  24 +++
 main.py                                       |   8 +
 parseXmlSaveCsv.py                            | 169 ++++++++++++++++++
 requirements.txt                              |  19 ++
 test_parseXmlSaveCsv.py                       | 123 +++++++++++++
 13 files changed, 394 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/booj-code-challenge.iml
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml
 create mode 100644 LICENSE
 create mode 100644 main.py
 create mode 100644 parseXmlSaveCsv.py
 create mode 100644 requirements.txt
 create mode 100644 test_parseXmlSaveCsv.py
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..dfe0770
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Auto detect text files and perform LF normalization
+* text=auto
diff --git a/.gitignore b/.gitignore
index db1f598..b385c4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -98,3 +98,11 @@ ENV/
 # mypy
 .mypy_cache/
 
+venv/.
+
+.pytest_cache
+
+downloaded_xml_data.xml
+output.csv
+
+
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..0e40fe8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+
+# Default ignored files
+/workspace.xml
\ No newline at end of file
diff --git a/.idea/booj-code-challenge.iml b/.idea/booj-code-challenge.iml
new file mode 100644
index 0000000..b6c198f
--- /dev/null
+++ b/.idea/booj-code-challenge.iml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 2.7 (booj-code-challenge)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="projectConfiguration" value="pytest" />
+    <option name="PROJECT_TEST_RUNNER" value="pytest" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..ae8e745
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7 (booj-code-challenge)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..683be98
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/booj-code-challenge.iml" filepath="$PROJECT_DIR$/.idea/booj-code-challenge.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..6bb8a29
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org>
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..b79c8ca
--- /dev/null
+++ b/main.py
@@ -0,0 +1,8 @@
+import parseXmlSaveCsv
+
+# set target xml feed
+challenge_url = 'http://syndication.enterprise.websiteidx.com/feeds/BoojCodeTest.xml'
+# download xml feed to local file
+local_file = parseXmlSaveCsv.download_file(challenge_url)
+# parse xml and save as csv
+parseXmlSaveCsv.parse_and_save(local_file)
diff --git a/parseXmlSaveCsv.py b/parseXmlSaveCsv.py
new file mode 100644
index 0000000..bc8aa67
--- /dev/null
+++ b/parseXmlSaveCsv.py
@@ -0,0 +1,169 @@
+import os
+import csv
+import urllib2
+import datetime
+import collections
+import xml.etree.ElementTree as eTree
+
+
+def download_file(url, file_name='downloaded_xml_data.xml'):
+    """
+    Download a file from an external source and save locally
+    :param url: str
+        Valid url for XML
+    :param file_name: str
+        Path of the local file the download is written to
+        Default: downloaded_xml_data.xml in the current working directory
+    :return: str : real (canonical) path of the saved file
+    """
+    source = urllib2.urlopen(url)
+    contents = source.read()
+    file_handler = open(file_name, 'w')
+    file_handler.write(contents)
+    file_handler.close()
+    return os.path.realpath(file_name)
+
+
+def check_valid_year(check_date, target_year=2016):
+    """
+    Check whether the given date falls within the target year
+    :param check_date: str, formatted as '%Y-%m-%d %H:%M:%S'
+    :param target_year: int
+    :return: bool
+    """
+    start = datetime.datetime(target_year, 1, 1)
+    end = datetime.datetime(target_year, 12, 31, 23, 59, 59)
+    date = datetime.datetime.strptime(check_date, '%Y-%m-%d %H:%M:%S')
+    if date > end:
+        return False
+    if date < start:
+        return False
+    return True
+
+
+def check_description(description, check_term=' and '):
+    """
+    Check if the first argument contains the 2nd argument
+    :param description: str
+    :param check_term: str
+    :return: bool
+    """
+    if description.find(check_term) >= 0:
+        return True
+    return False
+
+
+def listing_valid(listing_iterator_item):
+    """
+    Helper function to clean up the parse_and_save function
+    Runs check_valid_year and check_description under default params
+    :param listing_iterator_item: xml.etree.ElementTree.Element
+    :return: bool
+    """
+    listing_details = listing_iterator_item.find('ListingDetails')
+    date_result = check_valid_year(listing_details.find('DateListed').text)
+    if date_result is False:
+        return date_result
+    basic_details = listing_iterator_item.find('BasicDetails')
+    return check_description(basic_details.find('Description').text)
+
+
+def join_sub_nodes(main_node, term):
+    """
+    Returns a comma-separated string of the text values of the "term" child nodes
+    :param main_node: xml.etree.ElementTree.Element
+    :param term: str
+    :return: str
+    """
+    if main_node is None:
+        return ''
+    else:
+        node_items = main_node.findall(term)
+        if node_items is None:
+            return ''
+        else:
+            output_list = []
+            for node_item in node_items:
+                output_list.append(node_item.text)
+        return ','.join(output_list)
+
+
+def get_get_fields(listing_iterator_item):
+    """
+    Collect desired node values from xml
+    :param listing_iterator_item: xml.etree.ElementTree.Element
+    :return: collections.OrderedDict
+    """
+    output = collections.OrderedDict()
+    listing_details = listing_iterator_item.find('ListingDetails')
+    location_details = listing_iterator_item.find('Location')
+    basic_details = listing_iterator_item.find('BasicDetails')
+    rich_details = listing_iterator_item.find('RichDetails')
+
+    output['MlsId'] = listing_details.find('MlsId').text
+    output['MlsName'] = listing_details.find('MlsName').text
+
+    output['DateListed'] = listing_details.find('DateListed').text
+
+    output['StreetAddress'] = location_details.find('StreetAddress').text
+
+    output['Price'] = listing_details.find('Price').text
+    output['Bedrooms'] = basic_details.find('Bedrooms').text
+
+    # This is producing no values.
+    # At this point I would talk with stakeholders to clarify which nodes are important.
+    # Let's talk about the process for getting more information on issues like this.
+    output['Bathrooms'] = basic_details.find('Bathrooms').text
+
+    # this would be code to switch to use FullBathrooms, HalfBathrooms, ThreeQuarterBathrooms nodes
+    # bathrooms = 0
+    # bathrooms += int(0 if basic_details.find('FullBathrooms').text is None else basic_details.find('FullBathrooms').text)
+    # bathrooms += int(0 if basic_details.find('HalfBathrooms').text is None else basic_details.find('HalfBathrooms').text)
+    # bathrooms += int(0 if basic_details.find('ThreeQuarterBathrooms').text is None else basic_details.find('ThreeQuarterBathrooms').text)
+    # output['Bathrooms'] = bathrooms
+    #   Humorous result:
+    #       the listing for 1110 Felbar Avenue has 102 bathrooms
+    #       due to the xml having <HalfBathrooms>99</HalfBathrooms>
+
+    output['Appliances'] = join_sub_nodes(rich_details.find('Appliances'), 'Appliance')
+    output['Rooms '] = join_sub_nodes(rich_details.find('Rooms'), 'Room')
+
+    # truncate to the first 200 characters
+    description = basic_details.find('Description').text
+    output['Description'] = description[0:200]
+    return output
+
+
+def write_listing_to_csv(listing_order_dict, file_name='output'):
+    """
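+    Append the given dict's values as a row to the csv, first writing its keys as a header row if the file does not exist yet
+    :param listing_order_dict: collections.OrderedDict
+    :param file_name: str : csv file name without the ".csv" extension (it is appended automatically)
+    :return: None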
+    """
+    file_location = file_name+".csv"
+    write_header = True
+    if os.path.isfile(file_location) is True:
+        write_header = False
+
+    with open(file_location, 'ab') as f:
+        writer = csv.writer(f)
+        if write_header:
+            writer.writerow(listing_order_dict.keys())
+        writer.writerow(listing_order_dict.values())
+
+
+def parse_and_save(local_file_location):
+    """
+    Parse the given local xml file and write each valid listing to the default csv location
+    :param local_file_location: str
+    :return: None
+    """
+    context = eTree.iterparse(local_file_location)
+    for event, element in context:
+        if event == "end" and element.tag == 'Listings':
+            for listing in element:
+                if listing_valid(listing):
+                    listing_fields = get_get_fields(listing)
+                    write_listing_to_csv(listing_fields)
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..dabec0c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,19 @@
+atomicwrites==1.3.0
+attrs==19.3.0
+colorama==0.4.1
+configparser==4.0.2
+contextlib2==0.6.0.post1
+funcsigs==1.0.2
+importlib-metadata==0.23
+mock==3.0.5
+more-itertools==5.0.0
+packaging==19.2
+pathlib2==2.3.5
+pluggy==0.13.1
+py==1.8.0
+pyparsing==2.4.5
+pytest==4.6.6
+scandir==1.10.0
+six==1.13.0
+wcwidth==0.1.7
+zipp==0.6.0
diff --git a/test_parseXmlSaveCsv.py b/test_parseXmlSaveCsv.py
new file mode 100644
index 0000000..047b32f
--- /dev/null
+++ b/test_parseXmlSaveCsv.py
@@ -0,0 +1,123 @@
+import os
+import xml
+import pytest
+import collections
+import parseXmlSaveCsv
+from mock import Mock, patch
+
+
+@pytest.fixture(scope='function')
+def mock_xml_element():
+    mock_element = Mock(spec=xml.etree.ElementTree.Element)
+    mock_text = Mock()
+    mock_text.text = 'string'
+    mock_element.find.return_value = mock_text
+    mock_element.tag.return_value = 'Listings'
+    return mock_element
+
+
+@patch('parseXmlSaveCsv.urllib2.urlopen')
+def test_download_file_will_pass(mock_urlopen):
+    test_path = os.path.dirname(os.path.realpath(__file__))+'\\test.xml'
+    mock = Mock()
+    mock.read.side_effect = ['mock_contents1']
+    mock_urlopen.return_value = mock
+    result = parseXmlSaveCsv.download_file('http://test.com', 'test.xml')
+    assert type(result) is str
+    assert result == test_path
+    os.remove(result)
+
+
+@patch('parseXmlSaveCsv.urllib2.urlopen')
+def test_download_file_will_fail(mock_urlopen):
+    test_path = os.path.dirname(os.path.realpath(__file__))+'\\test_fail.xml'
+    mock = Mock()
+    mock.read.side_effect = ['mock_contents1']
+    mock_urlopen.return_value = mock
+    result = parseXmlSaveCsv.download_file('http://test.com', 'test.xml')
+    assert result is not test_path
+    os.remove(result)
+
+
+def test_check_description_will_pass():
+    result = parseXmlSaveCsv.check_description('this test will pass', 'pass')
+    assert result
+
+
+def test_check_description_will_fail():
+    result = parseXmlSaveCsv.check_description('this test will pass', 'fail')
+    assert result is False
+
+
+def test_check_valid_year_will_pass():
+    result = parseXmlSaveCsv.check_valid_year('2019-05-15 13:29:45', 2019)
+    assert result
+
+
+def test_check_valid_year_will_fail():
+    result = parseXmlSaveCsv.check_valid_year('2019-05-15 13:29:45', 2018)
+    assert result is False
+
+
+@patch("parseXmlSaveCsv.check_valid_year", return_value=True)
+@patch("parseXmlSaveCsv.check_description", return_value=True)
+def test_listing_valid_will_pass(mock_check_year, mock_check_description, mock_xml_element):
+    result = parseXmlSaveCsv.listing_valid(mock_xml_element)
+    assert mock_check_year.called
+    assert mock_check_description.called
+    assert result
+
+
+@patch("parseXmlSaveCsv.check_valid_year", return_value=False)
+def test_listing_valid_will_fail_check_year(mock_check_year, mock_xml_element):
+    result = parseXmlSaveCsv.listing_valid(mock_xml_element)
+    assert mock_check_year.called
+    assert result is False
+
+
+@patch("parseXmlSaveCsv.check_valid_year", return_value=True)
+@patch("parseXmlSaveCsv.check_description", return_value=False)
+def test_listing_valid_will_fail_check_description(mock_check_year, mock_check_description, mock_xml_element):
+    result = parseXmlSaveCsv.listing_valid(mock_xml_element)
+    assert mock_check_year.called
+    assert mock_check_description.called
+    assert result is False
+
+
+def test_join_sub_nodes_will_pass(mock_xml_element):
+    mock_text = Mock()
+    mock_text.text = 'string'
+
+    mock_xml_element.findall.return_value = [mock_text, mock_text]
+
+    result = parseXmlSaveCsv.join_sub_nodes(mock_xml_element, 'test')
+    assert result == 'string,string'
+
+
+def test_join_sub_nodes_will_fail_main_node_is_none():
+    result = parseXmlSaveCsv.join_sub_nodes(None, 'test')
+    assert result == ''
+
+
+def test_join_sub_nodes_will_fail_main_node_find_all_is_none(mock_xml_element):
+    mock_xml_element.findall.return_value = None
+    result = parseXmlSaveCsv.join_sub_nodes(mock_xml_element, 'test')
+    assert result == ''
+
+
+@patch("parseXmlSaveCsv.join_sub_nodes", return_value='string')
+def test_get_fields_will_pass(mock_join_sub_nodes, mock_xml_element):
+    mock_element = Mock(spec=xml.etree.ElementTree.Element)
+    mock_element.find.return_value = mock_xml_element
+    result = parseXmlSaveCsv.get_get_fields(mock_element)
+    assert mock_join_sub_nodes.called
+    assert type(result) is collections.OrderedDict
+
+
+def test_write_listing_to_csv_will_pass():
+    test_path = os.path.dirname(os.path.realpath(__file__)) + '\\test.csv'
+    mock_dict = collections.OrderedDict()
+    mock_dict['header'] = 'value'
+    parseXmlSaveCsv.write_listing_to_csv(mock_dict, 'test')
+    assert os.path.isfile('test.csv')
+    os.remove(test_path)