From 86b4dcf16151aec92c53a567954a31c8def34bda Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sun, 14 Jun 2020 11:44:46 -0400
Subject: [PATCH 01/16] Added option to place image data in Data subdirectory
 and place dataorganization.csv file into the raw data folder.

---
 merlin/core/dataset.py          | 23 ++++++++++++++++-------
 merlin/data/dataorganization.py | 32 ++++++++++++++++++++++----------
 2 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/merlin/core/dataset.py b/merlin/core/dataset.py
index bc120af6..fc6f5a5e 100755
--- a/merlin/core/dataset.py
+++ b/merlin/core/dataset.py
@@ -66,7 +66,7 @@ def __init__(self, dataDirectoryName: str,
             self.rawDataPath)
         if not self.rawDataPortal.is_available():
             print('The raw data is not available at %s'.format(
-                self.rawDataPath))
+            self.rawDataPath))
 
         self.analysisPath = os.sep.join([analysisHome, dataDirectoryName])
         os.makedirs(self.analysisPath, exist_ok=True)
@@ -886,16 +886,25 @@ def __init__(self, dataDirectoryName: str, dataHome: str = None,
 
         if microscopeParametersName is not None:
             self._import_microscope_parameters(microscopeParametersName)
-    
+
+        # try to find the image data in two locations. First in the Data
+        # subdirectory and then in the dataset directory
+        self.imageDataPath = os.sep.join([self.rawDataPath, 'Data'])
+        self.imageDataPortal = dataportal.DataPortal.create_portal(
+            self.imageDataPath)
+        if not self.imageDataPortal.is_available():
+            self.imageDataPath = self.rawDataPath
+            self.imageDataPortal = self.rawDataPortal
+
         self._load_microscope_parameters()
 
     def get_image_file_names(self):
-        return sorted(self.rawDataPortal.list_files(
+        return sorted(self.imageDataPortal.list_files(
             extensionList=['.dax', '.tif', '.tiff']))
 
     def load_image(self, imagePath, frameIndex):
         with imagereader.infer_reader(
-                self.rawDataPortal.open_file(imagePath)) as reader:
+                self.imageDataPortal.open_file(imagePath)) as reader:
             imageIn = reader.load_frame(int(frameIndex))
             if self.transpose:
                 imageIn = np.transpose(imageIn)
@@ -913,7 +922,7 @@ def image_stack_size(self, imagePath):
             a three element list with [width, height, frameCount] or None
                     if the file does not exist
         """
-        with imagereader.infer_reader(self.rawDataPortal.open_file(imagePath)
+        with imagereader.infer_reader(self.imageDataPortal.open_file(imagePath)
                                       ) as reader:
             return reader.film_size()
 
@@ -965,7 +974,7 @@ def get_image_xml_metadata(self, imagePath: str) -> Dict:
             imagePath: the path to the image file (.dax or .tif)
         Returns: the metadata from the associated xml file
         """
-        filePortal = self.rawDataPortal.open_file(
+        filePortal = self.imageDataPortal.open_file(
             imagePath).get_sibling_with_extension('.xml')
         return xmltodict.parse(filePortal.read_as_text())
 
@@ -1005,7 +1014,7 @@ def __init__(self, dataDirectoryName: str, codebookNames: List[str] = None,
                          microscopeParametersName)
 
         self.dataOrganization = dataorganization.DataOrganization(
-                self, dataOrganizationName)
+                self, dataOrganizationName, self.rawDataPortal)
         if codebookNames:
             self.codebooks = [codebook.Codebook(self, name, i)
                               for i, name in enumerate(codebookNames)]
diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py
index 1fa584d3..b3589059 100755
--- a/merlin/data/dataorganization.py
+++ b/merlin/data/dataorganization.py
@@ -7,6 +7,7 @@
 
 import merlin
 from merlin.core import dataset
+from merlin.util import dataportal
 
 
 def _parse_list(inputString: str, dtype=float):
@@ -31,15 +32,19 @@ class DataOrganization(object):
     image files.
     """
 
-    def __init__(self, dataSet, filePath: str = None):
+    def __init__(self, dataSet, filePath: str = None,
+                 dataPortal: dataportal.DataPortal = None):
         """
         Create a new DataOrganization for the data in the specified data set.
 
-        If filePath is not specified, a previously stored DataOrganization
-        is loaded from the dataSet if it exists. If filePath is specified,
-        the DataOrganization at the specified filePath is loaded and
-        stored in the dataSet, overwriting any previously stored
-        DataOrganization.
+        The DataOrganization is located in the following search order:
+        i) If filePath is specified and filePath exists this file is copied
+        into the data set analysis directory and used as the datorganization
+        ii) If dataPortal is specified and contains a file named
+        "dataorganization.csv", this file will be copied into the
+        data set analysis directory and used as the data organization.
+        iii) If neither filePath or dataPortal are specified, the previously
+        stored dataorganization is used.
 
         Raises:
             InputDataError: If the set of raw data is incomplete or the
@@ -47,24 +52,31 @@ def __init__(self, dataSet, filePath: str = None):
         """
 
         self._dataSet = dataSet
+        self.data = None
 
         if filePath is not None:
             if not os.path.exists(filePath):
                 filePath = os.sep.join(
                         [merlin.DATA_ORGANIZATION_HOME, filePath])
-
             self.data = pandas.read_csv(
                 filePath,
                 converters={'frame': _parse_int_list, 'zPos': _parse_list})
-            self.data['readoutName'] = self.data['readoutName'].str.strip()
-            self._dataSet.save_dataframe_to_csv(
-                    self.data, 'dataorganization', index=False)
 
+        elif dataPortal is not None:
+            fileList = dataPortal.list_files('.csv')
+            if 'dataorganization.csv' in fileList:
+                self.data = pandas.read_csv(
+                    dataPortal.open_file('dataorganization.csv'),
+                    converters={'frame': _parse_int_list, 'zPos': _parse_list})
         else:
             self.data = self._dataSet.load_dataframe_from_csv(
                 'dataorganization',
                 converters={'frame': _parse_int_list, 'zPos': _parse_list})
 
+        self.data['readoutName'] = self.data['readoutName'].str.strip()
+        self._dataSet.save_dataframe_to_csv(
+            self.data, 'dataorganization', index=False)
+
         stringColumns = ['readoutName', 'channelName', 'imageType',
                          'imageRegExp', 'fiducialImageType', 'fiducialRegExp']
         self.data[stringColumns] = self.data[stringColumns].astype('str')

From 35a4faf71d828b33d3a9cdd9d69afaaed9067d87 Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sun, 14 Jun 2020 11:55:24 -0400
Subject: [PATCH 02/16] Allow report path to be specified and if not specified
 send no report.

---
 merlin/merlin.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/merlin/merlin.py b/merlin/merlin.py
index c892baa3..b6b9e224 100755
--- a/merlin/merlin.py
+++ b/merlin/merlin.py
@@ -56,9 +56,10 @@ def build_parser():
                         help='the analysis home directory')
     parser.add_argument('-k', '--snakemake-parameters',
                         help='the name of the snakemake parameters file')
-    parser.add_argument('--no_report',
-                        help='flag indicating that the snakemake stats ' +
-                        'should not be shared to improve MERlin')
+    parser.add_argument('--report-path',
+                        help='The path to send a report of the MERlin run to.' +
+                        'If no report path is specified, no MERlin run ' +
+                        'information is shared.')
 
     return parser
 
@@ -144,7 +145,7 @@ def merlin():
                     snakemakeParameters = json.load(f)
 
             run_with_snakemake(dataSet, snakefilePath, args.core_count,
-                               snakemakeParameters, not args.no_report)
+                               snakemakeParameters, args.report_path)
 
 
 def generate_analysis_tasks_and_snakefile(dataSet: dataset.MERFISHDataSet,
@@ -160,18 +161,18 @@ def generate_analysis_tasks_and_snakefile(dataSet: dataset.MERFISHDataSet,
 
 def run_with_snakemake(
         dataSet: dataset.MERFISHDataSet, snakefilePath: str, coreCount: int,
-        snakemakeParameters: Dict = {}, report: bool = True):
+        snakemakeParameters: Dict = {}, reportPath: str = None):
     print('Running MERlin pipeline through snakemake')
     snakemake.snakemake(snakefilePath, cores=coreCount,
                         workdir=dataSet.get_snakemake_path(),
                         stats=snakefilePath + '.stats', lock=False,
                         **snakemakeParameters)
 
-    if report:
+    if reportPath:
         reportTime = int(time.time())
         try:
             with open(snakefilePath + '.stats', 'r') as f:
-                requests.post('http://merlin.georgeemanuel.com/post',
+                requests.post(reportPath,
                               files={
                                   'file': (
                                       '.'.join(
@@ -200,7 +201,7 @@ def run_with_snakemake(
             'analysis_parameters': analysisParameters
         }
         try:
-            requests.post('http://merlin.georgeemanuel.com/post',
+            requests.post(reportPath,
                           files={'file': ('.'.join(
                               [dataSet.dataSetName,
                                str(reportTime)])

From 5fce49e69591c5be4b2f361b139b2401991ebdcf Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sun, 14 Jun 2020 12:08:13 -0400
Subject: [PATCH 03/16] Fixed data organization search.

---
 merlin/data/dataorganization.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py
index b3589059..1a944875 100755
--- a/merlin/data/dataorganization.py
+++ b/merlin/data/dataorganization.py
@@ -62,20 +62,20 @@ def __init__(self, dataSet, filePath: str = None,
                 filePath,
                 converters={'frame': _parse_int_list, 'zPos': _parse_list})
 
-        elif dataPortal is not None:
-            fileList = dataPortal.list_files('.csv')
-            if 'dataorganization.csv' in fileList:
-                self.data = pandas.read_csv(
-                    dataPortal.open_file('dataorganization.csv'),
-                    converters={'frame': _parse_int_list, 'zPos': _parse_list})
+        elif dataPortal is not None and \
+                'dataorganization.csv' in dataPortal.list_files('csv'):
+            self.data = pandas.read_csv(
+                dataPortal.open_file('dataorganization.csv'),
+                converters={'frame': _parse_int_list, 'zPos': _parse_list})
         else:
             self.data = self._dataSet.load_dataframe_from_csv(
                 'dataorganization',
                 converters={'frame': _parse_int_list, 'zPos': _parse_list})
 
-        self.data['readoutName'] = self.data['readoutName'].str.strip()
-        self._dataSet.save_dataframe_to_csv(
-            self.data, 'dataorganization', index=False)
+        if self.data is not None:
+            self.data['readoutName'] = self.data['readoutName'].str.strip()
+            self._dataSet.save_dataframe_to_csv(
+                self.data, 'dataorganization', index=False)
 
         stringColumns = ['readoutName', 'channelName', 'imageType',
                          'imageRegExp', 'fiducialImageType', 'fiducialRegExp']

From 0f3a617bf55c1c108747f4aeb437f14f25d974c6 Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sun, 14 Jun 2020 13:18:27 -0400
Subject: [PATCH 04/16] Updated default snakemake parameters to wait long
 enough for a file to appear.

---
 merlin/data/dataorganization.py | 25 +++++++++++++++----------
 merlin/merlin.py                |  6 ++++++
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py
index 1a944875..e18dc7c0 100755
--- a/merlin/data/dataorganization.py
+++ b/merlin/data/dataorganization.py
@@ -62,20 +62,25 @@ def __init__(self, dataSet, filePath: str = None,
                 filePath,
                 converters={'frame': _parse_int_list, 'zPos': _parse_list})
 
-        elif dataPortal is not None and \
-                'dataorganization.csv' in dataPortal.list_files('csv'):
-            self.data = pandas.read_csv(
-                dataPortal.open_file('dataorganization.csv'),
-                converters={'frame': _parse_int_list, 'zPos': _parse_list})
-        else:
+        if self.data is None and dataPortal is not None:
+            try:
+                self.data = pandas.read_csv(
+                    dataPortal.open_file('dataorganization.csv'),
+                    converters={'frame': _parse_int_list, 'zPos': _parse_list})
+            # this could be many different exceptions so for now it can remain
+            # broad. If data can't be loaded from the data portal we load it from
+            # the dataset before
+            except Exception:
+                pass
+
+        if self.data is None:
             self.data = self._dataSet.load_dataframe_from_csv(
                 'dataorganization',
                 converters={'frame': _parse_int_list, 'zPos': _parse_list})
 
-        if self.data is not None:
-            self.data['readoutName'] = self.data['readoutName'].str.strip()
-            self._dataSet.save_dataframe_to_csv(
-                self.data, 'dataorganization', index=False)
+        self.data['readoutName'] = self.data['readoutName'].str.strip()
+        self._dataSet.save_dataframe_to_csv(
+            self.data, 'dataorganization', index=False)
 
         stringColumns = ['readoutName', 'channelName', 'imageType',
                          'imageRegExp', 'fiducialImageType', 'fiducialRegExp']
diff --git a/merlin/merlin.py b/merlin/merlin.py
index b6b9e224..80b09ca0 100755
--- a/merlin/merlin.py
+++ b/merlin/merlin.py
@@ -163,6 +163,12 @@ def run_with_snakemake(
         dataSet: dataset.MERFISHDataSet, snakefilePath: str, coreCount: int,
         snakemakeParameters: Dict = {}, reportPath: str = None):
     print('Running MERlin pipeline through snakemake')
+
+    if 'restart_times' not in snakemakeParameters:
+        snakemakeParameters['restart_times'] = 3
+    if 'latency_wait' not in snakemakeParameters:
+        snakemakeParameters['latency_wait'] = 60
+
     snakemake.snakemake(snakefilePath, cores=coreCount,
                         workdir=dataSet.get_snakemake_path(),
                         stats=snakefilePath + '.stats', lock=False,

From a11a763f5d8a711471ff1bc55c5b0142cabe6192 Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sun, 14 Jun 2020 13:30:43 -0400
Subject: [PATCH 05/16] Fixed search for local dataorganization

---
 merlin/util/dataportal.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/merlin/util/dataportal.py b/merlin/util/dataportal.py
index 3c8d2bb6..2c929f74 100755
--- a/merlin/util/dataportal.py
+++ b/merlin/util/dataportal.py
@@ -256,6 +256,8 @@ class LocalFilePortal(FilePortal):
 
     def __init__(self, fileName: str):
         super().__init__(fileName)
+        if not os.path.exists(fileName):
+            raise FileNotFoundError
         self._fileHandle = open(fileName, 'rb')
 
     def get_sibling_with_extension(self, newExtension: str):

From c526d12e12dcbb0b1bd662199f02e1fb6252c005 Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sun, 14 Jun 2020 13:40:34 -0400
Subject: [PATCH 06/16] Pep8

---
 merlin/core/dataset.py          | 2 +-
 merlin/data/dataorganization.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/merlin/core/dataset.py b/merlin/core/dataset.py
index fc6f5a5e..bd11451f 100755
--- a/merlin/core/dataset.py
+++ b/merlin/core/dataset.py
@@ -66,7 +66,7 @@ def __init__(self, dataDirectoryName: str,
             self.rawDataPath)
         if not self.rawDataPortal.is_available():
             print('The raw data is not available at %s'.format(
-            self.rawDataPath))
+                self.rawDataPath))
 
         self.analysisPath = os.sep.join([analysisHome, dataDirectoryName])
         os.makedirs(self.analysisPath, exist_ok=True)
diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py
index e18dc7c0..2d9fbe50 100755
--- a/merlin/data/dataorganization.py
+++ b/merlin/data/dataorganization.py
@@ -68,8 +68,8 @@ def __init__(self, dataSet, filePath: str = None,
                     dataPortal.open_file('dataorganization.csv'),
                     converters={'frame': _parse_int_list, 'zPos': _parse_list})
             # this could be many different exceptions so for now it can remain
-            # broad. If data can't be loaded from the data portal we load it from
-            # the dataset before
+            # broad. If data can't be loaded from the data portal we load it
+            # from the dataset before
             except Exception:
                 pass
 

From d37b4c9826e1b2bc22712cfd0e83826d3f0a3fa1 Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sun, 14 Jun 2020 13:49:28 -0400
Subject: [PATCH 07/16] Moved fov and z index selection to run_analysis to
 avoid incompatible parameter sets when these are randomly selected and saved
 into the parameters, since these saved parameters will no longer match the
 specified parameters.

---
 merlin/analysis/optimize.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/merlin/analysis/optimize.py b/merlin/analysis/optimize.py
index f182ab2b..20a37c99 100755
--- a/merlin/analysis/optimize.py
+++ b/merlin/analysis/optimize.py
@@ -39,15 +39,9 @@ def __init__(self, dataSet, parameters=None, analysisName=None):
 
             self.parameters['fov_per_iteration'] = \
                 len(self.parameters['fov_index'])
-
         else:
-            self.parameters['fov_index'] = []
-            for i in range(self.parameters['fov_per_iteration']):
-                fovIndex = int(np.random.choice(
-                    list(self.dataSet.get_fovs())))
-                zIndex = int(np.random.choice(
-                    list(range(len(self.dataSet.get_z_positions())))))
-                self.parameters['fov_index'].append([fovIndex, zIndex])
+            self.parameters['fov_index'] = None
+
 
     def get_estimated_memory(self):
         return 4000
@@ -71,11 +65,21 @@ def get_codebook(self) -> Codebook:
         return preprocessTask.get_codebook()
 
     def _run_analysis(self, fragmentIndex):
+        logger = self.dataSet.get_logger(self)
+
         preprocessTask = self.dataSet.load_analysis_task(
                 self.parameters['preprocess_task'])
         codebook = self.get_codebook()
 
-        fovIndex, zIndex = self.parameters['fov_index'][fragmentIndex]
+        if self.parameters['fov_index'] is not None:
+            fovIndex, zIndex = self.parameters['fov_index'][fragmentIndex]
+        else:
+            fovIndex = int(np.random.choice(
+                list(self.dataSet.get_fovs())))
+            zIndex = int(np.random.choice(
+                list(range(len(self.dataSet.get_z_positions())))))
+            logger.info('Selected fov %i and z index %i for replicate %i'
+                        % (fovIndex, zIndex, fragmentIndex))
 
         scaleFactors = self._get_previous_scale_factors()
         backgrounds = self._get_previous_backgrounds()

From 6c9e58043b2acb4b9bbf67bc5e2d35fc8c56bc9c Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sun, 14 Jun 2020 13:57:06 -0400
Subject: [PATCH 08/16] Fixed case when fov_index is None.

---
 merlin/analysis/optimize.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/merlin/analysis/optimize.py b/merlin/analysis/optimize.py
index 20a37c99..875639fc 100755
--- a/merlin/analysis/optimize.py
+++ b/merlin/analysis/optimize.py
@@ -33,7 +33,8 @@ def __init__(self, dataSet, parameters=None, analysisName=None):
         if 'crop_width' not in self.parameters:
             self.parameters['crop_width'] = 0
 
-        if 'fov_index' in self.parameters:
+        if 'fov_index' in self.parameters \
+                and self.parameters['fov_index'] is not None:
             logger = self.dataSet.get_logger(self)
             logger.info('Setting fov_per_iteration to length of fov_index')
 

From b983606444db400e33d16bb6b7990cdc046fc5e6 Mon Sep 17 00:00:00 2001
From: George Emanuel <root@ip-10-82-0-147.ec2.internal>
Date: Sun, 14 Jun 2020 18:39:35 +0000
Subject: [PATCH 09/16] Fixed bugs for identifying and using data folder.

---
 merlin/data/dataorganization.py | 12 ++++++------
 merlin/util/dataportal.py       |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py
index 2d9fbe50..cf9d3237 100755
--- a/merlin/data/dataorganization.py
+++ b/merlin/data/dataorganization.py
@@ -4,6 +4,7 @@
 from typing import Tuple
 import pandas
 import numpy as np
+from io import StringIO
 
 import merlin
 from merlin.core import dataset
@@ -64,8 +65,8 @@ def __init__(self, dataSet, filePath: str = None,
 
         if self.data is None and dataPortal is not None:
             try:
-                self.data = pandas.read_csv(
-                    dataPortal.open_file('dataorganization.csv'),
+                self.data = pandas.read_csv(StringIO(
+                    dataPortal.open_file('dataorganization.csv').read_as_text()),
                     converters={'frame': _parse_int_list, 'zPos': _parse_list})
             # this could be many different exceptions so for now it can remain
             # broad. If data can't be loaded from the data portal we load it
@@ -273,8 +274,7 @@ def _get_image_path(
                                  (self.fileMap['fov'] == fov) &
                                  (self.fileMap['imagingRound'] == imagingRound)]
         filemapPath = selection['imagePath'].values[0]
-        return os.path.join(self._dataSet.dataHome, self._dataSet.dataSetName,
-                            filemapPath)
+        return os.path.join(self._dataSet.imageDataPath, filemapPath)
 
     def _truncate_file_path(self, path) -> None:
         head, tail = os.path.split(path)
@@ -300,7 +300,7 @@ def _map_image_files(self) -> None:
             fileNames = self._dataSet.get_image_file_names()
             if len(fileNames) == 0:
                 raise dataset.DataFormatException(
-                    'No image files found at %s.' % self._dataSet.rawDataPath)
+                    'No image files found at %s.' % self._dataSet.imageDataPath)
             fileData = []
             for currentType, currentIndex in zip(uniqueTypes, uniqueIndexes):
                 matchRE = re.compile(
@@ -360,7 +360,7 @@ def _validate_file_map(self) -> None:
                         (channelInfo['imageType'], fov,
                          channelInfo['imagingRound']))
 
-                if not self._dataSet.rawDataPortal.open_file(
+                if not self._dataSet.imageDataPortal.open_file(
                         imagePath).exists():
                     raise InputDataError(
                         ('Image data for channel {0} and fov {1} not found. '
diff --git a/merlin/util/dataportal.py b/merlin/util/dataportal.py
index 2c929f74..a72e7521 100755
--- a/merlin/util/dataportal.py
+++ b/merlin/util/dataportal.py
@@ -116,8 +116,8 @@ def __init__(self, basePath: str, **kwargs):
         self._s3 = boto3.resource('s3', **kwargs)
 
     def is_available(self):
-        objects = list(self._s3.Bucket(self._bucketName).objects.limit(10)
-                       .filter(Prefix=self._prefix))
+        objects = list(self._s3.Bucket(self._bucketName).objects
+                       .filter(Prefix=self._prefix).limit(10))
         return len(objects) > 0
 
     def open_file(self, fileName):

From 96a0382a3029caceb01f38b1201d68ff22f44805 Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sun, 14 Jun 2020 19:16:01 -0400
Subject: [PATCH 10/16] Updated changelog.

---
 CHANGELOG.md           | 3 +++
 merlin/core/dataset.py | 9 +++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7d3d05c3..c658e3f7 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -45,3 +45,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - An alternative Lucy-Richardson deconvolution approach that requires ~10x fewer iterations.
 
+## [0.1.7] - 
+### Added
+- Added option to put image data into a folder named "Data" and to save the data organization with the raw data in the root directory named dataorganization.csv 
diff --git a/merlin/core/dataset.py b/merlin/core/dataset.py
index bd11451f..5110b8fb 100755
--- a/merlin/core/dataset.py
+++ b/merlin/core/dataset.py
@@ -893,8 +893,13 @@ def __init__(self, dataDirectoryName: str, dataHome: str = None,
         self.imageDataPortal = dataportal.DataPortal.create_portal(
             self.imageDataPath)
         if not self.imageDataPortal.is_available():
-            self.imageDataPath = self.rawDataPath
-            self.imageDataPortal = self.rawDataPortal
+            # allow "data" to be used instead of "Data"
+            self.imageDataPath = os.sep.join([self.rawDataPath, 'data'])
+            self.imageDataPortal = dataportal.DataPortal.create_portal(
+                self.imageDataPath)
+            if not self.imageDataPortal.is_available():
+                self.imageDataPath = self.rawDataPath
+                self.imageDataPortal = self.rawDataPortal
 
         self._load_microscope_parameters()
 

From 65e51ba19474ad97ae12f969cac3dbff0a5ce82f Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sun, 14 Jun 2020 19:19:40 -0400
Subject: [PATCH 11/16] Pep8

---
 merlin/data/dataorganization.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py
index cf9d3237..cb54c149 100755
--- a/merlin/data/dataorganization.py
+++ b/merlin/data/dataorganization.py
@@ -65,8 +65,8 @@ def __init__(self, dataSet, filePath: str = None,
 
         if self.data is None and dataPortal is not None:
             try:
-                self.data = pandas.read_csv(StringIO(
-                    dataPortal.open_file('dataorganization.csv').read_as_text()),
+                self.data = pandas.read_csv(StringIO(dataPortal.open_file(
+                    'dataorganization.csv').read_as_text()),
                     converters={'frame': _parse_int_list, 'zPos': _parse_list})
             # this could be many different exceptions so for now it can remain
             # broad. If data can't be loaded from the data portal we load it

From 07ffe6b33d9d444d5d86830c794a8708d13dc1d4 Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Sat, 11 Jul 2020 12:44:43 +0000
Subject: [PATCH 12/16] Added big tiff option.

---
 merlin/analysis/generatemosaic.py | 3 ++-
 merlin/core/dataset.py            | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/merlin/analysis/generatemosaic.py b/merlin/analysis/generatemosaic.py
index 430ea50b..ef17cef3 100755
--- a/merlin/analysis/generatemosaic.py
+++ b/merlin/analysis/generatemosaic.py
@@ -128,7 +128,8 @@ def _run_analysis(self):
                 for z in zIndexes:
                     with self.dataSet.writer_for_analysis_images(
                         self, 'mosaic_%s_%i'
-                              % (dataOrganization.get_data_channel_name(d), z))\
+                              % (dataOrganization.get_data_channel_name(d), z),
+                              bigTiff = True, imagej = False)\
                             as outputTif:
                         mosaic = self._prepare_mosaic_slice(
                             z, d, micronExtents, alignTask, maximumProjection)
diff --git a/merlin/core/dataset.py b/merlin/core/dataset.py
index 5110b8fb..0ec65e8b 100755
--- a/merlin/core/dataset.py
+++ b/merlin/core/dataset.py
@@ -204,7 +204,7 @@ def get_analysis_image(
 
     def writer_for_analysis_images(
             self, analysisTask: TaskOrName, imageBaseName: str,
-            imageIndex: int = None, imagej: bool = True) -> tifffile.TiffWriter:
+            imageIndex: int = None, bigTiff = False, imagej: bool = True) -> tifffile.TiffWriter:
         """Get a writer for writing tiff files from an analysis task.
 
         Args:
@@ -216,7 +216,8 @@ def writer_for_analysis_images(
 
         """
         return tifffile.TiffWriter(self._analysis_image_name(
-            analysisTask, imageBaseName, imageIndex), imagej=imagej)
+            analysisTask, imageBaseName, imageIndex), bigtiff=bigTiff, 
+            imagej=imagej)
 
     @staticmethod
     def analysis_tiff_description(sliceCount: int, frameCount: int) -> Dict:

From acc56bac24d02f78b5ff865987918336488cf98a Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Tue, 22 Sep 2020 13:53:01 -0400
Subject: [PATCH 13/16] Fixed problem caused by fileHandle not being set when
 path doesn't exist

---
 merlin/util/dataportal.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/merlin/util/dataportal.py b/merlin/util/dataportal.py
index a72e7521..d7461122 100755
--- a/merlin/util/dataportal.py
+++ b/merlin/util/dataportal.py
@@ -256,6 +256,7 @@ class LocalFilePortal(FilePortal):
 
     def __init__(self, fileName: str):
         super().__init__(fileName)
+        self._fileHandle = None
         if not os.path.exists(fileName):
             raise FileNotFoundError
         self._fileHandle = open(fileName, 'rb')
@@ -275,7 +276,8 @@ def read_file_bytes(self, startByte, endByte):
         return self._fileHandle.read(endByte-startByte)
 
     def close(self) -> None:
-        self._fileHandle.close()
+        if self._fileHandle is not None:
+            self._fileHandle.close()
 
 
 class S3FilePortal(FilePortal):

From e83ceb2ccd285ef55c512aea3fdd80e298cdde8b Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Tue, 22 Sep 2020 13:59:23 -0400
Subject: [PATCH 14/16] Added matplotlib to conda install.

---
 .circleci/config.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 366579ab..e7aaabae 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -22,6 +22,7 @@ jobs:
             conda config --set quiet true
             conda create -n merlin_env python=3.6
             source activate merlin_env
+            conda install matplotlib
             conda install rtree
             conda install pytables
             cd ~

From 9b5f66e66212637b8b96ee26a406fd7619f6aa7b Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Fri, 20 Nov 2020 21:55:09 +0000
Subject: [PATCH 15/16] Prevent read only directories from creating a problem.

---
 merlin/core/dataset.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/merlin/core/dataset.py b/merlin/core/dataset.py
index 0ec65e8b..dcbebd03 100755
--- a/merlin/core/dataset.py
+++ b/merlin/core/dataset.py
@@ -72,7 +72,10 @@ def __init__(self, dataDirectoryName: str,
         os.makedirs(self.analysisPath, exist_ok=True)
 
         self.logPath = os.sep.join([self.analysisPath, 'logs'])
-        os.makedirs(self.logPath, exist_ok=True)
+        try: 
+            os.makedirs(self.logPath, exist_ok=True)
+        except PermissionError as e:
+            print("Unable to create logging directory")
 
         self._store_dataset_metadata()
 

From 595136c1a0b9c79c5dd0d22927bf025cf842bfbf Mon Sep 17 00:00:00 2001
From: George Emanuel <emanuega0@gmail.com>
Date: Tue, 24 Nov 2020 02:47:39 +0000
Subject: [PATCH 16/16] Updated data organization to work when working on read
 only storage.

---
 merlin/data/dataorganization.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py
index cb54c149..61d28c93 100755
--- a/merlin/data/dataorganization.py
+++ b/merlin/data/dataorganization.py
@@ -80,8 +80,11 @@ def __init__(self, dataSet, filePath: str = None,
                 converters={'frame': _parse_int_list, 'zPos': _parse_list})
 
         self.data['readoutName'] = self.data['readoutName'].str.strip()
-        self._dataSet.save_dataframe_to_csv(
-            self.data, 'dataorganization', index=False)
+        try:
+            self._dataSet.save_dataframe_to_csv(
+                self.data, 'dataorganization', index=False)
+        except PermissionError as e:
+            print('Unable to save data organization.')
 
         stringColumns = ['readoutName', 'channelName', 'imageType',
                          'imageRegExp', 'fiducialImageType', 'fiducialRegExp']