From 86b4dcf16151aec92c53a567954a31c8def34bda Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 11:44:46 -0400 Subject: [PATCH 01/16] Added option to place image data in Data subdirectory and place dataorganization.csv file into the raw data folder. --- merlin/core/dataset.py | 23 ++++++++++++++++------- merlin/data/dataorganization.py | 32 ++++++++++++++++++++++---------- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/merlin/core/dataset.py b/merlin/core/dataset.py index bc120af6..fc6f5a5e 100755 --- a/merlin/core/dataset.py +++ b/merlin/core/dataset.py @@ -66,7 +66,7 @@ def __init__(self, dataDirectoryName: str, self.rawDataPath) if not self.rawDataPortal.is_available(): print('The raw data is not available at %s'.format( - self.rawDataPath)) + self.rawDataPath)) self.analysisPath = os.sep.join([analysisHome, dataDirectoryName]) os.makedirs(self.analysisPath, exist_ok=True) @@ -886,16 +886,25 @@ def __init__(self, dataDirectoryName: str, dataHome: str = None, if microscopeParametersName is not None: self._import_microscope_parameters(microscopeParametersName) - + + # try to find the image data in two locations. 
First in the Data + # subdirectory and then in the dataset directory + self.imageDataPath = os.sep.join([self.rawDataPath, 'Data']) + self.imageDataPortal = dataportal.DataPortal.create_portal( + self.imageDataPath) + if not self.imageDataPortal.is_available(): + self.imageDataPath = self.rawDataPath + self.imageDataPortal = self.rawDataPortal + self._load_microscope_parameters() def get_image_file_names(self): - return sorted(self.rawDataPortal.list_files( + return sorted(self.imageDataPortal.list_files( extensionList=['.dax', '.tif', '.tiff'])) def load_image(self, imagePath, frameIndex): with imagereader.infer_reader( - self.rawDataPortal.open_file(imagePath)) as reader: + self.imageDataPortal.open_file(imagePath)) as reader: imageIn = reader.load_frame(int(frameIndex)) if self.transpose: imageIn = np.transpose(imageIn) @@ -913,7 +922,7 @@ def image_stack_size(self, imagePath): a three element list with [width, height, frameCount] or None if the file does not exist """ - with imagereader.infer_reader(self.rawDataPortal.open_file(imagePath) + with imagereader.infer_reader(self.imageDataPortal.open_file(imagePath) ) as reader: return reader.film_size() @@ -965,7 +974,7 @@ def get_image_xml_metadata(self, imagePath: str) -> Dict: imagePath: the path to the image file (.dax or .tif) Returns: the metadata from the associated xml file """ - filePortal = self.rawDataPortal.open_file( + filePortal = self.imageDataPortal.open_file( imagePath).get_sibling_with_extension('.xml') return xmltodict.parse(filePortal.read_as_text()) @@ -1005,7 +1014,7 @@ def __init__(self, dataDirectoryName: str, codebookNames: List[str] = None, microscopeParametersName) self.dataOrganization = dataorganization.DataOrganization( - self, dataOrganizationName) + self, dataOrganizationName, self.rawDataPortal) if codebookNames: self.codebooks = [codebook.Codebook(self, name, i) for i, name in enumerate(codebookNames)] diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py 
index 1fa584d3..b3589059 100755 --- a/merlin/data/dataorganization.py +++ b/merlin/data/dataorganization.py @@ -7,6 +7,7 @@ import merlin from merlin.core import dataset +from merlin.util import dataportal def _parse_list(inputString: str, dtype=float): @@ -31,15 +32,19 @@ class DataOrganization(object): image files. """ - def __init__(self, dataSet, filePath: str = None): + def __init__(self, dataSet, filePath: str = None, + dataPortal: dataportal.DataPortal = None): """ Create a new DataOrganization for the data in the specified data set. - If filePath is not specified, a previously stored DataOrganization - is loaded from the dataSet if it exists. If filePath is specified, - the DataOrganization at the specified filePath is loaded and - stored in the dataSet, overwriting any previously stored - DataOrganization. + The DataOrganization is located in the following search order: + i) If filePath is specified and filePath exists this file is copied + into the data set analysis directory and used as the data organization. + ii) If dataPortal is specified and contains a file named + "dataorganization.csv", this file will be copied into the + data set analysis directory and used as the data organization. + iii) If neither filePath nor dataPortal is specified, the previously + stored dataorganization is used.
Raises: InputDataError: If the set of raw data is incomplete or the @@ -47,24 +52,31 @@ def __init__(self, dataSet, filePath: str = None): """ self._dataSet = dataSet + self.data = None if filePath is not None: if not os.path.exists(filePath): filePath = os.sep.join( [merlin.DATA_ORGANIZATION_HOME, filePath]) - self.data = pandas.read_csv( filePath, converters={'frame': _parse_int_list, 'zPos': _parse_list}) - self.data['readoutName'] = self.data['readoutName'].str.strip() - self._dataSet.save_dataframe_to_csv( - self.data, 'dataorganization', index=False) + elif dataPortal is not None: + fileList = dataPortal.list_files('.csv') + if 'dataorganization.csv' in fileList: + self.data = pandas.read_csv( + dataPortal.open_file('dataorganization.csv'), + converters={'frame': _parse_int_list, 'zPos': _parse_list}) else: self.data = self._dataSet.load_dataframe_from_csv( 'dataorganization', converters={'frame': _parse_int_list, 'zPos': _parse_list}) + self.data['readoutName'] = self.data['readoutName'].str.strip() + self._dataSet.save_dataframe_to_csv( + self.data, 'dataorganization', index=False) + stringColumns = ['readoutName', 'channelName', 'imageType', 'imageRegExp', 'fiducialImageType', 'fiducialRegExp'] self.data[stringColumns] = self.data[stringColumns].astype('str') From 35a4faf71d828b33d3a9cdd9d69afaaed9067d87 Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 11:55:24 -0400 Subject: [PATCH 02/16] Allow report path to be specified and if not specified send no report. 
--- merlin/merlin.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/merlin/merlin.py b/merlin/merlin.py index c892baa3..b6b9e224 100755 --- a/merlin/merlin.py +++ b/merlin/merlin.py @@ -56,9 +56,10 @@ def build_parser(): help='the analysis home directory') parser.add_argument('-k', '--snakemake-parameters', help='the name of the snakemake parameters file') - parser.add_argument('--no_report', - help='flag indicating that the snakemake stats ' + - 'should not be shared to improve MERlin') + parser.add_argument('--report-path', + help='The path to send a report of the MERlin run to.' + + 'If no report path is specified, no MERlin run ' + + 'information is shared.') return parser @@ -144,7 +145,7 @@ def merlin(): snakemakeParameters = json.load(f) run_with_snakemake(dataSet, snakefilePath, args.core_count, - snakemakeParameters, not args.no_report) + snakemakeParameters, args.report_path) def generate_analysis_tasks_and_snakefile(dataSet: dataset.MERFISHDataSet, @@ -160,18 +161,18 @@ def generate_analysis_tasks_and_snakefile(dataSet: dataset.MERFISHDataSet, def run_with_snakemake( dataSet: dataset.MERFISHDataSet, snakefilePath: str, coreCount: int, - snakemakeParameters: Dict = {}, report: bool = True): + snakemakeParameters: Dict = {}, reportPath: str = None): print('Running MERlin pipeline through snakemake') snakemake.snakemake(snakefilePath, cores=coreCount, workdir=dataSet.get_snakemake_path(), stats=snakefilePath + '.stats', lock=False, **snakemakeParameters) - if report: + if reportPath: reportTime = int(time.time()) try: with open(snakefilePath + '.stats', 'r') as f: - requests.post('http://merlin.georgeemanuel.com/post', + requests.post(reportPath, files={ 'file': ( '.'.join( @@ -200,7 +201,7 @@ def run_with_snakemake( 'analysis_parameters': analysisParameters } try: - requests.post('http://merlin.georgeemanuel.com/post', + requests.post(reportPath, files={'file': ('.'.join( [dataSet.dataSetName, str(reportTime)]) From 
5fce49e69591c5be4b2f361b139b2401991ebdcf Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 12:08:13 -0400 Subject: [PATCH 03/16] Fixed data organization search. --- merlin/data/dataorganization.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py index b3589059..1a944875 100755 --- a/merlin/data/dataorganization.py +++ b/merlin/data/dataorganization.py @@ -62,20 +62,20 @@ def __init__(self, dataSet, filePath: str = None, filePath, converters={'frame': _parse_int_list, 'zPos': _parse_list}) - elif dataPortal is not None: - fileList = dataPortal.list_files('.csv') - if 'dataorganization.csv' in fileList: - self.data = pandas.read_csv( - dataPortal.open_file('dataorganization.csv'), - converters={'frame': _parse_int_list, 'zPos': _parse_list}) + elif dataPortal is not None and \ + 'dataorganization.csv' in dataPortal.list_files('csv'): + self.data = pandas.read_csv( + dataPortal.open_file('dataorganization.csv'), + converters={'frame': _parse_int_list, 'zPos': _parse_list}) else: self.data = self._dataSet.load_dataframe_from_csv( 'dataorganization', converters={'frame': _parse_int_list, 'zPos': _parse_list}) - self.data['readoutName'] = self.data['readoutName'].str.strip() - self._dataSet.save_dataframe_to_csv( - self.data, 'dataorganization', index=False) + if self.data is not None: + self.data['readoutName'] = self.data['readoutName'].str.strip() + self._dataSet.save_dataframe_to_csv( + self.data, 'dataorganization', index=False) stringColumns = ['readoutName', 'channelName', 'imageType', 'imageRegExp', 'fiducialImageType', 'fiducialRegExp'] From 0f3a617bf55c1c108747f4aeb437f14f25d974c6 Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 13:18:27 -0400 Subject: [PATCH 04/16] Updated default snakemake parameters to wait long enough for a file to appear. 
--- merlin/data/dataorganization.py | 25 +++++++++++++++---------- merlin/merlin.py | 6 ++++++ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py index 1a944875..e18dc7c0 100755 --- a/merlin/data/dataorganization.py +++ b/merlin/data/dataorganization.py @@ -62,20 +62,25 @@ def __init__(self, dataSet, filePath: str = None, filePath, converters={'frame': _parse_int_list, 'zPos': _parse_list}) - elif dataPortal is not None and \ - 'dataorganization.csv' in dataPortal.list_files('csv'): - self.data = pandas.read_csv( - dataPortal.open_file('dataorganization.csv'), - converters={'frame': _parse_int_list, 'zPos': _parse_list}) - else: + if self.data is None and dataPortal is not None: + try: + self.data = pandas.read_csv( + dataPortal.open_file('dataorganization.csv'), + converters={'frame': _parse_int_list, 'zPos': _parse_list}) + # this could be many different exceptions so for now it can remain + # broad. If data can't be loaded from the data portal we load it from + # the dataset before + except Exception: + pass + + if self.data is None: self.data = self._dataSet.load_dataframe_from_csv( 'dataorganization', converters={'frame': _parse_int_list, 'zPos': _parse_list}) - if self.data is not None: - self.data['readoutName'] = self.data['readoutName'].str.strip() - self._dataSet.save_dataframe_to_csv( - self.data, 'dataorganization', index=False) + self.data['readoutName'] = self.data['readoutName'].str.strip() + self._dataSet.save_dataframe_to_csv( + self.data, 'dataorganization', index=False) stringColumns = ['readoutName', 'channelName', 'imageType', 'imageRegExp', 'fiducialImageType', 'fiducialRegExp'] diff --git a/merlin/merlin.py b/merlin/merlin.py index b6b9e224..80b09ca0 100755 --- a/merlin/merlin.py +++ b/merlin/merlin.py @@ -163,6 +163,12 @@ def run_with_snakemake( dataSet: dataset.MERFISHDataSet, snakefilePath: str, coreCount: int, snakemakeParameters: Dict = {}, reportPath: str = 
None): print('Running MERlin pipeline through snakemake') + + if 'restart_times' not in snakemakeParameters: + snakemakeParameters['restart_times'] = 3 + if 'latency_wait' not in snakemakeParameters: + snakemakeParameters['latency_wait'] = 60 + snakemake.snakemake(snakefilePath, cores=coreCount, workdir=dataSet.get_snakemake_path(), stats=snakefilePath + '.stats', lock=False, From a11a763f5d8a711471ff1bc55c5b0142cabe6192 Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 13:30:43 -0400 Subject: [PATCH 05/16] Fixed search for local dataorganization --- merlin/util/dataportal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/merlin/util/dataportal.py b/merlin/util/dataportal.py index 3c8d2bb6..2c929f74 100755 --- a/merlin/util/dataportal.py +++ b/merlin/util/dataportal.py @@ -256,6 +256,8 @@ class LocalFilePortal(FilePortal): def __init__(self, fileName: str): super().__init__(fileName) + if not os.path.exists(fileName): + raise FileNotFoundError self._fileHandle = open(fileName, 'rb') def get_sibling_with_extension(self, newExtension: str): From c526d12e12dcbb0b1bd662199f02e1fb6252c005 Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 13:40:34 -0400 Subject: [PATCH 06/16] Pep8 --- merlin/core/dataset.py | 2 +- merlin/data/dataorganization.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/merlin/core/dataset.py b/merlin/core/dataset.py index fc6f5a5e..bd11451f 100755 --- a/merlin/core/dataset.py +++ b/merlin/core/dataset.py @@ -66,7 +66,7 @@ def __init__(self, dataDirectoryName: str, self.rawDataPath) if not self.rawDataPortal.is_available(): print('The raw data is not available at %s'.format( - self.rawDataPath)) + self.rawDataPath)) self.analysisPath = os.sep.join([analysisHome, dataDirectoryName]) os.makedirs(self.analysisPath, exist_ok=True) diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py index e18dc7c0..2d9fbe50 100755 --- a/merlin/data/dataorganization.py +++ 
b/merlin/data/dataorganization.py @@ -68,8 +68,8 @@ def __init__(self, dataSet, filePath: str = None, dataPortal.open_file('dataorganization.csv'), converters={'frame': _parse_int_list, 'zPos': _parse_list}) # this could be many different exceptions so for now it can remain - # broad. If data can't be loaded from the data portal we load it from - # the dataset before + # broad. If data can't be loaded from the data portal we load it + # from the dataset before except Exception: pass From d37b4c9826e1b2bc22712cfd0e83826d3f0a3fa1 Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 13:49:28 -0400 Subject: [PATCH 07/16] Moved fov and z index selection to run_analysis to avoid incompatible parameter sets when these are randomly selected and save into the parameters since these saved parameters will no longer match the specified parameters. --- merlin/analysis/optimize.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/merlin/analysis/optimize.py b/merlin/analysis/optimize.py index f182ab2b..20a37c99 100755 --- a/merlin/analysis/optimize.py +++ b/merlin/analysis/optimize.py @@ -39,15 +39,9 @@ def __init__(self, dataSet, parameters=None, analysisName=None): self.parameters['fov_per_iteration'] = \ len(self.parameters['fov_index']) - else: - self.parameters['fov_index'] = [] - for i in range(self.parameters['fov_per_iteration']): - fovIndex = int(np.random.choice( - list(self.dataSet.get_fovs()))) - zIndex = int(np.random.choice( - list(range(len(self.dataSet.get_z_positions()))))) - self.parameters['fov_index'].append([fovIndex, zIndex]) + self.parameters['fov_index'] = None + def get_estimated_memory(self): return 4000 @@ -71,11 +65,21 @@ def get_codebook(self) -> Codebook: return preprocessTask.get_codebook() def _run_analysis(self, fragmentIndex): + logger = self.dataSet.get_logger(self) + preprocessTask = self.dataSet.load_analysis_task( self.parameters['preprocess_task']) codebook = self.get_codebook() - 
fovIndex, zIndex = self.parameters['fov_index'][fragmentIndex] + if self.parameters['fov_index'] is not None: + fovIndex, zIndex = self.parameters['fov_index'][fragmentIndex] + else: + fovIndex = int(np.random.choice( + list(self.dataSet.get_fovs()))) + zIndex = int(np.random.choice( + list(range(len(self.dataSet.get_z_positions()))))) + logger.info('Selected fov %i and z index %i for replicate %i' + % (fovIndex, zIndex, fragmentIndex)) scaleFactors = self._get_previous_scale_factors() backgrounds = self._get_previous_backgrounds() From 6c9e58043b2acb4b9bbf67bc5e2d35fc8c56bc9c Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 13:57:06 -0400 Subject: [PATCH 08/16] Fixed case when fov_index is None. --- merlin/analysis/optimize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/merlin/analysis/optimize.py b/merlin/analysis/optimize.py index 20a37c99..875639fc 100755 --- a/merlin/analysis/optimize.py +++ b/merlin/analysis/optimize.py @@ -33,7 +33,8 @@ def __init__(self, dataSet, parameters=None, analysisName=None): if 'crop_width' not in self.parameters: self.parameters['crop_width'] = 0 - if 'fov_index' in self.parameters: + if 'fov_index' in self.parameters \ + and self.parameters['fov_index'] is not None: logger = self.dataSet.get_logger(self) logger.info('Setting fov_per_iteration to length of fov_index') From b983606444db400e33d16bb6b7990cdc046fc5e6 Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 18:39:35 +0000 Subject: [PATCH 09/16] Fixed bugs for identifying and using data folder.
--- merlin/data/dataorganization.py | 12 ++++++------ merlin/util/dataportal.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py index 2d9fbe50..cf9d3237 100755 --- a/merlin/data/dataorganization.py +++ b/merlin/data/dataorganization.py @@ -4,6 +4,7 @@ from typing import Tuple import pandas import numpy as np +from io import StringIO import merlin from merlin.core import dataset @@ -64,8 +65,8 @@ def __init__(self, dataSet, filePath: str = None, if self.data is None and dataPortal is not None: try: - self.data = pandas.read_csv( - dataPortal.open_file('dataorganization.csv'), + self.data = pandas.read_csv(StringIO( + dataPortal.open_file('dataorganization.csv').read_as_text()), converters={'frame': _parse_int_list, 'zPos': _parse_list}) # this could be many different exceptions so for now it can remain # broad. If data can't be loaded from the data portal we load it @@ -273,8 +274,7 @@ def _get_image_path( (self.fileMap['fov'] == fov) & (self.fileMap['imagingRound'] == imagingRound)] filemapPath = selection['imagePath'].values[0] - return os.path.join(self._dataSet.dataHome, self._dataSet.dataSetName, - filemapPath) + return os.path.join(self._dataSet.imageDataPath, filemapPath) def _truncate_file_path(self, path) -> None: head, tail = os.path.split(path) @@ -300,7 +300,7 @@ def _map_image_files(self) -> None: fileNames = self._dataSet.get_image_file_names() if len(fileNames) == 0: raise dataset.DataFormatException( - 'No image files found at %s.' % self._dataSet.rawDataPath) + 'No image files found at %s.' 
% self._dataSet.imageDataPath) fileData = [] for currentType, currentIndex in zip(uniqueTypes, uniqueIndexes): matchRE = re.compile( @@ -360,7 +360,7 @@ def _validate_file_map(self) -> None: (channelInfo['imageType'], fov, channelInfo['imagingRound'])) - if not self._dataSet.rawDataPortal.open_file( + if not self._dataSet.imageDataPortal.open_file( imagePath).exists(): raise InputDataError( ('Image data for channel {0} and fov {1} not found. ' diff --git a/merlin/util/dataportal.py b/merlin/util/dataportal.py index 2c929f74..a72e7521 100755 --- a/merlin/util/dataportal.py +++ b/merlin/util/dataportal.py @@ -116,8 +116,8 @@ def __init__(self, basePath: str, **kwargs): self._s3 = boto3.resource('s3', **kwargs) def is_available(self): - objects = list(self._s3.Bucket(self._bucketName).objects.limit(10) - .filter(Prefix=self._prefix)) + objects = list(self._s3.Bucket(self._bucketName).objects + .filter(Prefix=self._prefix).limit(10)) return len(objects) > 0 def open_file(self, fileName): From 96a0382a3029caceb01f38b1201d68ff22f44805 Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 19:16:01 -0400 Subject: [PATCH 10/16] Updated changelog. --- CHANGELOG.md | 3 +++ merlin/core/dataset.py | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d3d05c3..c658e3f7 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,3 +45,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - An alternative Lucy-Richardson deconvolution approach that requires ~10x fewer iterations. 
+## [0.1.7] - +### Added +- Added option to put image data into a subdirectory named "Data" (or "data") of the raw data directory and to store the data organization with the raw data as a file named dataorganization.csv in the raw data root directory. diff --git a/merlin/core/dataset.py b/merlin/core/dataset.py index bd11451f..5110b8fb 100755 --- a/merlin/core/dataset.py +++ b/merlin/core/dataset.py @@ -893,8 +893,13 @@ def __init__(self, dataDirectoryName: str, dataHome: str = None, self.imageDataPortal = dataportal.DataPortal.create_portal( self.imageDataPath) if not self.imageDataPortal.is_available(): - self.imageDataPath = self.rawDataPath - self.imageDataPortal = self.rawDataPortal + # allow "data" to be used instead of "Data" + self.imageDataPath = os.sep.join([self.rawDataPath, 'data']) + self.imageDataPortal = dataportal.DataPortal.create_portal( + self.imageDataPath) + if not self.imageDataPortal.is_available(): + self.imageDataPath = self.rawDataPath + self.imageDataPortal = self.rawDataPortal self._load_microscope_parameters() From 65e51ba19474ad97ae12f969cac3dbff0a5ce82f Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sun, 14 Jun 2020 19:19:40 -0400 Subject: [PATCH 11/16] Pep8 --- merlin/data/dataorganization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py index cf9d3237..cb54c149 100755 --- a/merlin/data/dataorganization.py +++ b/merlin/data/dataorganization.py @@ -65,8 +65,8 @@ def __init__(self, dataSet, filePath: str = None, if self.data is None and dataPortal is not None: try: - self.data = pandas.read_csv(StringIO( - dataPortal.open_file('dataorganization.csv').read_as_text()), + self.data = pandas.read_csv(StringIO(dataPortal.open_file( + 'dataorganization.csv').read_as_text()), converters={'frame': _parse_int_list, 'zPos': _parse_list}) # this could be many different exceptions so for now it can remain # broad.
If data can't be loaded from the data portal we load it From 07ffe6b33d9d444d5d86830c794a8708d13dc1d4 Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Sat, 11 Jul 2020 12:44:43 +0000 Subject: [PATCH 12/16] Added big tiff option. --- merlin/analysis/generatemosaic.py | 3 ++- merlin/core/dataset.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/merlin/analysis/generatemosaic.py b/merlin/analysis/generatemosaic.py index 430ea50b..ef17cef3 100755 --- a/merlin/analysis/generatemosaic.py +++ b/merlin/analysis/generatemosaic.py @@ -128,7 +128,8 @@ def _run_analysis(self): for z in zIndexes: with self.dataSet.writer_for_analysis_images( self, 'mosaic_%s_%i' - % (dataOrganization.get_data_channel_name(d), z))\ + % (dataOrganization.get_data_channel_name(d), z), + bigTiff = True, imagej = False)\ as outputTif: mosaic = self._prepare_mosaic_slice( z, d, micronExtents, alignTask, maximumProjection) diff --git a/merlin/core/dataset.py b/merlin/core/dataset.py index 5110b8fb..0ec65e8b 100755 --- a/merlin/core/dataset.py +++ b/merlin/core/dataset.py @@ -204,7 +204,7 @@ def get_analysis_image( def writer_for_analysis_images( self, analysisTask: TaskOrName, imageBaseName: str, - imageIndex: int = None, imagej: bool = True) -> tifffile.TiffWriter: + imageIndex: int = None, bigTiff = False, imagej: bool = True) -> tifffile.TiffWriter: """Get a writer for writing tiff files from an analysis task. 
Args: @@ -216,7 +216,8 @@ def writer_for_analysis_images( """ return tifffile.TiffWriter(self._analysis_image_name( - analysisTask, imageBaseName, imageIndex), imagej=imagej) + analysisTask, imageBaseName, imageIndex), bigtiff=bigTiff, + imagej=imagej) @staticmethod def analysis_tiff_description(sliceCount: int, frameCount: int) -> Dict: From acc56bac24d02f78b5ff865987918336488cf98a Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Tue, 22 Sep 2020 13:53:01 -0400 Subject: [PATCH 13/16] Fixed problem caused by fileHandle not being set when path doesn't exist --- merlin/util/dataportal.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/merlin/util/dataportal.py b/merlin/util/dataportal.py index a72e7521..d7461122 100755 --- a/merlin/util/dataportal.py +++ b/merlin/util/dataportal.py @@ -256,6 +256,7 @@ class LocalFilePortal(FilePortal): def __init__(self, fileName: str): super().__init__(fileName) + self._fileHandle = None if not os.path.exists(fileName): raise FileNotFoundError self._fileHandle = open(fileName, 'rb') @@ -275,7 +276,8 @@ def read_file_bytes(self, startByte, endByte): return self._fileHandle.read(endByte-startByte) def close(self) -> None: - self._fileHandle.close() + if self._fileHandle is not None: + self._fileHandle.close() class S3FilePortal(FilePortal): From e83ceb2ccd285ef55c512aea3fdd80e298cdde8b Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Tue, 22 Sep 2020 13:59:23 -0400 Subject: [PATCH 14/16] Added matplotlib to conda install. 
--- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 366579ab..e7aaabae 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -22,6 +22,7 @@ jobs: conda config --set quiet true conda create -n merlin_env python=3.6 source activate merlin_env + conda install matplotlib conda install rtree conda install pytables cd ~ From 9b5f66e66212637b8b96ee26a406fd7619f6aa7b Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Fri, 20 Nov 2020 21:55:09 +0000 Subject: [PATCH 15/16] Prevent read only directories from creating a problem. --- merlin/core/dataset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/merlin/core/dataset.py b/merlin/core/dataset.py index 0ec65e8b..dcbebd03 100755 --- a/merlin/core/dataset.py +++ b/merlin/core/dataset.py @@ -72,7 +72,10 @@ def __init__(self, dataDirectoryName: str, os.makedirs(self.analysisPath, exist_ok=True) self.logPath = os.sep.join([self.analysisPath, 'logs']) - os.makedirs(self.logPath, exist_ok=True) + try: + os.makedirs(self.logPath, exist_ok=True) + except PermissionError as e: + print("Unable to create logging directory") self._store_dataset_metadata() From 595136c1a0b9c79c5dd0d22927bf025cf842bfbf Mon Sep 17 00:00:00 2001 From: George Emanuel Date: Tue, 24 Nov 2020 02:47:39 +0000 Subject: [PATCH 16/16] Updated data organization to work when working on read only storage. 
--- merlin/data/dataorganization.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/merlin/data/dataorganization.py b/merlin/data/dataorganization.py index cb54c149..61d28c93 100755 --- a/merlin/data/dataorganization.py +++ b/merlin/data/dataorganization.py @@ -80,8 +80,11 @@ def __init__(self, dataSet, filePath: str = None, converters={'frame': _parse_int_list, 'zPos': _parse_list}) self.data['readoutName'] = self.data['readoutName'].str.strip() - self._dataSet.save_dataframe_to_csv( - self.data, 'dataorganization', index=False) + try: + self._dataSet.save_dataframe_to_csv( + self.data, 'dataorganization', index=False) + except PermissionError as e: + print('Unable to save data organization.') stringColumns = ['readoutName', 'channelName', 'imageType', 'imageRegExp', 'fiducialImageType', 'fiducialRegExp']