diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f91c7e9..3fbebaa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +**v0.55.0** +* [[TeamMsgExtractor #465](https://github.com/TeamMsgExtractor/msg-extractor/issues/465)] Added missing `msg.close()` to `openMsg()`. If the MSG file was actually just a plain OLE file, it would be left open. +* Adjusted the default value of `maxNameLength` for `MessageBase.save()` to 40 instead of 256. +* Adjusted exception handling for `MessageBase.save()` to properly report the reason a folder fails to be created. +* Simplified some of the code for `MessageBase.save()`. +* Fixed some typing information. + **v0.54.1** * [[TeamMsgExtractor #462](https://github.com/TeamMsgExtractor/msg-extractor/issues/462)] Fix potential issue where child MSG might have incompatible encoding to parent MSG when trying to grab a stream from the parent. * Added code to attempt to significantly improve RTF deencapsulation times. This tries to strip away unneeded data before passing it to `RTFDE`. This shows improvements on all files that take more than one second. Currently, this actually fixes some files previously outputting wrong from `RTFDE` when deencapsulating the HTML body, specifically around non breaking spaces sometimes not transferring over. diff --git a/README.rst b/README.rst index 2fe2af9e..1bc826d0 100644 --- a/README.rst +++ b/README.rst @@ -260,8 +260,8 @@ your access to the newest major version of extract-msg. .. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg :target: LICENSE.txt -.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.54.1-blue.svg - :target: https://pypi.org/project/extract-msg/0.54.1/ +.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.55.0-blue.svg + :target: https://pypi.org/project/extract-msg/0.55.0/ .. 
|PyPI2| image:: https://img.shields.io/badge/python-3.8+-brightgreen.svg :target: https://www.python.org/downloads/release/python-3810/ diff --git a/extract_msg/__init__.py b/extract_msg/__init__.py index 030e9316..7fac49f3 100644 --- a/extract_msg/__init__.py +++ b/extract_msg/__init__.py @@ -27,8 +27,8 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. __author__ = 'Destiny Peterson & Matthew Walker' -__date__ = '2025-04-10' -__version__ = '0.54.1' +__date__ = '2025-08-12' +__version__ = '0.55.0' __all__ = [ # Modules: diff --git a/extract_msg/attachments/attachment.py b/extract_msg/attachments/attachment.py index e12c21e8..e8728619 100644 --- a/extract_msg/attachments/attachment.py +++ b/extract_msg/attachments/attachment.py @@ -13,7 +13,7 @@ import string import zipfile -from typing import TYPE_CHECKING +from typing import Optional, TYPE_CHECKING from .. import constants from .attachment_base import AttachmentBase @@ -72,7 +72,7 @@ def getFilename(self, **kwargs) -> str: return filename - def regenerateRandomName(self) -> str: + def regenerateRandomName(self) -> None: """ Used to regenerate the random filename used if the attachment cannot find a usable filename. @@ -166,9 +166,11 @@ def save(self, **kwargs) -> constants.SAVE_TYPE: _zip.close() @property - def data(self) -> bytes: + def data(self) -> Optional[bytes]: """ The bytes making up the attachment data. + + If the attachment data stream does not exist, returns None. 
""" return self.__data diff --git a/extract_msg/msg_classes/message_base.py b/extract_msg/msg_classes/message_base.py index 74f94df8..82b643c5 100644 --- a/extract_msg/msg_classes/message_base.py +++ b/extract_msg/msg_classes/message_base.py @@ -726,12 +726,12 @@ def save(self, **kwargs) -> constants.SAVE_TYPE: pdf = kwargs.get('pdf', False) allowFallback = kwargs.get('allowFallback', False) _zip = kwargs.get('zip') - maxNameLength = kwargs.get('maxNameLength', 256) + maxNameLength = kwargs.get('maxNameLength', 40) # Variables involved in the save location. customFilename = kwargs.get('customFilename') useMsgFilename = kwargs.get('useMsgFilename', False) - #maxPathLength = kwargs.get('maxPathLength', 255) + #maxPathLength = kwargs.get('maxPathLength', 255) # TODO # Track if we are only saving the attachments. attachOnly = kwargs.get('attachmentsOnly', False) @@ -742,6 +742,8 @@ def save(self, **kwargs) -> constants.SAVE_TYPE: # raising an exception. skipBodyNotFound = kwargs.get('skipBodyNotFound', False) + fext = None + if pdf: kwargs['preparedHtml'] = True @@ -758,30 +760,22 @@ def save(self, **kwargs) -> constants.SAVE_TYPE: if self.htmlBody: useHtml = True fext = 'html' - elif not allowFallback: - if skipBodyNotFound: - fext = None - else: - raise DataNotFoundError('Could not find the htmlBody.') + elif not allowFallback and not skipBodyNotFound: + raise DataNotFoundError('Could not find the htmlBody.') if pdf: if self.htmlBody: usePdf = True fext = 'pdf' - elif not allowFallback: - if skipBodyNotFound: - fext = None - else: - raise DataNotFoundError('Count not find the htmlBody to convert to pdf.') + elif not allowFallback and not skipBodyNotFound: + raise DataNotFoundError('Count not find the htmlBody to convert to pdf.') if rtf or (html and not useHtml) or (pdf and not usePdf): if self.rtfBody: useRtf = True fext = 'rtf' elif not allowFallback: - if skipBodyNotFound: - fext = None - else: + if not skipBodyNotFound: raise DataNotFoundError('Could not find the 
rtfBody.') else: # This was the last resort before plain text, so fall @@ -794,10 +788,7 @@ def save(self, **kwargs) -> constants.SAVE_TYPE: # We need to check if the plain text body was found. If it # was found but was empty that is considered valid, so we # specifically check against None. - if self.body is None: - if skipBodyNotFound: - fext = None - else: + if self.body is None and not skipBodyNotFound: if allowFallback: raise DataNotFoundError('Could not find a valid body using current options.') else: @@ -872,12 +863,12 @@ def save(self, **kwargs) -> constants.SAVE_TYPE: if not _zip: try: os.makedirs(path) - except Exception: + except Exception as e: newDirName = addNumToDir(path) if newDirName: path = newDirName else: - raise OSError(f'Failed to create directory "{path}". Does it already exist?') + raise OSError(f'Failed to create directory "{path}". Reason: {e}') else: # In my testing I ended up with multiple files in a zip at the # same location so let's try to handle that. diff --git a/extract_msg/open_msg.py b/extract_msg/open_msg.py index ab8b35a9..619ed10b 100644 --- a/extract_msg/open_msg.py +++ b/extract_msg/open_msg.py @@ -109,6 +109,7 @@ def openMsg(path, **kwargs) -> MSGFile: # lower function. So let's make sure we got a good return first. if not ct: if kwargs.get('strict', True): + msg.close() raise InvalidFileFormatError('File was confirmed to be an olefile, but was not an MSG file.') else: # If strict mode is off, we'll just return an MSGFile anyways.