diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml
new file mode 100644
index 0000000..af2769a
--- /dev/null
+++ b/.github/workflows/lint-and-test.yml
@@ -0,0 +1,81 @@
+name: Lint and Test
+
+on: [push, pull_request]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-20.04
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12']  # always quoted: 3.10 would otherwise parse as 3.1
+
+    env:
+      exiftool_version: '12.15'  # quoted so a future x.y0 release (e.g. 12.50) is not truncated to a float
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+        # while https://github.com/actions/setup-python recommends using a specific dependency version to use cache
+        # we'll see if this just uses it in default configuration
+        # this can't be enabled unless a requirements.txt file exists. PyExifTool doesn't have any hard requirements
+        #cache: 'pip'
+    - name: Cache Perl ExifTool Download
+      # https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows
+      uses: actions/cache@v4
+      env:
+        cache-name: cache-perl-exiftool
+      with:
+        # path where we would extract the ExifTool source files
+        path: Image-ExifTool-${{ env.exiftool_version }}
+        key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ env.exiftool_version }}
+
+    - name: Install dependencies
+      run: |
+        # don't have to do this on the GitHub runner, it's going to always be the latest
+        #python -m pip install --upgrade pip
+        # the setup-python uses it this way instead of calling it via module, so maybe this will cache ...
+        pip install flake8 pytest
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+
+        # latest version not yet available on Ubuntu Focal 20.04 LTS, but it's better to install it with all dependencies first
+        sudo apt-get install -qq libimage-exiftool-perl
+        # print this in the log
+        exiftool -ver
+
+        # get just the minimum version to build and compile, later we can go with latest version to test
+        # working with cache: only get if the directory doesn't exist
+        if [ ! -d Image-ExifTool-${{ env.exiftool_version }} ]; then wget http://backpan.perl.org/authors/id/E/EX/EXIFTOOL/Image-ExifTool-${{ env.exiftool_version }}.tar.gz; fi
+        # extract if it was downloaded
+        if [ -f Image-ExifTool-${{ env.exiftool_version }}.tar.gz ]; then tar xf Image-ExifTool-${{ env.exiftool_version }}.tar.gz; fi
+
+        cd Image-ExifTool-${{ env.exiftool_version }}/
+
+        # https://exiftool.org/install.html#Unix
+        perl Makefile.PL
+        make test
+
+        export PATH="$PWD:$PATH"
+        cd ..
+        exiftool -ver
+
+        # persist the ExifTool directory on PATH for subsequent steps via GITHUB_PATH;
+        # we are back in the workspace root here, so name the directory explicitly instead of using `pwd`
+        echo "$PWD/Image-ExifTool-${{ env.exiftool_version }}" >> "$GITHUB_PATH"
+    - name: Install pyexiftool
+      run: |
+        # install all supported json processors for tests (extras quoted so the shell never globs the brackets)
+        python -m pip install '.[json,test]'
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pytest
diff --git a/.gitignore b/.gitignore
index 6830690..5429489 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,14 @@ __pycache__/
 build/
 dist/
 MANIFEST
+
+*.egg-info/
+
+# pytest-cov db
+.coverage
+
+# tests will be made to write to temp directories with this prefix
+tests/exiftool-tmp-*
+
+# IntelliJ
+.idea
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..e867e6a
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,71 @@
+# PyExifTool Changelog
+
+Date (Timezone) | Version | Comment
+---------------------------- | ------- | -------
+03/13/2021 01:54:44 PM (PST) | 0.5.0a0 | no functional code changes ... yet. this is currently on a separate branch referring to [Break down Exiftool into 2+ classes, a raw Exiftool, and helper classes](https://github.com/sylikc/pyexiftool/discussions/10) and [Deprecating Python 2.x compatibility](https://github.com/sylikc/pyexiftool/discussions/9) . In time this refactor will be the future of PyExifTool, once it stabilizes. I'll make code-breaking updates in this branch from build to build and take comments to make improvements. Consider the 0.5.0 "nightly" quality. Also, changelog versions were modified because I noticed that the LAST release from smarnach is tagged with v0.2.0
+02/28/2022 12:39:57 PM (PST) | 0.5.0 | complete refactor of the PyExifTool code. Lots of changes. Some code breaking changes. Not directly backwards-compatible with v0.4.x. See COMPATIBILITY.TXT to understand all the code-breaking changes.
+03/02/2022 07:07:26 AM (PST) | 0.5.1 | v0.5 Sphinx documentation generation finally working. Lots of reStructuredText written to make the documentation better!
There are no functional changes to PyExifTool, but after several days and hours of effort, every single docstring in ExifTool and ExifToolHelper was updated to reflect all v0.5.0 changes. ExifToolAlpha was largely untouched because the methods exposed haven't really been updated this time. +03/03/2022 06:49:31 PM (PST) | 0.5.2 | Predicting the next most requested method: ExifToolHelper now has a set_tags() method similar to the get_tags() method. This was pulled from ExifToolAlpha, combining the old set_tags/set_tags_batch into one method.
Added a new constructor/property to ExifToolHelper: check_execute, which (by default) will raise ExifToolExecuteError when the exit status code from exiftool subprocess is non-zero. This should help users debug otherwise silent errors.
Also updated more docstrings and added maintenance script to generate docs. +03/26/2022 06:48:01 AM (PDT) | 0.5.3 | Quite a few docstring changes
ExifToolHelper's get_tags() and set_tags() checks tag names to prevent inadvertent write behavior
Renamed a few of the errors to make sure the errors are explicit
ExifToolHelper() has some static helper methods which can be used when extending the class (ExifToolAlpha.set_keywords_batch() demonstrates a sample usage).
setup.py tweaked to make it Beta rather than Alpha
ExifToolAlpha.get_tag() updated to make it more robust.
Fixed ujson compatibility
Cleaned up and refactored testing. +08/27/2022 06:06:32 PM (PDT) | 0.5.4 | New Feature: added raw_bytes parameter to ExifTool.execute() to return bytes only with no decoding conversion.
Changed: ExifTool.execute() now accepts both [str,bytes]. When given str, it will encode according to the ExifTool.encoding property.
Changed: ExifToolHelper.execute() now accepts Any type, and will do a str() on any non-str parameter.
Technical change: Popen() no longer uses an -encoding parameter, therefore working with the socket is back to bytes when interfacing with the exiftool subprocess. This should be invisible to most users as the default behavior will still be the same.
Tests: Created associated test with a custom makernotes example to write and read back bytes.
Docs: Updated documentation with comprehensive samples, and a better FAQ section for common problems. +12/30/2022 02:35:18 PM (PST) | 0.5.5 | No functional changes, only a huge speed improvement with large operations :: Update: Speed up large responses from exiftool. Instead of using + string concatenation, uses list appends and reverse(), which results in a speedup of 10x+ for large operations. See more details from the [reported issue](https://github.com/sylikc/pyexiftool/issues/60) and [PR 61](https://github.com/sylikc/pyexiftool/pull/61) by [prutschman](https://github.com/prutschman) +10/22/2023 03:21:46 PM (PDT) | 0.5.6 | New Feature: added method ExifTool.set_json_loads() which allows setting a method to replace the json.loads() called in ExifTool.execute_json(). Changed: ujson is no longer used by default when available. Use the set_json_loads() to enable manually
This permits passing additional configuration parameters to address the [reported issue](https://github.com/sylikc/pyexiftool/issues/76).
All documentation has been updated and two accompanying FAQ entries have been written to describe the new functionality. Test cases have been written to test the new functionality and some baseline exiftool tests to ensure that the behavior remains consistent across tests. + + +Follow maintenance/release-process.html when releasing a version. + + +# PyExifTool Changelog Archive (v0.2 - v0.4) + +Date (Timezone) | Version | Comment +---------------------------- | ------- | ------- +07/17/2019 12:26:16 AM (PDT) | 0.2.0 | Source was pulled directly from https://github.com/smarnach/pyexiftool with a complete bare clone to preserve all history. Because it's no longer being updated, I will pull all merge requests in and make updates accordingly +07/17/2019 12:50:20 AM (PDT) | 0.2.1 | Convert leading spaces to tabs. (I'm aware of [PEP 8](https://www.python.org/dev/peps/pep-0008/#tabs-or-spaces) recommending spaces over tabs, but I <3 tabs) +07/17/2019 12:52:33 AM (PDT) | 0.2.2 | Merge [Pull request #10 "add copy_tags method"](https://github.com/smarnach/pyexiftool/pull/10) by [Maik Riechert (letmaik) Cambridge, UK](https://github.com/letmaik) on May 28, 2014
*This adds a small convenience method to copy any tags from one file to another. I use it for several month now and it works fine for me.* +07/17/2019 01:05:37 AM (PDT) | 0.2.3 | Merge [Pull request #25 "Added option for keeping print conversion active. #25"](https://github.com/smarnach/pyexiftool/pull/25) by [Bernhard Bliem (bbliem)](https://github.com/bbliem) on Jan 17, 2019
*For some tags, disabling print conversion (as was the default before) would not make much sense. For example, if print conversion is deactivated, the value of the Composite:LensID tag could be reported as something like "8D 44 5C 8E 34 3C 8F 0E". It is doubtful whether this is useful here, as we would then need to look up what this means in a table supplied with exiftool. We would probably like the human-readable value, which is in this case "AF-S DX Zoom-Nikkor 18-70mm f/3.5-4.5G IF-ED".*
*Disabling print conversion makes sense for a lot of tags (e.g., it's nicer to get as the exposure time not the string "1/2" but the number 0.5). In such cases, even if we enable print conversion, we can disable it for individual tags by appending a # symbol to the tag name.* +07/17/2019 01:20:15 AM (PDT) | 0.2.4 | Merge with slight modifications to variable names for clarity (sylikc) [Pull request #27 "Add "shell" keyword argument to ExifTool initialization"](https://github.com/smarnach/pyexiftool/pull/27) by [Douglas Lassance (douglaslassance) Los Angeles, CA](https://github.com/douglaslassance) on 5/29/2019
*On Windows this will allow to run exiftool without showing the DOS shell.*
**This might break Linux but I don't know for sure**
Alternative source location with only this patch: https://github.com/blurstudio/pyexiftool/tree/shell-option +07/17/2019 01:24:32 AM (PDT) | 0.2.5 | Merge [Pull request #19 "Correct dependency for building an RPM."](https://github.com/smarnach/pyexiftool/pull/19) by [Achim Herwig (Achimh3011) Munich, Germany](https://github.com/Achimh3011) on Aug 25, 2016
**I'm not sure if this is entirely necessary, but merging it anyways** +07/17/2019 02:09:40 AM (PDT) | 0.2.6 | Merge [Pull request #15 "handling Errno:11 Resource temporarily unavailable"](https://github.com/smarnach/pyexiftool/pull/15) by [shoyebi](https://github.com/shoyebi) on Jun 12, 2015 +07/18/2019 03:40:39 AM (PDT) | 0.2.7 | set_tags and UTF-8 cmdline - Merge in the first set of changes by Leo Broska related to [Pull request #5 "add set_tags_batch, set_tags + constructor takes added options"](https://github.com/smarnach/pyexiftool/pull/5) by [halloleo](https://github.com/halloleo) on Aug 1, 2012
but this is sourced from [jmathai/elodie's 6114328 Jun 22,2016 commit](https://github.com/jmathai/elodie/blob/6114328f325660287d1998338a6d5e6ba4ccf069/elodie/external/pyexiftool.py) +07/18/2019 03:59:02 AM (PDT) | 0.2.8 | Merge another commit from the jmathai/elodie [zserg on Mar 12, 2016](https://github.com/jmathai/elodie/blob/af36de091e1746b490bed0adb839adccd4f6d2ef/elodie/external/pyexiftool.py)
seems to do UTF-8 encoding on set_tags +07/18/2019 04:01:18 AM (PDT) | 0.2.9 | minor change it looks like a rename to match PEP8 coding standards by [zserg on Aug 21, 2016](https://github.com/jmathai/elodie/blob/ad1cbefb15077844a6f64dca567ea5600477dd52/elodie/external/pyexiftool.py) +07/18/2019 04:05:36 AM (PDT) | 0.2.10 | [Fallback to latin if utf-8 decode fails in pyexiftool.py](https://github.com/jmathai/elodie/commit/fe70227c7170e01c8377de7f9770e761eab52036#diff-f9cf0f3eed27e85c9c9469d0e0d431d5) by [jmathai](https://github.com/jmathai/elodie/commits?author=jmathai) on Sep 7, 2016 +07/18/2019 04:14:32 AM (PDT) | 0.2.11 | Merge the test cases from the [Pull request #5 "add set_tags_batch, set_tags + constructor takes added options"](https://github.com/smarnach/pyexiftool/pull/5) by [halloleo](https://github.com/halloleo) on Aug 1, 2012 +07/18/2019 04:34:46 AM (PDT) | 0.3.0 | changed the setup.py licensing and updated the version numbering as in changelog
changed the version number scheme, as it appears the "official last release" was 0.2.0 tagged. There's going to be a lot of things broken in this current build, and I'll fix it as they come up. I'm going to start playing with the library and the included tests and such.
There's one more pull request #11 which would be pending, but it duplicates the extra arguments option.
I'm also likely to remove the print conversion as it's now covered by the extra args. I'll also rename some variable names with the addedargs patch
**for my changes (sylikc), I can only guarantee they will work on Python 3.7, because that's my environment... and while I'll try to maintain compatibility, there's no guarantees** +07/18/2019 05:06:19 AM (PDT) | 0.3.1 | make some minor tweaks to the naming of the extra args variable. The other pull request 11 names them params, and when I decide how to merge that pull request, I'll probably change the variable names again. +07/19/2019 12:01:22 AM (PDT) | 0.3.2 | fix the select() problem for windows, and fix all tests +07/19/2019 12:54:39 AM (PDT) | 0.3.3 | Merge a piece of [Pull request #11 "Robustness enhancements"](https://github.com/smarnach/pyexiftool/pull/11) by [Matthias Kiefer (kiefermat)](https://github.com/kiefermat) on Oct 27, 2014
*On linux call prctl in subprocess to be sure that the exiftool child process is killed even if the parent process is killed by itself*
also removed print_conversion
also merged the common_args and added_args into one args list +07/19/2019 01:18:26 AM (PDT) | 0.3.4 | Merge the rest of Pull request #11. Added the other pieces, however, I added them as "wrappers" instead of modifying the interface of the original code. I feel like the additions here are overly done, and as I understand the code more, I'll either remove it or incorporate it into single functions
from #11 *When getting json results, verify that the results returned by exiftool actually belong to the correct file by checking the SourceFile property of the returned result*
and also *Added possibility to provide different exiftools params for each file separately* +07/19/2019 01:22:48 AM (PDT) | 0.3.5 | changed a bit of the test_exiftool so all the tests pass again +01/04/2020 11:59:14 AM (PST) | 0.3.6 | made the tests work with the latest output of ExifTool. This is the final version which is named "exiftool" +01/04/2020 12:16:51 PM (PST) | 0.4.0 | pyexiftool rename (and make all tests work again) ... I also think that the pyexiftool.py has gotten too big. I'll probably break it out into a directory structure later to make it more maintainable +02/01/2020 05:09:43 PM (PST) | 0.4.1 | incorporated pull request #2 and #3 by ickc which added a "no_output" feature and an import for ujson if it's installed. Thanks for the updates! +04/09/2020 04:25:31 AM (PDT) | 0.4.2 | roll back 0.4.0's pyexiftool rename. It appears there's no specific PEP to have to to name PyPI projects to be py. The only convention I found was https://www.python.org/dev/peps/pep-0423/#use-standard-pattern-for-community-contributions which I might look at in more detail +04/09/2020 05:15:40 AM (PDT) | 0.4.3 | initial work of moving the exiftool.py into a directory preparing to break it down into separate files to make the codebase more manageable +03/12/2021 01:37:30 PM (PST) | 0.4.4 | no functional code changes. Revamped the setup.py and related files to release to PyPI. Added all necessary and recommended files into release +03/12/2021 02:03:38 PM (PST) | 0.4.5 | no functional code changes. re-release with new version because I accidentally included the "test" package with the PyPI 0.4.4 release. I deleted it instead of yanking or doing a post release this time... just bumped the version. 
"test" folder renamed to "tests" as per convention, so the build will automatically ignore it +04/08/2021 03:38:46 PM (PDT) | 0.4.6 | added support for config files in constructor -- Merged pull request #7 from @asielen and fixed a bug referenced in the discussion https://github.com/sylikc/pyexiftool/pull/7 +04/19/2021 02:37:02 PM (PDT) | 0.4.7 | added support for writing a list of values in set_tags_batch() which allows setting individual keywords (and other tags which are exiftool lists) -- contribution from @davidorme referenced in issue https://github.com/sylikc/pyexiftool/issues/12#issuecomment-821879234 +04/28/2021 01:50:59 PM (PDT) | 0.4.8 | no functional changes, only a minor documentation link update -- Merged pull request #16 from @beng +05/19/2021 09:37:52 PM (PDT) | 0.4.9 | test_tags() parameter encoding bugfix and a new test case TestTagCopying -- Merged pull request #19 from @jangop
I also added further updates to README.rst to point to my repo and GH pages
I fixed the "previous versions" naming to match the v0.2.0 start. None of them were published, so I changed the version information here just to make it less confusing to a casual observer who might ask "why did you have 0.1 when you forked off on 0.2.0?" Sven Marnach's releases were all 0.1, but he tagged his last release v0.2.0, which is my starting point +08/22/2021 08:32:30 PM (PDT) | 0.4.10 | logger changed to use logging.getLogger(__name__) instead of the root logger -- Merged pull request #24 from @nyoungstudios +08/22/2021 08:34:45 PM (PDT) | 0.4.11 | no functional code changes. Changed setup.py with updated version and Documentation link pointed to sylikc.github.io -- as per issue #27 by @derMart +08/22/2021 09:02:33 PM (PDT) | 0.4.12 | fixed a bug ExifTool.terminate() where there was a typo. Kept the unused outs, errs though. -- from suggestion in pull request #26 by @aaronkollasch +02/13/2022 03:38:45 PM (PST) | 0.4.13 | (NOTE: Barring any critical bug, this is expected to be the LAST Python 2 supported release!) added GitHub actions. fixed bug in execute_json_wrapper() 'error' was not defined syntactically properly -- merged pull request #30 by https://github.com/jangop + + + + +# Changes around the web + +Check for changes at the following resources to see if anyone has added some nifty features. While we have the most active fork, I'm just one of the many forks, spoons, and knives! + +We can also direct users here or answer existing questions as to how to use the original version of ExifTool. 
+ +(last checked 10/23/2023 all) + +search "pyexiftool github" to see if you find any more random ports/forks +check for updates https://github.com/smarnach/pyexiftool/pulls +check for new open issues https://github.com/smarnach/pyexiftool/issues?q=is%3Aissue+is%3Aopen + +answer relevant issues on stackoverflow (make sure it's related to the latest version) https://stackoverflow.com/search?tab=newest&q=pyexiftool&searchOn=3 diff --git a/COMPATIBILITY.txt b/COMPATIBILITY.txt new file mode 100644 index 0000000..0f10017 --- /dev/null +++ b/COMPATIBILITY.txt @@ -0,0 +1,55 @@ +PyExifTool does not guarantee source-level compatibility from one release to the next. + +That said, efforts will be made to provide well-documented API-level compatibility, +and if there are major API changes, migration documentation will be provided, when +possible. + +---- + +v0.1.x - v0.2.0 = smarnach code, API compatible +v0.2.1 - v0.4.13 = original v0.2 code with all PRs, a superset of functionality on Exiftool class +v0.5.0 - = not API compatible with the v0.4.x series. Broke down functionality stability by classes. See comments below: + + +---- +API changes between v0.4.x and v0.5.0: + + PYTHON CHANGE: Old: Python 2.6 supported. New: Python 3.6+ required + + CHANGED: Exiftool constructor: + RENAME: "executable_" parameter to "executable" + DEFAULT BEHAVIOR: "common_args" defaults to ["-G", "-n"] instead of None. Old behavior set -G and -n if "common_args" is None. New behavior "common_args" = [] if common_args is None. + DEFAULT: Old: "win_shell" defaults to True. New: "win_shell" defaults to False. + NEW: "encoding" parameter + NEW: "logger" parameter + + NEW PROPERTY GET/SET: a lot of properties were added to do get/set validation, and parameters can be changed outside of the constructor. + + METHOD RENAME: starting the process was renamed from "start" to "run" + + MINIMUM TOOL VERSION: exiftool command line utility minimum requirements. Old: 8.60. 
New: 12.15 + + ENCODING CHANGE: execute() and execute_json() no longer take bytes, but are guided by the encoding set in constructor/property + + ERROR CHANGE: execute_json() when no json was returned (such as a set metadata operation) => Old: raised an error. New: returns custom ExifToolException + + FEATURE REMOVAL: execute_json() no longer detects the '-w' flag being passed in common_args. + If a user uses this flag, expect no output. + (detection in common_args was clunky anyways because -w can be passed as a per-run param for the same effect) + + + all methods other than execute() and execute_json() moved to ExifToolHelper or ExifToolAlpha class. + + ExifToolHelper adds methods: + get_metadata() + get_tags() + + NEW CONVENTION: all methods take "files" first, "tags" second (if needed) and "params" last + + + ExifToolAlpha adds all remaining methods in an alpha-quality way + + NOTE: ExifToolAlpha has not been updated yet to use the new convention, and the edge case code may be removed/changed at any time. + If you depend on functionality provided by ExifToolAlpha, please submit an Issue to start a discussion on cleaning up the code and moving it into ExifToolHelper +---- + diff --git a/COPYING.BSD b/COPYING.BSD index cc68df4..ec6075c 100644 --- a/COPYING.BSD +++ b/COPYING.BSD @@ -1,4 +1,4 @@ -Copyright 2012 Sven Marnach +Copyright 2012 Sven Marnach, 2019-2023 Kevin M (sylikc) All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1791c17 --- /dev/null +++ b/LICENSE @@ -0,0 +1,15 @@ +PyExifTool + +Copyright 2019-2023 Kevin M (sylikc) +Copyright 2012-2014 Sven Marnach + +PyExifTool is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the licence, or +(at your option) any later version, or the BSD licence. 
+ +PyExifTool is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +See COPYING.GPL or COPYING.BSD for more details. diff --git a/README.rst b/README.rst index b2c6a64..05b95bd 100644 --- a/README.rst +++ b/README.rst @@ -1,9 +1,52 @@ +********** PyExifTool -========== +********** -PyExifTool is a Python library to communicate with an instance of Phil -Harvey's excellent ExifTool_ command-line application. The library -provides the class ``exiftool.ExifTool`` that runs the command-line +.. image:: https://img.shields.io/badge/Docs-latest-blueviolet + :alt: GitHub Pages + :target: http://sylikc.github.io/pyexiftool/ + + +.. HIDE_FROM_PYPI_START + +.. image:: https://github.com/sylikc/pyexiftool/actions/workflows/lint-and-test.yml/badge.svg + :alt: GitHub Actions + :target: https://github.com/sylikc/pyexiftool/actions + +.. image:: https://img.shields.io/pypi/v/pyexiftool.svg + :target: https://pypi.org/project/PyExifTool/ + :alt: PyPI Version + + +.. HIDE_FROM_PYPI_END + +.. image:: https://img.shields.io/pypi/pyversions/pyexiftool.svg + :target: https://pypi.org/project/PyExifTool/ + :alt: Supported Python Versions + +.. image:: https://pepy.tech/badge/pyexiftool + :target: https://pepy.tech/project/pyexiftool + :alt: Total PyPI Downloads + +.. image:: https://static.pepy.tech/personalized-badge/pyexiftool?period=month&units=international_system&left_color=black&right_color=orange&left_text=Downloads%2030d + :target: https://pepy.tech/project/pyexiftool + :alt: PyPI Downloads this month + + + +.. DESCRIPTION_START + +.. BLURB_START + +PyExifTool is a Python library to communicate with an instance of +`Phil Harvey's ExifTool`_ command-line application. + +.. _Phil Harvey's ExifTool: https://exiftool.org/ + + +.. 
BLURB_END + +The library provides the class ``exiftool.ExifTool`` that runs the command-line tool in batch mode and features methods to send commands to that program, including methods to extract meta-information from one or more image files. Since ``exiftool`` is run in batch mode, only a @@ -11,52 +54,233 @@ single instance needs to be launched and can be reused for many queries. This is much more efficient than launching a separate process for every single query. -.. _ExifTool: http://www.sno.phy.queensu.ca/~phil/exiftool/ + +.. DESCRIPTION_END + +.. contents:: + :depth: 2 + :backlinks: none + +Example Usage +============= + +Simple example: :: + + import exiftool + + files = ["a.jpg", "b.png", "c.tif"] + with exiftool.ExifToolHelper() as et: + metadata = et.get_metadata(files) + for d in metadata: + print("{:20.20} {:20.20}".format(d["SourceFile"], + d["EXIF:DateTimeOriginal"])) + +Refer to documentation for more `Examples and Quick Start Guide`_ + +.. _`Examples and Quick Start Guide`: http://sylikc.github.io/pyexiftool/examples.html + + +.. INSTALLATION_START Getting PyExifTool ------------------- +================== + +PyPI +------------ -The source code can be checked out from the github repository with +Easiest: Install a version from the official `PyExifTool PyPI`_ :: - git clone git://github.com/smarnach/pyexiftool.git + python -m pip install -U pyexiftool -Alternatively, you can download a tarball_. There haven't been any -releases yet. +.. _PyExifTool PyPI: https://pypi.org/project/PyExifTool/ -.. _tarball: https://github.com/smarnach/pyexiftool/tarball/master -Installation +From Source ------------ -PyExifTool runs on Python 2.6 and above, including 3.x. It has been -tested on Windows and Linux, and probably also runs on other Unix-like -platforms. +#. Check out the source code from the github repository + + * ``git clone git://github.com/sylikc/pyexiftool.git`` + * Alternatively, you can download a tarball_. + +#. 
Run setup.py to install the module from source + + * ``python setup.py install [--user|--prefix=]`` + + +.. _tarball: https://github.com/sylikc/pyexiftool/tarball/master + + +PyExifTool Dependencies +======================= + +Python +------ + +PyExifTool runs on **Python 3.6+**. (If you need Python 2.6 support, +please use version v0.4.x). PyExifTool has been tested on Windows and +Linux, and probably also runs on other Unix-like platforms. + +Phil Harvey's exiftool +---------------------- + +For PyExifTool to function, the ``exiftool`` command-line tool must exist on +the system. If ``exiftool`` is not on the ``PATH``, you can specify the full +pathname to it by using ``ExifTool(executable=)``. + +PyExifTool requires a **minimum version of 12.15** (which was the first +production version of exiftool featuring the options to allow exit status +checks used in conjunction with ``-echo3`` and ``-echo4`` parameters). + +To check your ``exiftool`` version: + +:: + + exiftool -ver -You need an installation of the ``exiftool`` command-line tool. The -code has been tested with version 8.60, but should work with version -8.40 or above (which was the first production version of exiftool -featuring the ``-stay_open`` option for batch mode). -PyExifTool currently only consists of a single module, so you can -simply copy or link this module to a place where Python finds it, or -you can call +Windows/Mac +^^^^^^^^^^^ + +Windows/Mac users can download the latest version of exiftool: :: - python setup.py install [--user|--prefix=NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/doc/conf.py b/docs/source/conf.py
similarity index 66%
rename from doc/conf.py
rename to docs/source/conf.py
index 65e6a8a..529b39e 100644
--- a/doc/conf.py
+++ b/docs/source/conf.py
@@ -11,12 +11,43 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import sys, os
+import sys
+from pathlib import Path
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(1, os.path.abspath('..'))
+# NOTE: sys.path entries must be str (Path objects are ignored on import); the package root is two levels above docs/source/.
+# https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.parent
+# "Path.parent is a purely lexical operation
+# If you want to walk an arbitrary filesystem path upwards,
+# it is recommended to first call Path.resolve() so as to
+# resolve symlinks and eliminate “..” components."
+sys.path.insert(1, str(Path(__file__).resolve().parents[2]))
+
+
+
+# -- Project information -----------------------------------------------------
+
+# General information about the project.
+project = 'PyExifTool'
+copyright = '2023, Kevin M (sylikc)'
+author = 'Kevin M (sylikc)'
+
+# read directly from exiftool's version instead of hard coding it here
+import exiftool
+from packaging import version as pv
+et_ver = pv.parse(exiftool.__version__)
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = f'{et_ver.major}.{et_ver.minor}'
+# The full version, including alpha/beta/rc tags.
+release = exiftool.__version__ + # -- General configuration ----------------------------------------------------- @@ -25,7 +56,57 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc'] +extensions = [ + 'sphinx.ext.autodoc', # Core library for html generation from docstrings + 'sphinx.ext.autodoc.typehints', + #'sphinx.ext.autosummary', # Create neat summary tables + 'autoapi.extension', # pip install sphinx-autoapi + 'sphinx_autodoc_typehints', # pip install sphinx-autodoc-typehints + 'sphinx.ext.inheritance_diagram', +] + +#autosummary_generate = True # Turn on sphinx.ext.autosummary + + + +autoapi_type = 'python' +autoapi_dirs = ['../../exiftool'] +autoapi_member_order = 'groupwise' +#autoapi_python_use_implicit_namespaces = True + +# make my life easier, configure the autoapi with specific options for things that I care about ... aka +# hide 'private-members' - inheriting classes should not have to handle or interfere with private variables +# hide 'imported-members' - after all i import the submodules into the base namespace - don't need it to show twice +autoapi_options = [ 'members', 'undoc-members', 'show-inheritance', 'show-inheritance-diagram', 'show-module-summary', 'special-members', ] +#autoapi_generate_api_docs = False +autoapi_python_class_content = 'both' # show __init__ with class docstring + + +# https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autodoc_typehints +# comment out when all documentation has documented parameters ... sometimes causes duplicates, but that may be a RST problem... 
always put links at the END of the docstring instead of in the middle +autodoc_typehints = 'description' + +typehints_defaults = "comma" + + + +# the common names of the classes rather than the absolute paths +inheritance_alias = { + 'exiftool.exiftool.ExifTool': 'exiftool.ExifTool', + 'exiftool.helper.ExifToolHelper': 'exiftool.ExifToolHelper', + 'exiftool.experimental.ExifToolAlpha': 'exiftool.ExifToolAlpha', +} + +# help on attributes and Graphviz params: +# https://www.sphinx-doc.org/en/master/usage/extensions/inheritance.html +# https://graphs.grevian.org/reference +# https://graphviz.org/doc/info/attrs.html +#inheritance_graph_attrs = dict(rankdir="LR", size='"6.0, 8.0"', fontsize=14, ratio='compress') +inheritance_graph_attrs = dict(pad="0.2", center=True) +inheritance_node_attrs = dict(shape='box', fontsize=14, height=0.75, color='dodgerblue1', style='rounded') + + + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -39,18 +120,6 @@ # The master toctree document. master_doc = 'index' -# General information about the project. -project = u'PyExifTool' -copyright = u'2012, Sven Marnach' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '0.1' -# The full version, including alpha/beta/rc tags. -release = '0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -91,13 +160,21 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +html_theme = 'sphinx_rtd_theme' # pip install sphinx_rtd_theme # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. 
#html_theme_options = {} +# https://stackoverflow.com/questions/62904172/how-do-i-replace-view-page-source-with-edit-on-github-links-in-sphinx-rtd-th/62904217#62904217 +html_context = { + #'display_github': True, + 'github_user': 'sylikc', + 'github_repo': 'pyexiftool', + 'github_version': 'master/docs/source/', +} + # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] @@ -182,10 +259,12 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). +""" latex_documents = [ - ('index', 'PyExifTool.tex', u'PyExifTool Documentation', - u'Sven Marnach', 'manual'), + ('index', 'PyExifTool.tex', u'PyExifTool Documentation', + u'Sven Marnach', 'manual'), ] +""" # The name of an image file (relative to this directory) to place at the top of # the title page. @@ -212,10 +291,12 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). +""" man_pages = [ - ('index', 'pyexiftool', u'PyExifTool Documentation', - [u'Sven Marnach'], 1) + ('index', 'pyexiftool', u'PyExifTool Documentation', + [u'Sven Marnach'], 1) ] +""" # If true, show URL addresses after external links. #man_show_urls = False @@ -226,11 +307,13 @@ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) +""" texinfo_documents = [ - ('index', 'PyExifTool', u'PyExifTool Documentation', - u'Sven Marnach', 'PyExifTool', 'One line description of project.', - 'Miscellaneous'), + ('index', 'PyExifTool', u'PyExifTool Documentation', + u'Sven Marnach', 'PyExifTool', 'One line description of project.', + 'Miscellaneous'), ] +""" # Documents to append as an appendix to all manuals. 
#texinfo_appendices = [] diff --git a/docs/source/examples.rst b/docs/source/examples.rst new file mode 100644 index 0000000..b2606d2 --- /dev/null +++ b/docs/source/examples.rst @@ -0,0 +1,334 @@ +********************** +Examples / Quick Start +********************** + +.. NOTE: No tabs in this file, all spaces, to simplify examples indentation + + +Try it yourself: All of these examples are using the images provided in the `tests directory`_ in the source + +.. _`tests directory`: https://github.com/sylikc/pyexiftool/tree/master/tests/images + + + +Understanding input and output from PyExifTool base methods +=========================================================== + +Almost all methods in PyExifTool revolve around the usage of two methods from the base :py:class:`exiftool.ExifTool` class. + + +**It is important to understand the output from each of these commands**, so here's a quick summary (you can click through to the API to read more details) + +.. note:: + + Because both methods are inherited by :py:class:`exiftool.ExifToolHelper` and :py:class:`exiftool.ExifToolAlpha`, you can call them from those classes as well. + + +.. _examples input params: + +Input parameters +---------------- + +Both methods take an argument list ``*args``. Examples: + +.. note:: + + As a general rule of thumb, if there is an **unquoted space on the command line** to *exiftool*, it's a **separate argument to the method** in PyExifTool. + + If you have a working `exiftool` command-line but are having trouble figuring out how to properly separate the arguments, please refer to the :ref:`FAQ <shlex split>` + +* Calling directly: + + * exiftool command-line: + + .. code-block:: text + + exiftool -XMPToolKit -Subject rose.jpg + + * PyExifTool: + + .. code-block:: + + execute("-XMPToolKit", "-Subject", "rose.jpg") + +* Using argument unpacking of a list: + + * exiftool command-line: + + ..
code-block:: text + + exiftool -P -DateTimeOriginal="2021:01:02 03:04:05" -MakerNotes= "spaces in filename.jpg" + + * PyExifTool: + + .. note:: + + Parameters which need to be quoted on the command line generally do not need to be quoted in the parameters to PyExifTool. In fact, quoting may have unintended behavior. + + In this example, *DateTimeOriginal* value is not quoted in the parameter to execute(). + + .. code-block:: + + execute(*["-P", "-DateTimeOriginal=2021:01:02 03:04:05", "-MakerNotes=", "spaces in filename.jpg"]) + + +* Getting JSON output using argument unpacking of a list: + + * exiftool command-line: + + .. code-block:: text + + exiftool -j -XMP:all -JFIF:JFIFVersion /path/somefile.jpg + + * PyExifTool: + + .. code-block:: + + execute_json(*["-XMP:all", "-JFIF:JFIFVersion", "/path/somefile.jpg"]) + + +Output values +------------- + +* :py:meth:`exiftool.ExifTool.execute_json` + + * Returns a ``list`` of ``dict`` + * Each ``dict`` is a result from a file + * Each ``dict`` contains a key "SourceFile" which points to the relative or absolute file path of file + * All other keys/value pairs are requested metadata + +* :py:meth:`exiftool.ExifTool.execute` + + * Returns a ``str`` + * Typically used for **setting tags** as no values are returned in that case. + + +ExifToolHelper +============== + +Using methods provided by :py:class:`exiftool.ExifToolHelper`: + +ExifToolHelper provides some of the most commonly used operations most people use *exiftool* for + +Getting Tags +------------ + +* Get all tags on a single file + + .. code-block:: + + from exiftool import ExifToolHelper + with ExifToolHelper() as et: + for d in et.get_metadata("rose.jpg"): + for k, v in d.items(): + print(f"Dict: {k} = {v}") + + + .. code-block:: text + + Dict: SourceFile = rose.jpg + Dict: ExifTool:ExifToolVersion = 12.37 + Dict: File:FileName = rose.jpg + Dict: File:Directory = . 
+ Dict: File:FileSize = 4949 + Dict: File:FileModifyDate = 2022:03:03 17:47:11-08:00 + Dict: File:FileAccessDate = 2022:03:27 08:28:16-07:00 + Dict: File:FileCreateDate = 2022:03:03 17:47:11-08:00 + Dict: File:FilePermissions = 100666 + Dict: File:FileType = JPEG + Dict: File:FileTypeExtension = JPG + Dict: File:MIMEType = image/jpeg + Dict: File:ImageWidth = 70 + Dict: File:ImageHeight = 46 + Dict: File:EncodingProcess = 0 + Dict: File:BitsPerSample = 8 + Dict: File:ColorComponents = 3 + Dict: File:YCbCrSubSampling = 2 2 + Dict: JFIF:JFIFVersion = 1 1 + Dict: JFIF:ResolutionUnit = 1 + Dict: JFIF:XResolution = 72 + Dict: JFIF:YResolution = 72 + Dict: XMP:XMPToolkit = Image::ExifTool 8.85 + Dict: XMP:Subject = Röschen + Dict: Composite:ImageSize = 70 46 + Dict: Composite:Megapixels = 0.00322 + +* Get some tags in multiple files + + .. code-block:: + + from exiftool import ExifToolHelper + with ExifToolHelper() as et: + for d in et.get_tags(["rose.jpg", "skyblue.png"], tags=["FileSize", "ImageSize"]): + for k, v in d.items(): + print(f"Dict: {k} = {v}") + + + .. code-block:: text + + Dict: SourceFile = rose.jpg + Dict: File:FileSize = 4949 + Dict: Composite:ImageSize = 70 46 + Dict: SourceFile = skyblue.png + Dict: File:FileSize = 206 + Dict: Composite:ImageSize = 64 64 + +Setting Tags +------------ + +* Setting date and time of some files to current time, overwriting file, but preserving original mod date + + .. code-block:: + + from exiftool import ExifToolHelper + from datetime import datetime + with ExifToolHelper() as et: + now = datetime.strftime(datetime.now(), "%Y:%m:%d %H:%M:%S") + et.set_tags( + ["rose.jpg", "skyblue.png"], + tags={"DateTimeOriginal": now}, + params=["-P", "-overwrite_original"] + ) + + (*No output is returned if successful*) + +* Setting keywords for a file. + + .. 
code-block:: + + from exiftool import ExifToolHelper + with ExifToolHelper() as et: + et.set_tags( + ["rose.jpg", "skyblue.png"], + tags={"Keywords": ["sunny", "nice day", "cool", "awesome"]} + ) + + (*No output is returned if successful*) + + + +Exceptions +---------- + +By default, ExifToolHelper has some **built-in error checking**, making the methods safer to use than calling the base methods directly. + +.. warning:: + + While "safer", the error checking isn't fool-proof. There are a lot of cases where *exiftool* just silently ignores bad input and doesn't indicate an error. + +* Example using get_tags() on a list which includes a non-existent file + + * ExifToolHelper with error-checking, using :py:meth:`exiftool.ExifToolHelper.get_tags` + + .. code-block:: + + from exiftool import ExifToolHelper + with ExifToolHelper() as et: + print(et.get_tags( + ["rose.jpg", "skyblue.png", "non-existent file.tif"], + tags=["FileSize"] + )) + + Output: + + .. code-block:: text + + Traceback (most recent call last): + File "T:\example.py", line 7, in + et.get_tags(["rose.jpg", "skyblue.png", "non-existent file.tif"], tags=["FileSize"]) + File "T:\pyexiftool\exiftool\helper.py", line 353, in get_tags + ret = self.execute_json(*exec_params) + File "T:\pyexiftool\exiftool\exiftool.py", line 1030, in execute_json + result = self.execute("-j", *params) # stdout + File "T:\pyexiftool\exiftool\helper.py", line 119, in execute + raise ExifToolExecuteError(self._last_status, self._last_stdout, self._last_stderr, params) + exiftool.exceptions.ExifToolExecuteError: execute returned a non-zero exit status: 1 + + + * ExifTool only, without error checking, using :py:meth:`exiftool.ExifTool.execute_json` (**Note how the missing file is silently ignored and doesn't show up in returned list.**) + + .. 
code-block:: + + from exiftool import ExifTool + with ExifTool() as et: + print(et.execute_json( + "-FileSize", + *["rose.jpg", "skyblue.png", "non-existent file.tif"] + )) + + Output: + + .. code-block:: text + + [{'SourceFile': 'rose.jpg', 'File:FileSize': 4949}, {'SourceFile': 'skyblue.png', 'File:FileSize': 206}] + + +* Example using :py:meth:`exiftool.ExifToolHelper.get_tags` with a typo. Let's say you wanted to ``get_tags()``, but accidentally copy/pasted something and left a ``=`` character behind (deletes tag rather than getting!)... + + * Using :py:meth:`exiftool.ExifToolHelper.get_tags` + + .. code-block:: + + from exiftool import ExifToolHelper + with ExifToolHelper() as et: + print(et.get_tags(["skyblue.png"], tags=["XMP:Subject=hi"])) + + Output: + + .. code-block:: text + + Traceback (most recent call last): + File "T:\example.py", line 7, in <module> + print(et.get_tags(["skyblue.png"], tags=["XMP:Subject=hi"])) + File "T:\pyexiftool\exiftool\helper.py", line 341, in get_tags + self.__class__._check_tag_list(final_tags) + File "T:\pyexiftool\exiftool\helper.py", line 574, in _check_tag_list + raise ExifToolTagNameError(t) + exiftool.exceptions.ExifToolTagNameError: Invalid Tag Name found: "XMP:Subject=hi" + + * Using :py:meth:`exiftool.ExifTool.execute_json`. It still raises an exception, but more cryptic and difficult to debug + + .. code-block:: + + from exiftool import ExifTool + with ExifTool() as et: + print(et.execute_json(*["-XMP:Subject=hi"] + ["skyblue.png"])) + + Output: + + ..
code-block:: text + + Traceback (most recent call last): + File "T:\example.py", line 7, in + print(et.execute_json(*["-XMP:Subject=hi"] + ["skyblue.png"])) + File "T:\pyexiftool\exiftool\exiftool.py", line 1052, in execute_json + raise ExifToolOutputEmptyError(self._last_status, self._last_stdout, self._last_stderr, params) + exiftool.exceptions.ExifToolOutputEmptyError: execute_json expected output on stdout but got none + + * Using :py:meth:`exiftool.ExifTool.execute`. **No errors, but you have now written to the file instead of reading from it!** + + .. code-block:: + + from exiftool import ExifTool + with ExifTool() as et: + print(et.execute(*["-XMP:Subject=hi"] + ["skyblue.png"])) + + Output: + + .. code-block:: text + + 1 image files updated + +ExifTool +======== + +Using methods provided by :py:class:`exiftool.ExifTool` + +Calling execute() or execute_json() provides raw functionality for advanced use cases. Use with care! + + + +.. TODO show some ExifTool and ExifToolHelper use cases for common exiftool operations + +.. TODO show some Advanced use cases, and maybe even some don't-do-this-even-though-you-can cases (like using params for tags) + diff --git a/docs/source/faq.rst b/docs/source/faq.rst new file mode 100644 index 0000000..e05d7a1 --- /dev/null +++ b/docs/source/faq.rst @@ -0,0 +1,259 @@ +************************** +Frequently Asked Questions +************************** + +PyExifTool output is different from the exiftool command line +============================================================= + +One of the most frequently asked questions relates to the *default output* of PyExifTool. + +For example, using the `rose.jpg in tests`_, let's get **all JFIF tags**: + +Default exiftool output +----------------------- + +$ ``exiftool -JFIF:all rose.jpg`` + +.. code-block:: text + + JFIF Version : 1.01 + Resolution Unit : inches + X Resolution : 72 + Y Resolution : 72 + + +.. 
_`rose.jpg in tests`: https://github.com/sylikc/pyexiftool/blob/master/tests/files/rose.jpg + +Default PyExifTool output +------------------------- + +From PyExifTool, using the following code: + +.. code-block:: + + import exiftool + with exiftool.ExifTool() as et: + print(et.execute("-JFIF:all", "rose.jpg")) + +Output: + +.. code-block:: text + + [JFIF] JFIF Version : 1 1 + [JFIF] Resolution Unit : 1 + [JFIF] X Resolution : 72 + [JFIF] Y Resolution : 72 + +What's going on? +---------------- + +The reason for the different default output is that PyExifTool, by default, includes two arguments which make *exiftool* easier to use: ``-G, -n``. + +.. note:: + + The ``-n`` option disables *print conversion*, so that **raw tag values** are displayed, making the output more **machine-parseable**. + + When *print conversion* is enabled, *some* raw values may be translated to prettier **human-readable** text. + + +.. note:: + The ``-G`` option enables *group name (level 1)* output, which displays a group in the output to help disambiguate tags with the same name in different groups. + + For example, *-DateCreated* can be ambiguous if both *-IPTC:DateCreated* and *-XMP:DateCreated* exist and have different values. ``-G`` would display which one was returned by *exiftool*. + + +Read the documentation for the ExifTool constructor ``common_args`` parameter for more details: :py:meth:`exiftool.ExifTool.__init__`. + +(You can also change ``common_args`` on an existing instance using :py:attr:`exiftool.ExifTool.common_args`, as long as the subprocess is not :py:attr:`exiftool.ExifTool.running`) + + + + +Ways to make the output match +----------------------------- + +So if you want to have the output match (*useful for debugging*) between PyExifTool and exiftool, either: + +* **Disable print conversion and enable group names on the exiftool command line**: + + $ ``exiftool -G -n -JFIF:all rose.jpg`` + + ..
code-block:: text + + [JFIF] JFIF Version : 1 1 + [JFIF] Resolution Unit : 1 + [JFIF] X Resolution : 72 + [JFIF] Y Resolution : 72 + +* **Enable print conversion and disable group names in PyExifTool**: + + .. code-block:: + + import exiftool + with exiftool.ExifTool(common_args=None) as et: + print(et.execute("-JFIF:all", "rose.jpg")) + + Output: + + .. code-block:: text + + JFIF Version : 1.01 + Resolution Unit : inches + X Resolution : 72 + Y Resolution : 72 + + + +.. _shlex split: + +I can run this on the command-line but it doesn't work in PyExifTool +==================================================================== + +A frequent problem encountered by first-time users is figuring out how to properly split their arguments into a call to PyExifTool. + +As noted in the :ref:`Quick Start Examples <examples input params>`: + + If there is an **unquoted space on the command line** to *exiftool*, it's a **separate argument to the method** in PyExifTool. + +So, what does this look like in practice? + +Use `Python's shlex library`_ as a quick and easy way to figure out what the parameters to :py:meth:`exiftool.ExifTool.execute` or :py:meth:`exiftool.ExifTool.execute_json` should be. + +* Sample exiftool command line (with multiple quoted and unquoted parameters): + + .. code-block:: text + + exiftool -v0 -preserve -overwrite_original -api largefilesupport=1 -api "QuickTimeUTC=1" "-EXIF:DateTimeOriginal+=1:2:3 4:5:6" -XMP:DateTimeOriginal="2006:05:04 03:02:01" -gpsaltituderef="Above Sea Level" -make= test.mov + +* Using ``shlex`` to figure out the right argument list: + + ..
code-block:: + + import shlex, exiftool + with exiftool.ExifToolHelper() as et: + params = shlex.split('-v0 -preserve -overwrite_original -api largefilesupport=1 -api "QuickTimeUTC=1" "-EXIF:DateTimeOriginal+=1:2:3 4:5:6" -XMP:DateTimeOriginal="2006:05:04 03:02:01" -gpsaltituderef="Above Sea Level" -make= test.mov') + print(params) + # Output: ['-v0', '-preserve', '-overwrite_original', '-api', 'largefilesupport=1', '-api', 'QuickTimeUTC=1', '-EXIF:DateTimeOriginal+=1:2:3 4:5:6', '-XMP:DateTimeOriginal=2006:05:04 03:02:01', '-gpsaltituderef=Above Sea Level', '-make=', 'test.mov'] + et.execute(*params) + + .. note:: + + ``shlex.split()`` is a useful *tool to simplify discovery* of the correct arguments needed to call PyExifTool. + + However, since splitting and constructing immutable strings in Python is **slower than building the parameter list properly**, this method is *only recommended for* **debugging**! + + +.. _`Python's shlex library`: https://docs.python.org/library/shlex.html + +.. _set_json_loads faq: + +PyExifTool json turns some text fields into numbers +=================================================== + +A strange behavior of *exiftool* is documented in the `exiftool documentation`_:: + + -j[[+]=JSONFILE] (-json) + + Note that ExifTool quotes JSON values only if they don't look like numbers + (regardless of the original storage format or the relevant metadata specification). + +.. _`exiftool documentation`: https://exiftool.org/exiftool_pod.html#OPTIONS + +This causes a peculiar behavior if you set a text metadata field to a string that looks like a number: + +..
code-block:: + + import exiftool + with exiftool.ExifToolHelper() as et: + # Comment is a STRING field + et.set_tags("rose.jpg", {"Comment": "1.10"}) # string: "1.10" != "1.1" + + # FocalLength is a FLOAT field + et.set_tags("rose.jpg", {"FocalLength": 1.10}) # float: 1.10 == 1.1 + print(et.get_tags("rose.jpg", ["Comment", "FocalLength"])) + + # Prints: [{'SourceFile': 'rose.jpg', 'File:Comment': 1.1, 'EXIF:FocalLength': 1.1}] + +Workaround to enable output as string +------------------------------------- + +There is no universal fix which wouldn't affect other behaviors in PyExifTool, so this is an advanced workaround if you encounter this specific problem. + +PyExifTool does not do any processing on the fields returned by *exiftool*. In effect, what is returned is processed directly by ``json.loads()`` by default. + +You can change the behavior of the json string parser, or specify a different one using :py:meth:`exiftool.ExifTool.set_json_loads`. + +The `documentation of CPython's json.load`_ allows ``parse_float`` to be any parser of choice when a float is encountered in a JSON file. Thus, you can force the float to be interpreted as a string. +However, as you can see below, it also *changes the behavior of all float fields*. + + +.. _`documentation of CPython's json.load`: https://docs.python.org/3/library/json.html#json.load + +.. code-block:: + + import exiftool, json + with exiftool.ExifToolHelper() as et: + et.set_json_loads(json.loads, parse_float=str) + + # Comment is a STRING field + et.set_tags("rose.jpg", {"Comment": "1.10"}) # string: "1.10" == "1.10" + + # FocalLength is a FLOAT field + et.set_tags("rose.jpg", {"FocalLength": 1.10}) # float: 1.1 != "1.1" + print(et.get_tags("rose.jpg", ["Comment", "FocalLength"])) + + # Prints: [{'SourceFile': 'rose.jpg', 'File:Comment': '1.10', 'EXIF:FocalLength': '1.1'}] + +.. 
warning:: + + Unfortunately you can either change all float fields to a string, or possibly lose some float precision when working with floats in string metadata fields. + + There isn't any known universal workaround which wouldn't break one thing or the other, as it is an underlying *exiftool* quirk. + +There are other edge cases which may exhibit quirky behavior when storing numbers and whitespace only to text fields (See `test cases related to numeric tags`_). Since PyExifTool cannot accommodate all possible edge cases, +this workaround will allow you to configure PyExifTool to work in your environment! + +.. _`test cases related to numeric tags`: https://github.com/sylikc/pyexiftool/blob/master/tests/test_helper_tags_float.py + + +I would like to use a faster json string parser +=============================================== + +By default, PyExifTool uses the built-in ``json`` library to load the json string returned by *exiftool*. If you would like to use an alternate library, set it manually using :py:meth:`exiftool.ExifTool.set_json_loads` + + +.. code-block:: + + import exiftool, ujson + with exiftool.ExifToolHelper() as et: + et.set_json_loads(ujson.loads) + ... + +.. note:: + + In PyExifTool versions before 0.5.6, ``ujson`` was supported automatically if the package was installed. + + To support any possible alternative JSON library, this behavior has now been changed and it must be enabled manually. + + +I'm getting an error! How do I debug PyExifTool output? +======================================================= + +To assist debugging, ExifTool has a ``logger`` parameter in the constructor :py:meth:`exiftool.ExifTool.__init__`. You can also specify the logger after constructing the object by using the :py:attr:`exiftool.ExifTool.logger` property. + +First construct the logger object. The example below uses the most common way to construct one, ``getLogger(__name__)``. See more examples on `Python logging - Advanced Logging Tutorial`_ + + +..
_`Python logging - Advanced Logging Tutorial`: https://docs.python.org/3/howto/logging.html#advanced-logging-tutorial + +Example usage: + +.. code-block:: + + import logging + import exiftool + + logging.basicConfig(level=logging.DEBUG) + with exiftool.ExifToolHelper(logger=logging.getLogger(__name__)) as et: + et.execute("missingfile.jpg",) + diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..2dbf647 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,34 @@ +.. PyExifTool documentation master file, created by + sphinx-quickstart on Thu Apr 12 17:42:54 2012. + +PyExifTool -- Python wrapper for Phil Harvey's ExifTool +======================================================= + +.. include:: ../../README.rst + :start-after: BLURB_START + :end-before: BLURB_END + +.. toctree:: + :maxdepth: 2 + :glob: + :caption: Contents: + + intro + package + installation + examples + reference/* + FAQ <faq> + autoapi/* + Source code on GitHub <https://github.com/sylikc/pyexiftool> + + +.. maintenance/* +.. not public at the moment, at least it doesn't have to be in the TOC (adds unnecessary clutter) + + +Indices and tables +================== +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/installation.rst b/docs/source/installation.rst new file mode 100644 index 0000000..b987407 --- /dev/null +++ b/docs/source/installation.rst @@ -0,0 +1,7 @@ +************ +Installation +************ + +.. include:: ../../README.rst + :start-after: INSTALLATION_START + :end-before: INSTALLATION_END diff --git a/docs/source/intro.rst b/docs/source/intro.rst new file mode 100644 index 0000000..681d6af --- /dev/null +++ b/docs/source/intro.rst @@ -0,0 +1,49 @@ +************ +Introduction +************ + +.. _introduction: + +.. include:: ../../README.rst + :start-after: DESCRIPTION_START + :end-before: DESCRIPTION_END + +Concepts +======== + +As noted in the :ref:`introduction <introduction>`, PyExifTool is used to **communicate** with an instance of the external ExifTool process.
+ +.. note:: + + PyExifTool cannot do what ExifTool does not do. If you're not yet familiar with the capabilities of PH's ExifTool, please head over to the `ExifTool by Phil Harvey`_ homepage and read up on how to use it, and what it's capable of. + +.. _ExifTool by Phil Harvey: https://exiftool.org/ + +What PyExifTool Is +------------------ + +* ... is a wrapper for PH's ExifTool, hence it can do everything PH's ExifTool can do. +* ... is a library which adds some helper functionality around ExifTool to make it easier to work with in Python. +* ... is extensible and you can add functionality on top of the base class for your use case. +* ... is supported on any platform which PH's ExifTool runs on + +What PyExifTool Is NOT +---------------------- + +* ... is NOT a direct substitute for Phil Harvey's ExifTool. The `exiftool` executable must still be installed and available for PyExifTool to use. +* ... is NOT a library which does direct image manipulation (ex. Python Pillow_). + +.. _Pillow: https://pillow.readthedocs.io/en/stable/ + +Nomenclature +============ + +PyExifTool's namespace is *exiftool*. Since the library name is the same as the name of the tool it's meant to interface with, it can cause some ambiguity when describing it in docs. +Hence, here's some common nomenclature used. + +The term `exiftool` is overloaded (lowercase, CapWords case, ...) and can mean several things: + +* `PH's ExifTool` = Phil Harvey's ExifTool +* ``ExifTool`` in context usually implies ``exiftool.ExifTool`` +* `exiftool` when used alone almost always refers to `PH's ExifTool`'s command line executable.
(While Windows is supported with `exiftool.exe` the Linux nomenclature is used throughout the docs) + diff --git a/docs/source/maintenance/release-process.rst b/docs/source/maintenance/release-process.rst new file mode 100644 index 0000000..d98d1d7 --- /dev/null +++ b/docs/source/maintenance/release-process.rst @@ -0,0 +1,82 @@ +*************** +Release Process +*************** + +This page documents the steps to be taken to release a new version of PyExifTool. + + +Source Preparation +================== + +#. Update the version number in ``exiftool/__init__.py`` +#. Update the docs copyright year ``docs/source/conf.py`` and in source files +#. Add any changelog entries to ``CHANGELOG.md`` +#. Run Tests +#. Generate docs +#. Commit and push the changes. +#. Check that the tests passed on GitHub. + + +Pre-Requisites +============== + +Make sure the latest packages are installed. + +#. pip: ``python -m pip install --upgrade pip`` +#. build tools: ``python -m pip install --upgrade setuptools build`` +#. for uploading to PyPI: ``python -m pip install --upgrade twine`` + +Run Tests +========= + +#. Run in standard unittest: ``python -m unittest -v`` +#. Run in PyTest: ``scripts\pytest.bat`` + +Build and Check +=============== + +#. Build package: ``python -m build`` +#. `Validating reStructuredText markup`_: ``python -m twine check dist/*`` + +.. _Validating reStructuredText markup: https://packaging.python.org/guides/making-a-pypi-friendly-readme/#validating-restructuredtext-markup + +Upload to Test PyPI +=================== + +Set up the ``$HOME/.pypirc`` (Linux) or ``%UserProfile%\.pypirc`` (Windows) + +#. ``python -m twine upload --repository testpypi dist/*`` +#. Check package uploaded properly: `TestPyPI PyExifTool`_ +#. Create a temporary venv to test PyPI and run tests + + #. ``python -m venv tmp`` + #. Activate venv + #. 
``python -m pip install -U -i https://test.pypi.org/simple/ PyExifTool`` + + * If there is an error with SSL verification, just trust it: ``python -m pip install --trusted-host test-files.pythonhosted.org -U -i https://test.pypi.org/simple/ PyExifTool`` + * If you want to test a specific version, you can specify it as ``PyExifTool==<version>``, otherwise it installs the latest by default + + #. Make sure exiftool is found on PATH + #. Run tests: ``python -m pytest -v <path to tests>`` + +#. Examine files installed to make sure they look ok + +#. Cleanup: ``python -m pip uninstall PyExifTool``, then delete temp venv + + +.. _`TestPyPI PyExifTool`: https://test.pypi.org/project/PyExifTool/#history + +Release +======= + +#. Be very sure all the tests pass and the package is good, because `PyPI does not allow for a filename to be reused`_ +#. Release to production PyPI: ``python -m twine upload dist/*`` +#. If needed, create a tag, and a GitHub release with the *whl* file + + .. code-block:: bash + + git tag -a vX.X.X + git push --tags + +.. _PyPI does not allow for a filename to be reused: https://pypi.org/help/#file-name-reuse + diff --git a/docs/source/package.rst b/docs/source/package.rst new file mode 100644 index 0000000..feadb0b --- /dev/null +++ b/docs/source/package.rst @@ -0,0 +1,34 @@ +**************** +Package Overview +**************** + +All classes live under the PyExifTool library namespace: ``exiftool`` + +Design +====== + +.. include:: ../../README.rst + :start-after: DESIGN_INFO_START + :end-before: DESIGN_INFO_END + +.. inheritance-diagram:: exiftool.ExifToolAlpha + +.. include:: ../../README.rst + :start-after: DESIGN_CLASS_START + :end-before: DESIGN_CLASS_END + + +Fork Origins / Brief History +============================ + +.. include:: ../../README.rst + :start-after: HISTORY_START + :end-before: HISTORY_END + + +License +======= + +..
include:: ../../README.rst + :start-after: LICENSE_START + :end-before: LICENSE_END diff --git a/docs/source/reference/1-exiftool.rst b/docs/source/reference/1-exiftool.rst new file mode 100644 index 0000000..2e1cbed --- /dev/null +++ b/docs/source/reference/1-exiftool.rst @@ -0,0 +1,14 @@ +*********************** +Class exiftool.ExifTool +*********************** + +.. inheritance-diagram:: exiftool.ExifTool + +.. autoapimodule:: exiftool.ExifTool + :members: + :undoc-members: + :special-members: __init__ + :show-inheritance: + +.. :private-members: +.. currently excluding private members diff --git a/docs/source/reference/2-helper.rst b/docs/source/reference/2-helper.rst new file mode 100644 index 0000000..4acf455 --- /dev/null +++ b/docs/source/reference/2-helper.rst @@ -0,0 +1,11 @@ +***************************** +Class exiftool.ExifToolHelper +***************************** + +.. inheritance-diagram:: exiftool.ExifToolHelper + +.. autoapimodule:: exiftool.ExifToolHelper + :members: + :undoc-members: + :special-members: __init__ + :show-inheritance: diff --git a/docs/source/reference/3-alpha.rst b/docs/source/reference/3-alpha.rst new file mode 100644 index 0000000..00da1a9 --- /dev/null +++ b/docs/source/reference/3-alpha.rst @@ -0,0 +1,11 @@ +**************************** +Class exiftool.ExifToolAlpha +**************************** + +.. inheritance-diagram:: exiftool.ExifToolAlpha + +.. autoapimodule:: exiftool.ExifToolAlpha + :members: + :undoc-members: + :special-members: __init__ + :show-inheritance: diff --git a/exiftool.py b/exiftool.py deleted file mode 100644 index 8a11daa..0000000 --- a/exiftool.py +++ /dev/null @@ -1,325 +0,0 @@ -# -*- coding: utf-8 -*- -# PyExifTool -# Copyright 2012 Sven Marnach - -# This file is part of PyExifTool. 
-# -# PyExifTool is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the licence, or -# (at your option) any later version, or the BSD licence. -# -# PyExifTool is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# -# See COPYING.GPL or COPYING.BSD for more details. - -""" -PyExifTool is a Python library to communicate with an instance of Phil -Harvey's excellent ExifTool_ command-line application. The library -provides the class :py:class:`ExifTool` that runs the command-line -tool in batch mode and features methods to send commands to that -program, including methods to extract meta-information from one or -more image files. Since ``exiftool`` is run in batch mode, only a -single instance needs to be launched and can be reused for many -queries. This is much more efficient than launching a separate -process for every single query. - -.. _ExifTool: http://www.sno.phy.queensu.ca/~phil/exiftool/ - -The source code can be checked out from the github repository with - -:: - - git clone git://github.com/smarnach/pyexiftool.git - -Alternatively, you can download a tarball_. There haven't been any -releases yet. - -.. _tarball: https://github.com/smarnach/pyexiftool/tarball/master - -PyExifTool is licenced under GNU GPL version 3 or later. 
- -Example usage:: - - import exiftool - - files = ["a.jpg", "b.png", "c.tif"] - with exiftool.ExifTool() as et: - metadata = et.get_metadata_batch(files) - for d in metadata: - print("{:20.20} {:20.20}".format(d["SourceFile"], - d["EXIF:DateTimeOriginal"])) -""" - -from __future__ import unicode_literals - -import sys -import subprocess -import os -import json -import warnings -import codecs - -try: # Py3k compatibility - basestring -except NameError: - basestring = (bytes, str) - -executable = "exiftool" -"""The name of the executable to run. - -If the executable is not located in one of the paths listed in the -``PATH`` environment variable, the full path should be given here. -""" - -# Sentinel indicating the end of the output of a sequence of commands. -# The standard value should be fine. -sentinel = b"{ready}" - -# The block size when reading from exiftool. The standard value -# should be fine, though other values might give better performance in -# some cases. -block_size = 4096 - -# This code has been adapted from Lib/os.py in the Python source tree -# (sha1 265e36e277f3) -def _fscodec(): - encoding = sys.getfilesystemencoding() - errors = "strict" - if encoding != "mbcs": - try: - codecs.lookup_error("surrogateescape") - except LookupError: - pass - else: - errors = "surrogateescape" - - def fsencode(filename): - """ - Encode filename to the filesystem encoding with 'surrogateescape' error - handler, return bytes unchanged. On Windows, use 'strict' error handler if - the file system encoding is 'mbcs' (which is the default encoding). - """ - if isinstance(filename, bytes): - return filename - else: - return filename.encode(encoding, errors) - - return fsencode - -fsencode = _fscodec() -del _fscodec - -class ExifTool(object): - """Run the `exiftool` command-line tool and communicate to it. - - You can pass the file name of the ``exiftool`` executable as an - argument to the constructor. 
The default value ``exiftool`` will - only work if the executable is in your ``PATH``. - - Most methods of this class are only available after calling - :py:meth:`start()`, which will actually launch the subprocess. To - avoid leaving the subprocess running, make sure to call - :py:meth:`terminate()` method when finished using the instance. - This method will also be implicitly called when the instance is - garbage collected, but there are circumstance when this won't ever - happen, so you should not rely on the implicit process - termination. Subprocesses won't be automatically terminated if - the parent process exits, so a leaked subprocess will stay around - until manually killed. - - A convenient way to make sure that the subprocess is terminated is - to use the :py:class:`ExifTool` instance as a context manager:: - - with ExifTool() as et: - ... - - .. warning:: Note that there is no error handling. Nonsensical - options will be silently ignored by exiftool, so there's not - much that can be done in that regard. You should avoid passing - non-existent files to any of the methods, since this will lead - to undefied behaviour. - - .. py:attribute:: running - - A Boolean value indicating whether this instance is currently - associated with a running subprocess. - """ - - def __init__(self, executable_=None): - if executable_ is None: - self.executable = executable - else: - self.executable = executable_ - self.running = False - - def start(self): - """Start an ``exiftool`` process in batch mode for this instance. - - This method will issue a ``UserWarning`` if the subprocess is - already running. The process is started with the ``-G`` and - ``-n`` as common arguments, which are automatically included - in every command you run with :py:meth:`execute()`. 
- """ - if self.running: - warnings.warn("ExifTool already running; doing nothing.") - return - with open(os.devnull, "w") as devnull: - self._process = subprocess.Popen( - [self.executable, "-stay_open", "True", "-@", "-", - "-common_args", "-G", "-n"], - stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=devnull) - self.running = True - - def terminate(self): - """Terminate the ``exiftool`` process of this instance. - - If the subprocess isn't running, this method will do nothing. - """ - if not self.running: - return - self._process.stdin.write(b"-stay_open\nFalse\n") - self._process.stdin.flush() - self._process.communicate() - del self._process - self.running = False - - def __enter__(self): - self.start() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.terminate() - - def __del__(self): - self.terminate() - - def execute(self, *params): - """Execute the given batch of parameters with ``exiftool``. - - This method accepts any number of parameters and sends them to - the attached ``exiftool`` process. The process must be - running, otherwise ``ValueError`` is raised. The final - ``-execute`` necessary to actually run the batch is appended - automatically; see the documentation of :py:meth:`start()` for - the common options. The ``exiftool`` output is read up to the - end-of-output sentinel and returned as a raw ``bytes`` object, - excluding the sentinel. - - The parameters must also be raw ``bytes``, in whatever - encoding exiftool accepts. For filenames, this should be the - system's filesystem encoding. - - .. note:: This is considered a low-level method, and should - rarely be needed by application developers. 
- """ - if not self.running: - raise ValueError("ExifTool instance not running.") - self._process.stdin.write(b"\n".join(params + (b"-execute\n",))) - self._process.stdin.flush() - output = b"" - fd = self._process.stdout.fileno() - while not output[-32:].strip().endswith(sentinel): - output += os.read(fd, block_size) - return output.strip()[:-len(sentinel)] - - def execute_json(self, *params): - """Execute the given batch of parameters and parse the JSON output. - - This method is similar to :py:meth:`execute()`. It - automatically adds the parameter ``-j`` to request JSON output - from ``exiftool`` and parses the output. The return value is - a list of dictionaries, mapping tag names to the corresponding - values. All keys are Unicode strings with the tag names - including the ExifTool group name in the format :. - The values can have multiple types. All strings occurring as - values will be Unicode strings. Each dictionary contains the - name of the file it corresponds to in the key ``"SourceFile"``. - - The parameters to this function must be either raw strings - (type ``str`` in Python 2.x, type ``bytes`` in Python 3.x) or - Unicode strings (type ``unicode`` in Python 2.x, type ``str`` - in Python 3.x). Unicode strings will be encoded using - system's filesystem encoding. This behaviour means you can - pass in filenames according to the convention of the - respective Python version – as raw strings in Python 2.x and - as Unicode strings in Python 3.x. - """ - params = map(fsencode, params) - return json.loads(self.execute(b"-j", *params).decode("utf-8")) - - def get_metadata_batch(self, filenames): - """Return all meta-data for the given files. - - The return value will have the format described in the - documentation of :py:meth:`execute_json()`. - """ - return self.execute_json(*filenames) - - def get_metadata(self, filename): - """Return meta-data for a single file. 
- - The returned dictionary has the format described in the - documentation of :py:meth:`execute_json()`. - """ - return self.execute_json(filename)[0] - - def get_tags_batch(self, tags, filenames): - """Return only specified tags for the given files. - - The first argument is an iterable of tags. The tag names may - include group names, as usual in the format :. - - The second argument is an iterable of file names. - - The format of the return value is the same as for - :py:meth:`execute_json()`. - """ - # Explicitly ruling out strings here because passing in a - # string would lead to strange and hard-to-find errors - if isinstance(tags, basestring): - raise TypeError("The argument 'tags' must be " - "an iterable of strings") - if isinstance(filenames, basestring): - raise TypeError("The argument 'filenames' must be " - "an iterable of strings") - params = ["-" + t for t in tags] - params.extend(filenames) - return self.execute_json(*params) - - def get_tags(self, tags, filename): - """Return only specified tags for a single file. - - The returned dictionary has the format described in the - documentation of :py:meth:`execute_json()`. - """ - return self.get_tags_batch(tags, [filename])[0] - - def get_tag_batch(self, tag, filenames): - """Extract a single tag from the given files. - - The first argument is a single tag name, as usual in the - format :. - - The second argument is an iterable of file names. - - The return value is a list of tag values or ``None`` for - non-existent tags, in the same order as ``filenames``. - """ - data = self.get_tags_batch([tag], filenames) - result = [] - for d in data: - d.pop("SourceFile") - result.append(next(iter(d.values()), None)) - return result - - def get_tag(self, tag, filename): - """Extract a single tag from a single file. - - The return value is the value of the specified tag, or - ``None`` if this tag was not found in the file. 
- """ - return self.get_tag_batch(tag, [filename])[0] diff --git a/exiftool/__init__.py b/exiftool/__init__.py new file mode 100644 index 0000000..bba1aa4 --- /dev/null +++ b/exiftool/__init__.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# +# This file is part of PyExifTool. +# +# PyExifTool +# +# Copyright 2019-2023 Kevin M (sylikc) +# Copyright 2012-2014 Sven Marnach +# +# Community contributors are listed in the CHANGELOG.md for the PRs +# +# PyExifTool is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the licence, or +# (at your option) any later version, or the BSD licence. +# +# PyExifTool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING.GPL or COPYING.BSD for more details. + +""" +PyExifTool is a Python library to communicate with an instance of Phil +Harvey's excellent ExifTool_ command-line application. The library +provides the class :py:class:`ExifTool` that runs the command-line +tool in batch mode and features methods to send commands to that +program, including methods to extract meta-information from one or +more image files. Since ``exiftool`` is run in batch mode, only a +single instance needs to be launched and can be reused for many +queries. This is much more efficient than launching a separate +process for every single query. + +.. _ExifTool: https://exiftool.org + +The source code can be checked out from the github repository with + +:: + + git clone git://github.com/sylikc/pyexiftool.git + +Alternatively, you can download a tarball_. There haven't been any +releases yet. + +.. _tarball: https://github.com/sylikc/pyexiftool/tarball/master + +PyExifTool is licenced under GNU GPL version 3 or later, or BSD license. 
+ +Example usage:: + + import exiftool + + files = ["a.jpg", "b.png", "c.tif"] + with exiftool.ExifToolHelper() as et: + metadata = et.get_metadata(files) + for d in metadata: + print("{:20.20} {:20.20}".format(d["SourceFile"], + d["EXIF:DateTimeOriginal"])) + +""" + +# version number using Semantic Versioning 2.0.0 https://semver.org/ +# may not be PEP-440 compliant https://www.python.org/dev/peps/pep-0440/#semantic-versioning +__version__ = "0.5.6" + + +# while we COULD import all the exceptions into the base library namespace, +# it's best that it lives as exiftool.exceptions, to not pollute the base namespace +from . import exceptions + + +# make all of the original exiftool stuff available in this namespace +from .exiftool import ExifTool +from .helper import ExifToolHelper +from .experimental import ExifToolAlpha + +# an old feature of the original class that exposed this variable at the library level +# TODO may remove and deprecate at a later time +#from .constants import DEFAULT_EXECUTABLE diff --git a/exiftool/constants.py b/exiftool/constants.py new file mode 100644 index 0000000..1ac65c2 --- /dev/null +++ b/exiftool/constants.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +# +# This file is part of PyExifTool. +# +# PyExifTool +# +# Copyright 2019-2023 Kevin M (sylikc) +# Copyright 2012-2014 Sven Marnach +# +# Community contributors are listed in the CHANGELOG.md for the PRs +# +# PyExifTool is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the licence, or +# (at your option) any later version, or the BSD licence. +# +# PyExifTool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING.GPL or COPYING.BSD for more details. 
+ +""" + +This submodule defines constants which are used by other modules in the package + +""" + +import sys + + +################################## +############# HELPERS ############ +################################## + +# instead of comparing everywhere sys.platform, do it all here in the constants (less typo chances) +# True if Windows +PLATFORM_WINDOWS: bool = (sys.platform == 'win32') +"""sys.platform check, set to True if Windows""" + +# Prior to Python 3.3, the value for any Linux version is always linux2; after, it is linux. +# https://stackoverflow.com/a/13874620/15384838 +PLATFORM_LINUX: bool = (sys.platform == 'linux' or sys.platform == 'linux2') +"""sys.platform check, set to True if Linux""" + + + +################################## +####### PLATFORM DEFAULTS ######## +################################## + + +# specify the extension so exiftool doesn't default to running "exiftool.py" on windows (which could happen) +DEFAULT_EXECUTABLE: str = "exiftool.exe" if PLATFORM_WINDOWS else "exiftool" +"""The name of the default executable to run. + +``exiftool.exe`` (Windows) or ``exiftool`` (Linux/Mac/non-Windows platforms) + +By default, the executable is searched for on one of the paths listed in the +``PATH`` environment variable. If it's not on the ``PATH``, a full path should be specified in the +``executable`` argument of the ExifTool constructor (:py:meth:`exiftool.ExifTool.__init__`). 
+""" + +""" +# flipped the if/else so that the sphinx documentation shows "exiftool" rather than "exiftool.exe" +if not PLATFORM_WINDOWS: # pytest-cov:windows: no cover + DEFAULT_EXECUTABLE = "exiftool" +else: + DEFAULT_EXECUTABLE = "exiftool.exe" +""" + + +################################## +####### STARTUP CONSTANTS ######## +################################## + +# for Windows STARTUPINFO +SW_FORCEMINIMIZE: int = 11 +"""Windows ShowWindow constant from win32con + +Indicates the launched process window should start minimized +""" + +# for Linux preexec_fn +PR_SET_PDEATHSIG: int = 1 +"""Extracted from linux/prctl.h + +Allows a kill signal to be sent to child processes when the parent unexpectedly dies +""" + + + +################################## +######## GLOBAL DEFAULTS ######### +################################## + +DEFAULT_BLOCK_SIZE: int = 4096 +"""The default block size when reading from exiftool. The standard value +should be fine, though other values might give better performance in +some cases.""" + +EXIFTOOL_MINIMUM_VERSION: str = "12.15" +"""this is the minimum *exiftool* version required for current version of PyExifTool + +* 8.40 / 8.60 (production): implemented the -stay_open flag +* 12.10 / 12.15 (production): implemented exit status on -echo4 +""" diff --git a/exiftool/exceptions.py b/exiftool/exceptions.py new file mode 100644 index 0000000..c3cc8b3 --- /dev/null +++ b/exiftool/exceptions.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# +# This file is part of PyExifTool. +# +# PyExifTool +# +# Copyright 2019-2023 Kevin M (sylikc) +# Copyright 2012-2014 Sven Marnach +# +# Community contributors are listed in the CHANGELOG.md for the PRs +# +# PyExifTool is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the licence, or +# (at your option) any later version, or the BSD licence. 
+# +# PyExifTool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING.GPL or COPYING.BSD for more details. + +""" + +This submodule holds all of the custom exceptions which can be raised by PyExifTool + +""" + + +######################################################## +#################### Base Exception #################### +######################################################## + + +class ExifToolException(Exception): + """ + Generic Base class for all ExifTool error classes + """ + + +############################################################# +#################### Process State Error #################### +############################################################# + + +class ExifToolProcessStateError(ExifToolException): + """ + Base class for all errors related to the invalid state of `exiftool` subprocess + """ + + +class ExifToolRunning(ExifToolProcessStateError): + """ + ExifTool is already running + """ + def __init__(self, message: str): + super().__init__(f"ExifTool instance is running: {message}") + + +class ExifToolNotRunning(ExifToolProcessStateError): + """ + ExifTool is not running + """ + def __init__(self, message: str): + super().__init__(f"ExifTool instance not running: {message}") + + +########################################################### +#################### Execute Exception #################### +########################################################### + +# all of these exceptions are related to something regarding execute + +class ExifToolExecuteException(ExifToolException): + """ + This is the base exception class for all execute() associated errors. + + This exception is never returned directly from any method, but provides common interface for subclassed errors. 
+ + (mimics the signature of :py:class:`subprocess.CalledProcessError`) + + :attribute cmd: Parameters sent to *exiftool* which raised the error + :attribute returncode: Exit Status (Return code) of the ``execute()`` command which raised the error + :attribute stdout: STDOUT stream returned by the command which raised the error + :attribute stderr: STDERR stream returned by the command which raised the error + """ + def __init__(self, message, exit_status, cmd_stdout, cmd_stderr, params): + super().__init__(message) + + self.returncode: int = exit_status + self.cmd: list = params + self.stdout: str = cmd_stdout + self.stderr: str = cmd_stderr + + +class ExifToolExecuteError(ExifToolExecuteException): + """ + ExifTool executed the command but returned a non-zero exit status. + + .. note:: + There is a similarly named :py:exc:`ExifToolExecuteException` which this Error inherits from. + + That is a base class and never returned directly. This is what is raised. + """ + def __init__(self, exit_status, cmd_stdout, cmd_stderr, params): + super().__init__(f"execute returned a non-zero exit status: {exit_status}", exit_status, cmd_stdout, cmd_stderr, params) + + +######################################################## +#################### JSON Exception #################### +######################################################## + + +class ExifToolOutputEmptyError(ExifToolExecuteException): + """ + ExifTool execute_json() expected output, but execute() did not return any output on stdout + + This is an error, because if you expect no output, don't use execute_json() + + .. note:: + Only thrown by execute_json() + """ + def __init__(self, exit_status, cmd_stdout, cmd_stderr, params): + super().__init__("execute_json expected output on stdout but got none", exit_status, cmd_stdout, cmd_stderr, params) + + +class ExifToolJSONInvalidError(ExifToolExecuteException): + """ + ExifTool execute_json() expected valid JSON to be returned, but got invalid JSON. 
+ + This is an error, because if you expect non-JSON output, don't use execute_json() + + .. note:: + Only thrown by execute_json() + """ + def __init__(self, exit_status, cmd_stdout, cmd_stderr, params): + super().__init__("execute_json received invalid JSON output from exiftool", exit_status, cmd_stdout, cmd_stderr, params) + + +######################################################### +#################### Other Exception #################### +######################################################### + +class ExifToolVersionError(ExifToolException): + """ + Generic Error to represent some version mismatch. + PyExifTool is coded to work with a range of exiftool versions. If the advanced params change in functionality and break PyExifTool, this error will be thrown + """ + + +class ExifToolTagNameError(ExifToolException): + """ + ExifToolHelper found an invalid tag name + + This error is raised when :py:attr:`exiftool.ExifToolHelper.check_tag_names` is enabled, and a bad tag is provided to a method + """ + def __init__(self, bad_tag): + super().__init__(f"Invalid Tag Name found: \"{bad_tag}\"") diff --git a/exiftool/exiftool.py b/exiftool/exiftool.py new file mode 100644 index 0000000..ca0f304 --- /dev/null +++ b/exiftool/exiftool.py @@ -0,0 +1,1308 @@ +# -*- coding: utf-8 -*- +# +# This file is part of PyExifTool. +# +# PyExifTool +# +# Copyright 2019-2023 Kevin M (sylikc) +# Copyright 2012-2014 Sven Marnach +# +# Community contributors are listed in the CHANGELOG.md for the PRs +# +# PyExifTool is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the licence, or +# (at your option) any later version, or the BSD licence. +# +# PyExifTool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# See COPYING.GPL or COPYING.BSD for more details. + + +""" +This submodule contains the core ``ExifTool`` class of PyExifTool + +.. note:: + :py:class:`exiftool.exiftool.ExifTool` class of this submodule is available in the ``exiftool`` namespace as :py:class:`exiftool.ExifTool` + +""" + +# ---------- standard Python imports ---------- +import select +import subprocess +import os +import shutil +from pathlib import Path # requires Python 3.4+ +import random +import locale +import warnings +import json # NOTE: to use other json libraries (simplejson/ujson/orjson/...), see :py:meth:`set_json_loads()` + +# for the pdeathsig +import signal +import ctypes + + + +# ---------- Typing Imports ---------- +# for static analysis / type checking - Python 3.5+ +from collections.abc import Callable +from typing import Optional, List, Union + + + +# ---------- Library Package Imports ---------- + +from . import constants +from .exceptions import ExifToolVersionError, ExifToolRunning, ExifToolNotRunning, ExifToolOutputEmptyError, ExifToolJSONInvalidError + + +# ====================================================================================================================== + + +# constants to make typos obsolete! +ENCODING_UTF8: str = "utf-8" +#ENCODING_LATIN1: str = "latin-1" + + +# ====================================================================================================================== + +def _set_pdeathsig(sig) -> Optional[Callable]: + """ + Use this method in subprocess.Popen(preexec_fn=_set_pdeathsig(sig)) to make sure + the exiftool child process is stopped if this process dies. + However, this only works on Linux.
+ """ + if constants.PLATFORM_LINUX: + def callable_method(): + libc = ctypes.CDLL("libc.so.6") + return libc.prctl(constants.PR_SET_PDEATHSIG, sig) + + return callable_method + else: + return None # pragma: no cover + + +# ====================================================================================================================== + +def _get_buffer_end(buffer_list: List[bytes], bytes_needed: int) -> bytes: + """ Given a list of bytes objects, return the equivalent of + b"".join(buffer_list)[-bytes_needed:] + but without having to concatenate the entire list. + """ + if bytes_needed < 1: + return b"" # pragma: no cover + + buf_chunks = [] + for buf in reversed(buffer_list): + buf_tail = buf[-bytes_needed:] + buf_chunks.append(buf_tail) + bytes_needed -= len(buf_tail) + if bytes_needed <= 0: + break + + buf_tail_joined = b"".join(reversed(buf_chunks)) + return buf_tail_joined + + +def _read_fd_endswith(fd, b_endswith: bytes, block_size: int) -> bytes: + """ read an fd and keep reading until the data read ends with b_endswith + + this allows a consolidated read function that is platform independent + + if you're not careful, on windows, this will block + """ + output_list: List[bytes] = [] + + # if we're only looking at the last few bytes, make it meaningful. 4 is max size of \r\n? (or 2) + # this value can be bigger to capture more bytes at the "tail" of the read, but if it's too small, the whitespace might miss the detection + endswith_count = len(b_endswith) + 4 + + # I believe doing a slice, then a strip is more efficient in memory hence the original code did it this way.
+ # need to benchmark to see if in large strings, strip()[-endswithcount:] is more expensive or not + while not _get_buffer_end(output_list, endswith_count).strip().endswith(b_endswith): + if constants.PLATFORM_WINDOWS: + # windows does not support select() for anything except sockets + # https://docs.python.org/3.7/library/select.html + output_list.append(os.read(fd, block_size)) + else: # pytest-cov:windows: no cover + # this does NOT work on windows... and it may not work on other systems... in that case, put more things to use the original code above + inputready, outputready, exceptready = select.select([fd], [], []) + for i in inputready: + if i == fd: + output_list.append(os.read(fd, block_size)) + + return b"".join(output_list) + + + + + + +# ====================================================================================================================== + +class ExifTool(object): + """Run the `exiftool` command-line tool and communicate with it. + + Use ``common_args`` to disable/enable print conversion by specifying/omitting ``-n``, respectively. + This determines whether exiftool should perform print conversion, + which prints values in a human-readable way but + may be slower. If print conversion is enabled, appending ``#`` to a tag + name disables the print conversion for this particular tag. + See `Exiftool print conversion FAQ`_ for more details. + + .. _Exiftool print conversion FAQ: https://exiftool.org/faq.html#Q6 + + + Some methods of this class are only available after calling + :py:meth:`run()`, which will actually launch the *exiftool* subprocess. + To avoid leaving the subprocess running, make sure to call + :py:meth:`terminate()` method when finished using the instance. + This method will also be implicitly called when the instance is + garbage collected, but there are circumstances when this won't ever + happen, so you should not rely on the implicit process + termination.
Subprocesses won't be automatically terminated if + the parent process exits, so a leaked subprocess will stay around + until manually killed. + + A convenient way to make sure that the subprocess is terminated is + to use the :py:class:`ExifTool` instance as a context manager:: + + with ExifTool() as et: + ... + + .. warning:: + Note that options and parameters are not checked. There is no error handling or validation of options passed to *exiftool*. + + Nonsensical options are mostly silently ignored by exiftool, so there's not + much that can be done in that regard. You should avoid passing + non-existent files to any of the methods, since this will lead + to undefined behaviour. + + """ + + ############################################################################## + #################################### INIT #################################### + ############################################################################## + + # ---------------------------------------------------------------------------------------------------------------------- + + def __init__(self, + executable: Optional[str] = None, + common_args: Optional[List[str]] = ["-G", "-n"], + win_shell: bool = False, + config_file: Optional[Union[str, Path]] = None, + encoding: Optional[str] = None, + logger = None) -> None: + """ + + :param executable: Specify file name of the *exiftool* executable if it is in your ``PATH``. Otherwise, specify the full path to the ``exiftool`` executable. + + Passed directly into :py:attr:`executable` property. + + .. note:: + The default value :py:attr:`exiftool.constants.DEFAULT_EXECUTABLE` will only work if the executable is in your ``PATH``. + + :type executable: str, or None to use default + + + :param common_args: + Pass in additional parameters for the stay-open instance of exiftool. + + Defaults to ``["-G", "-n"]`` as this is the most common use case. 
+ + * ``-G`` (groupName level 1 enabled) separates the output with *groupName:tag* to disambiguate same-named tags under different groups. + * ``-n`` (print conversion disabled) improves the speed and consistency of output, and is more machine-parsable + + Passed directly into :py:attr:`common_args` property. + + + .. note:: + Depending on your use case, there may be other useful grouping levels and options. Search `Phil Harvey's exiftool documentation`_ for **groupNames** and **groupHeadings** to get more info. + + + + .. _`Phil Harvey's exiftool documentation`: https://exiftool.org/exiftool_pod.html + + :type common_args: list of str, or None. + + :param bool win_shell: (Windows only) Minimizes the exiftool process. + + .. note:: + This parameter may be deprecated in the future + + :param config_file: + File path to ``-config`` parameter when starting exiftool process. + + Passed directly into :py:attr:`config_file` property. + :type config_file: str, Path, or None + + :param encoding: Specify encoding to be used when communicating with + exiftool process. By default, will use ``locale.getpreferredencoding()`` + + Passed directly into :py:attr:`encoding` property. + + :param logger: Set a custom logger to log status and debug messages to. + + Passed directly into :py:attr:`logger` property. + """ + + # --- default settings / declare member variables --- + self._running: bool = False # is it running? + """A Boolean value indicating whether this instance is currently + associated with a running subprocess.""" + self._win_shell: bool = win_shell # do you want to see the shell on Windows? 
+ + self._process = None # this is set to the process to interact with when _running=True + self._ver: Optional[str] = None # this is set to be the exiftool -v -ver when running + + self._last_stdout: Optional[str] = None # previous output + self._last_stderr: Optional[str] = None # previous stderr + self._last_status: Optional[int] = None # previous exit status from exiftool (look up EXIT STATUS in exiftool documentation for more information) + + self._block_size: int = constants.DEFAULT_BLOCK_SIZE # set to default block size + + + # these are set via properties + self._executable: Union[str, Path] = constants.DEFAULT_EXECUTABLE # executable absolute path (default set to just the executable name, so it can't be None) + self._config_file: Optional[str] = None # config file that can only be set when exiftool is not running + self._common_args: Optional[List[str]] = None + self._logger = None + self._encoding: Optional[str] = None + self._json_loads: Callable = json.loads # variable points to the actual callable method + self._json_loads_kwargs: dict = {} # default optional params to pass into json.loads() call + + + + # --- run external library initialization code --- + random.seed(None) # initialize random number generator + + + + + # --- set variables via properties (which do the error checking) -- + + # set first, so that debug and info messages get logged + self.logger = logger + + # use the passed in parameter, or the default if not set + # error checking is done in the property.setter + self.executable = executable or constants.DEFAULT_EXECUTABLE + self.encoding = encoding + self.common_args = common_args + + # set the property, error checking happens in the property.setter + self.config_file = config_file + + + + + ####################################################################################### + #################################### MAGIC METHODS #################################### + 
####################################################################################### + + # ---------------------------------------------------------------------------------------------------------------------- + + def __enter__(self): + self.run() + return self + + # ---------------------------------------------------------------------------------------------------------------------- + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + if self.running: + self.terminate() + + # ---------------------------------------------------------------------------------------------------------------------- + def __del__(self) -> None: + if self.running: + # indicate that __del__ has been started - allows running alternate code path in terminate() + self.terminate(_del=True) + + + + + ######################################################################################## + #################################### PROPERTIES R/w #################################### + ######################################################################################## + + # ---------------------------------------------------------------------------------------------------------------------- + @property + def executable(self) -> Union[str, Path]: + """ + Path to *exiftool* executable. + + :getter: Returns current exiftool path + :setter: Specify just the executable name, or an absolute path to the executable. + If path given is not absolute, searches environment ``PATH``. + + .. note:: + Setting is only available when exiftool process is not running. 
@property
def executable(self) -> Union[str, Path]:
    """
    Path to the *exiftool* executable.

    :getter: Returns the path currently in use
    :setter: Accepts either a bare executable name or a path to the executable.
        A value that exists on the filesystem is used as given; anything else is searched for on the environment ``PATH``.

        .. note::
            Setting is only available when exiftool process is not running.

    :raises ExifToolRunning: If attempting to set while running (:py:attr:`running` == True)
    :raises FileNotFoundError: If the value neither exists as a path nor can be found on ``PATH``
    :type: str, Path
    """
    return self._executable

@executable.setter
def executable(self, new_executable: Union[str, Path]) -> None:
    # the executable is fixed for the lifetime of a running process
    if self.running:
        raise ExifToolRunning("Cannot set new executable")

    # shutil.which() appears to accept complete paths too, but that is not documented,
    # so an existing path is taken as-is and only unknown names go through the PATH search
    located = new_executable if Path(new_executable).exists() else shutil.which(new_executable)

    if located is None:
        raise FileNotFoundError(f'"{new_executable}" is not found, on path or as absolute path')

    # always stored as str, even when a Path object was passed in
    self._executable = str(located)

    if self._logger:
        self._logger.info(f"Property 'executable': set to \"{located}\"")
@property
def encoding(self) -> Optional[str]:
    """
    Encoding of Popen() communication with *exiftool* process.

    :getter: Returns current encoding setting

    :setter: Set a new encoding.

        * If *new_encoding* is None, will detect it from ``locale.getpreferredencoding(do_setlocale=False)`` (do_setlocale is set to False as not to affect the caller).
        * Default to ``utf-8`` if nothing is returned by ``getpreferredencoding``

        .. warning::
            Property setter does NOT validate the encoding for validity.  It is stored verbatim and used to encode/decode the data exchanged with the subprocess.

        .. note::
            Setting is only available when exiftool process is not running.

    :raises ExifToolRunning: If attempting to set while running (:py:attr:`running` == True)

    :type: str, None
    """
    return self._encoding

@encoding.setter
def encoding(self, new_encoding: Optional[str]) -> None:
    # cannot set encoding when process is running
    if self.running:
        raise ExifToolRunning("Cannot set new encoding")

    # explicit value wins; otherwise auto-detect from the locale, and fall back to the module default last
    self._encoding = new_encoding or (locale.getpreferredencoding(do_setlocale=False) or ENCODING_UTF8)

    # log the effective value, as the other read/write property setters do
    if self._logger: self._logger.info(f"Property 'encoding': set to \"{self._encoding}\"")


@property
def block_size(self) -> int:
    """
    Block size for communicating with *exiftool* subprocess.  Used when reading from the I/O pipe.

    :getter: Returns current block size

    :setter: Set a new block_size.  Does basic error checking to make sure > 0.

    :raises ValueError: If new block size is invalid

    :type: int
    """
    return self._block_size

@block_size.setter
def block_size(self, new_block_size: int) -> None:
    if new_block_size <= 0:
        raise ValueError("Block Size doesn't make sense to be <= 0")

    self._block_size = new_block_size

    if self._logger: self._logger.info(f"Property 'block_size': set to \"{new_block_size}\"")
@property
def common_args(self) -> Optional[List[str]]:
    """
    Common Arguments executed with every command passed to *exiftool* subprocess

    These are handed to the `-common_args`_ option when the *exiftool* process is STARTED.

    See `Phil Harvey's ExifTool documentation`_ for the available options and how to use them.

    .. _-common_args: https://exiftool.org/exiftool_pod.html#Advanced-options
    .. _Phil Harvey's ExifTool documentation: https://exiftool.org/exiftool_pod.html

    :getter: Returns current common_args list

    :setter: ``None`` is stored as ``[]``.  A list is stored as given, without copying or validating it.

        .. warning::
            No validation is done on the arguments list.  It is passed verbatim to *exiftool*.  Invalid options or combinations may result in undefined behavior.

        .. note::
            Setting is only available when exiftool process is not running.

    :raises ExifToolRunning: If attempting to set while running (:py:attr:`running` == True)
    :raises TypeError: If setting is not a list

    :type: list[str], None
    """
    return self._common_args

@common_args.setter
def common_args(self, new_args: Optional[List[str]]) -> None:
    if self.running:
        raise ExifToolRunning("Cannot set new common_args")

    # only None or a real list is accepted; the list's elements are not inspected
    if new_args is not None and not isinstance(new_args, list):
        raise TypeError("common_args not a list of strings")

    self._common_args = [] if new_args is None else new_args

    if self._logger:
        self._logger.info(f"Property 'common_args': set to \"{self._common_args}\"")
@property
def config_file(self) -> Optional[Union[str, Path]]:
    """
    Path to config file.

    See `ExifTool documentation for -config`_ for more details.

    :getter: Returns current config file path, or None if not set

    :setter: File existence is checked when setting parameter

        * ``None`` disables the ``-config`` parameter when starting *exiftool*
        * ``""`` has special meaning and disables loading of the default config file.  See `ExifTool documentation for -config`_ for more info.
        * Any other value must exist on the filesystem and is stored as ``str``

        .. note::
            Currently file is checked for existence when setting.  It is not checked when starting process.

    :raises ExifToolRunning: If attempting to set while running (:py:attr:`running` == True)
    :raises FileNotFoundError: If a non-empty path is given that does not exist

    :type: str, Path, None

    .. _ExifTool documentation for -config: https://exiftool.org/exiftool_pod.html#Advanced-options
    """
    return self._config_file

@config_file.setter
def config_file(self, new_config_file: Optional[Union[str, Path]]) -> None:
    if self.running:
        raise ExifToolRunning("Cannot set a new config_file")

    if new_config_file is None:
        chosen = None
    elif new_config_file == "":
        # VALID usage of -config: per the exiftool documentation, an empty CFGFILE disables loading of the default config file
        chosen = ""
    elif Path(new_config_file).exists():
        chosen = str(new_config_file)
    else:
        raise FileNotFoundError("The config file could not be found")

    self._config_file = chosen

    if self._logger:
        self._logger.info(f"Property 'config_file': set to \"{self._config_file}\"")
@property
def running(self) -> bool:
    """
    Read-only property which indicates whether the *exiftool* subprocess is running or not.

    :getter: Returns current running state

        .. note::
            The getter polls the process whenever the instance believes it is running.

            If the process has exited since the last check, a warning is issued,
            the running state is reset, and ``False`` is returned.
    """
    # poll() is only consulted while the flag is set; a non-None result means the process has exited
    if self._running and self._process.poll() is not None:
        warnings.warn("ExifTool process was previously running but died")
        self._flag_running_false()

        if self._logger:
            self._logger.warning("Property 'running': ExifTool process was previously running but died")

    return self._running


@property
def version(self) -> str:
    """
    Read-only property holding the version value cached on this instance while it is running.

    :getter: Returns the cached version value

    :raises ExifToolNotRunning: If attempting to read while not running (:py:attr:`running` == False)
    """
    if self.running:
        return self._ver

    raise ExifToolNotRunning("Can't get ExifTool version")


@property
def last_stdout(self) -> Optional[Union[str, bytes]]:
    """
    ``STDOUT`` for most recent result from execute()

    .. note::
        The value is ``bytes`` when the most recent execute() call used ``raw_bytes=True``, and ``str`` otherwise.

    .. note::
        Readable at any time, whether or not ExifTool is running.

        It is INTENTIONALLY *NOT* CLEARED on exiftool termination, so a command
        can be executed, the process terminated, and the result still read afterwards.
    """
    return self._last_stdout
@property
def last_stderr(self) -> Optional[Union[str, bytes]]:
    """
    ``STDERR`` for most recent result from execute()

    .. note::
        The value is ``bytes`` when the most recent execute() call used ``raw_bytes=True``, and ``str`` otherwise.

    .. note::
        Readable at any time, whether or not ExifTool is running.

        It is INTENTIONALLY *NOT* CLEARED on exiftool termination, so a command
        can be executed, the process terminated, and the result still read afterwards.
    """
    return self._last_stderr


@property
def last_status(self) -> Optional[int]:
    """
    ``Exit Status Code`` for most recent result from execute()

    .. note::
        Readable at any time, whether or not ExifTool is running.

        It is INTENTIONALLY *NOT* CLEARED on exiftool termination, so a command
        can be executed, the process terminated, and the result still read afterwards.
    """
    return self._last_status
def _set_logger(self, new_logger) -> None:
    """Install, replace or remove the object that status messages are logged to.

    Can be called at any time.  ``None`` switches logging off.

    :param new_logger: ``None``, or any object exposing callable ``info``, ``warning``,
        ``error``, ``critical`` and ``exception`` attributes.
    :raises TypeError: If any of those attributes is missing or is not callable.
    """
    if new_logger is None:
        self._logger = None
        return

    # deliberately no isinstance(new_logger, logging.Logger) check: drop-in replacements
    # such as loguru are not logging.Logger subclasses, so the object is duck-typed instead
    required_methods = ("info", "warning", "error", "critical", "exception")

    # a missing attribute and a non-callable attribute are both reported as TypeError
    if not all(callable(getattr(new_logger, method, None)) for method in required_methods):
        raise TypeError("logger needs to implement methods (info,warning,error,critical,exception)")

    self._logger = new_logger

# have to run this at the class level to create a special write-only property
#     https://stackoverflow.com/questions/17576009/python-class-property-use-setter-but-evade-getter
#     https://docs.python.org/3/howto/descriptor.html#properties
# can have it named same or different
logger = property(fset=_set_logger, doc="""Write-only property to set the class of logging.Logger""")
"""
Write-only property to set the class of logging.Logger

If this is set, then status messages will log out to the given class.

.. note::
    This can be set and unset (set to ``None``) at any time, regardless of whether the subprocess is running (:py:attr:`running` == True) or not.

:setter: Specify an object to log to.  The class is not checked, but validation is done to ensure the object has callable methods ``info``, ``warning``, ``error``, ``critical``, ``exception``.

:raises TypeError: If the object is missing one or more of the required methods, or if any of them is not callable.

:type: Object
"""
def set_json_loads(self, json_loads, **kwargs) -> None:
    """
    **Advanced**: Override default built-in ``json.loads()`` method.  The method is only used once in :py:meth:`execute_json`

    This allows using a different json string parser.

    (Alternate json libraries typically provide faster speed than the
    built-in implementation, more supported features, and/or different behavior.)

    Examples of json libraries: `orjson`_, `rapidjson`_, `ujson`_, ...

    .. note::
        This method is designed to be called the same way you would expect to call the provided ``json_loads`` method.

        Include any ``kwargs`` you would in the call.

        For example, to pass additional arguments to ``json.loads()``: ``set_json_loads(json.loads, parse_float=str)``

    .. note::
        This can be set at any time, regardless of whether the subprocess is running (:py:attr:`running` == True) or not.

    .. warning::
        This setter does not check whether the method provided actually parses json.  Undefined behavior or crashes may occur if used incorrectly

        This is **advanced configuration** for specific use cases only.

        For an example use case, see the FAQ in the project documentation.

    :param json_loads: A callable method to replace built-in ``json.loads`` used in :py:meth:`execute_json`

    :type json_loads: callable

    :param kwargs: Parameters passed to the ``json_loads`` method call.  Each call replaces the previously stored kwargs.

    :raises TypeError: If ``json_loads`` is not callable


    .. _orjson: https://pypi.org/project/orjson/
    .. _rapidjson: https://pypi.org/project/python-rapidjson/
    .. _ujson: https://pypi.org/project/ujson/
    """
    if not callable(json_loads):
        # reject early, with a message that says what was wrong, before any state is touched
        raise TypeError(f"json_loads must be callable, got {type(json_loads).__name__}")

    self._json_loads = json_loads
    self._json_loads_kwargs = kwargs
def run(self) -> None:
    """Start an *exiftool* subprocess in batch mode.

    This method will issue a ``UserWarning`` if the subprocess is
    already running (:py:attr:`running` == True).  The process is started with :py:attr:`common_args` as common arguments,
    which are automatically included in every command you run with :py:meth:`execute()`.

    You can override these default arguments with the
    ``common_args`` parameter in the constructor or setting :py:attr:`common_args` before calling :py:meth:`run()`.

    The command line is assembled in this order: the executable, ``-config <file>`` (only when
    :py:attr:`config_file` is not None), ``-stay_open True -@ -``, and finally ``-common_args ...``
    (only when :py:attr:`common_args` is non-empty).

    .. note::
        If you have another executable named *exiftool* which isn't Phil Harvey's ExifTool, then you're shooting yourself in the foot as there's no error checking for that

    :raises FileNotFoundError: If *exiftool* is no longer found.  Re-raised from subprocess.Popen()
    :raises OSError: Re-raised from subprocess.Popen()
    :raises ValueError: Re-raised from subprocess.Popen()
    :raises subprocess.CalledProcessError: Re-raised from subprocess.Popen()
    :raises RuntimeError: Popen() launched process but it died right away
    :raises ExifToolVersionError: :py:attr:`exiftool.constants.EXIFTOOL_MINIMUM_VERSION` not met.  ExifTool process will be automatically terminated.
    """
    if self.running:
        warnings.warn("ExifTool already running; doing nothing.", UserWarning)
        return

    # first the executable ...
    # TODO should we check the executable for existence here?
    proc_args = [self._executable, ]

    # If working with a config file, it must be the first argument after the executable per: https://exiftool.org/config.html
    if self._config_file is not None:
        # must check explicitly for None, as "" is valid
        # TODO check that the config file exists here?
        proc_args.extend(["-config", self._config_file])

    # this is the required stuff for the stay_open that makes pyexiftool so great!
    proc_args.extend(["-stay_open", "True", "-@", "-"])

    # only if there are any common_args.  [] and None are skipped equally with this
    if self._common_args:
        proc_args.append("-common_args")  # add this param only if there are common_args
        proc_args.extend(self._common_args)  # add the common arguments


    # ---- set platform-specific kwargs for Popen ----
    kwargs: dict = {}

    if constants.PLATFORM_WINDOWS:
        # TODO: I don't think this code actually does anything ... I've never seen a console pop up on Windows
        # Perhaps need to specify subprocess.STARTF_USESHOWWINDOW to actually have any console pop up?
        # https://docs.python.org/3/library/subprocess.html#windows-popen-helpers
        startup_info = subprocess.STARTUPINFO()
        if not self._win_shell:
            # Adding enum 11 (SW_FORCEMINIMIZE in win32api speak) will
            # keep it from throwing up a DOS shell when it launches.
            startup_info.dwFlags |= constants.SW_FORCEMINIMIZE

        kwargs["startupinfo"] = startup_info
    else:  # pytest-cov:windows: no cover
        # assume it's linux
        kwargs["preexec_fn"] = _set_pdeathsig(signal.SIGTERM)
        # Warning: The preexec_fn parameter is not safe to use in the presence of threads in your application.
        # https://docs.python.org/3/library/subprocess.html#subprocess.Popen


    try:
        # NOTE: the encoding= parameter was removed from the Popen() call to support
        # using bytes in the actual communication with exiftool process.
        # Due to the way the code is written, ExifTool only uses stdin.write which would need to be in bytes.
        # The reading is _NOT_ using subprocess.communicate().  This class reads raw bytes using os.read()
        # Therefore, by switching off the encoding= in Popen(), we can support both bytes and str at the
        # same time.  (This change was to support https://github.com/sylikc/pyexiftool/issues/47)

        # unify both platform calls into one subprocess.Popen call
        self._process = subprocess.Popen(
            proc_args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            **kwargs)
    # each handler below re-raises the exception unchanged; they only enumerate the
    # exception types documented in the docstring, pending the TODO that follows
    except FileNotFoundError:
        raise
    except OSError:
        raise
    except ValueError:
        raise
    except subprocess.CalledProcessError:
        raise
    # TODO print out more useful error messages to these different errors above

    # check error above before saying it's running
    if self._process.poll() is not None:
        # the Popen launched, then process terminated
        self._process = None  # unset it as it's now terminated
        raise RuntimeError("exiftool did not execute successfully")


    # have to set this before doing the checks below, or else execute() will fail
    self._running = True

    # get ExifTool version here and any Exiftool metadata
    # this can also verify that it is really ExifTool we ran, not some other random process
    try:
        # apparently because .execute() has code that already depends on v12.15+ functionality,
        # _parse_ver() will throw a ValueError immediately with:
        #   ValueError: invalid literal for int() with base 10: '${status}'
        self._ver = self._parse_ver()
    except ValueError:
        # trap the error and return it as a minimum version problem
        # (the process is terminated first, so no subprocess is left behind when the error propagates)
        self.terminate()
        raise ExifToolVersionError(f"Error retrieving Exiftool info. Is your Exiftool version ('exiftool -ver') >= required version ('{constants.EXIFTOOL_MINIMUM_VERSION}')?")

    if self._logger: self._logger.info(f"Method 'run': Exiftool version '{self._ver}' (pid {self._process.pid}) launched with args '{proc_args}'")


    # currently not needed... if it passes -ver check, the rest is OK
    # may use in the future again if another version feature is needed but the -ver check passes
    # NOTE: the triple-quoted text below is disabled code kept as a bare string expression; it does nothing at runtime
    """
    # check that the minimum required version is met, if not, terminate...
    # if you run against a version which isn't supported, strange errors come up during execute()
    if not self._exiftool_version_check():
        self.terminate()
        if self._logger: self._logger.error(f"Method 'run': Exiftool version '{self._ver}' did not meet the required minimum version '{constants.EXIFTOOL_MINIMUM_VERSION}'")
        raise ExifToolVersionError(f"exiftool version '{self._ver}' < required '{constants.EXIFTOOL_MINIMUM_VERSION}'")
    """
def terminate(self, timeout: int = 30, _del: bool = False) -> None:
    """Terminate the *exiftool* subprocess.

    If the subprocess isn't running, this method will throw a warning, and do nothing.

    On the normal path the process is asked to leave ``-stay_open`` mode and is given ``timeout``
    seconds to exit; it is killed if that time runs out.  In both cases the instance is flagged as
    not running afterwards.

    :param timeout: Seconds passed to ``Popen.communicate()`` while waiting for the graceful shutdown.
        Not used on the Windows ``__del__`` path, where the process is killed straight away.
    :param _del: Internal flag, set to True by ``__del__()``.  Only changes behavior on Windows.

    .. note::
        There is a bug in CPython 3.8+ on Windows where terminate() does not work during ``__del__()``

        See CPython issue `starting a thread in __del__ hangs at interpreter shutdown`_ for more info.

    .. _starting a thread in __del__ hangs at interpreter shutdown: https://github.com/python/cpython/issues/87950
    """
    if not self.running:
        warnings.warn("ExifTool not running; doing nothing.", UserWarning)
        return

    if _del and constants.PLATFORM_WINDOWS:
        # don't cleanly exit on windows, during __del__ as it'll freeze at communicate()
        self._process.kill()
        #print("before comm", self._process.poll(), self._process)
        self._process.poll()
        try:
            # TODO freezes here on windows if subprocess zombie remains
            outs, errs = self._process.communicate()  # have to cleanup the process or else .poll() will return None
            #print("after comm")
            # TODO a bug filed with Python, or user error... this doesn't seem to work at all ... .communicate() still hangs
            # https://bugs.python.org/issue43784 , https://github.com/python/cpython/issues/87950... Windows-specific issue affecting Python 3.8-3.10 (as of this time)
        except RuntimeError:
            # Python 3.12 throws a runtime error -- see https://github.com/python/cpython/pull/104826
            # RuntimeError: can't create new thread at interpreter shutdown
            pass
    else:
        try:
            """
                On Windows, running this after __del__ freezes at communicate(), regardless of timeout
                    see the bug filed above for details

                On Linux, this runs as is, and the process terminates properly
            """
            try:
                self._process.communicate(input=b"-stay_open\nFalse\n", timeout=timeout)  # this is a constant sequence specified by PH's exiftool
            except RuntimeError:
                # Python 3.12 throws a runtime error -- see https://github.com/python/cpython/pull/104826
                # RuntimeError: can't create new thread at interpreter shutdown
                pass
            # kill() is issued unconditionally here, i.e. also after communicate() returned normally
            self._process.kill()
        except subprocess.TimeoutExpired:  # this is new in Python 3.3 (for python 2.x, use the PyPI subprocess32 module)
            # graceful shutdown took longer than `timeout`: kill, then drain the pipes without a timeout
            self._process.kill()
            outs, errs = self._process.communicate()
            # err handling code from https://docs.python.org/3/library/subprocess.html#subprocess.Popen.communicate

    self._flag_running_false()

    # TODO log / return exit status from exiftool?
    if self._logger: self._logger.info("Method 'terminate': Exiftool terminated successfully.")
def execute(self, *params: Union[str, bytes], raw_bytes: bool = False) -> Union[str, bytes]:
    """Execute the given batch of parameters with *exiftool*.

    This method accepts any number of parameters and sends them to
    the attached ``exiftool`` subprocess.  The process must be
    running, otherwise :py:exc:`exiftool.exceptions.ExifToolNotRunning` is raised.  The final
    ``-execute`` necessary to actually run the batch is appended
    automatically; see the documentation of :py:meth:`run()` for
    the common options.  The ``exiftool`` output is read up to the
    end-of-output sentinel and returned as a ``str`` decoded
    based on the currently set :py:attr:`encoding`
    (or as undecoded ``bytes`` when ``raw_bytes=True``),
    excluding the sentinel.

    The parameters must be of type ``str`` or ``bytes``.
    ``str`` parameters are encoded to bytes automatically using the :py:attr:`encoding` property.
    For filenames, this should be the system's filesystem encoding.
    ``bytes`` parameters are untouched and passed directly to ``exiftool``.

    .. note::
        This is the core method to interface with the ``exiftool`` subprocess.

        No processing is done on the input or output.

    :param params: One or more parameters to send to the ``exiftool`` subprocess.

        Typically passed in via `Unpacking Argument Lists`_

        .. note::
            The parameters to this function must be type ``str`` or ``bytes``.

    :type params: one or more string/bytes parameters

    :param raw_bytes: If True, returns bytes.  Default behavior returns a str


    :return:
        * STDOUT is returned by the method call, and is also set in :py:attr:`last_stdout`
        * STDERR is set in :py:attr:`last_stderr`
        * Exit Status of the command is set in :py:attr:`last_status`

    :raises ExifToolNotRunning: If attempting to execute when not running (:py:attr:`running` == False)
    :raises ExifToolVersionError: If unexpected text was returned from the command while parsing out the sentinels
    :raises UnicodeDecodeError: If the :py:attr:`encoding` is not specified properly, it may be possible for ``.decode()`` method to raise this error
    :raises TypeError: If ``params`` argument is not ``str`` or ``bytes``


    .. _Unpacking Argument Lists: https://docs.python.org/3/tutorial/controlflow.html#unpacking-argument-lists
    """
    if not self.running:
        raise ExifToolNotRunning("Cannot execute()")


    # ---------- build the special params to execute ----------

    # there's a special usage of execute/ready specified in the manual which almost ensures we are receiving the right signal back
    # from exiftool man pages:  When this number is added, -q no longer suppresses the "{ready}"
    signal_num = random.randint(100000, 999999)  # arbitrarily create a 6 digit number (keep it down to save memory maybe)

    # constant special sequences when running -stay_open mode
    seq_execute = f"-execute{signal_num}\n"  # the default string is b"-execute\n"
    seq_ready = f"{{ready{signal_num}}}"  # the default string is b"{ready}"

    # these are special sequences to help with synchronization.  It will print specific text to STDERR before and after processing
    #SEQ_STDERR_PRE_FMT = "pre{}"  # can have a PRE sequence too but we don't need it for syncing
    seq_err_post = f"post{signal_num}"  # default there isn't any string

    SEQ_ERR_STATUS_DELIM = "="  # this can be configured to be one or more characters... the code below will accommodate longer sequences: len() >= 1
    seq_err_status = "${status}"  # a special sequence, ${status} returns EXIT STATUS as per exiftool documentation - only supported on exiftool v12.10+


    # ---------- build the params list and encode the params to bytes, if necessary ----------
    cmd_params: List[bytes] = []

    # this is necessary as the encoding parameter of Popen() is not specified.  We manually encode as per the .encoding() parameter
    for p in params:
        # we use isinstance() over type() not only for subclass, but
        # according to https://switowski.com/blog/type-vs-isinstance
        # isinstance() is 40% faster than type()
        if isinstance(p, bytes):
            # no conversion needed, pass in raw (caller has already encoded)
            cmd_params.append(p)
        elif isinstance(p, str):
            # conversion needed, encode based on specified encoding
            cmd_params.append(p.encode(self._encoding))
        else:
            # technically at this point we could support any object and call str()
            # but leave this up to an extended class like ExifToolHelper()
            raise TypeError(f"ERROR: Parameter was not bytes/str: {type(p)} => {p}")

    # f-strings are faster than concatenation of multiple strings -- https://stackoverflow.com/questions/59180574/string-concatenation-with-vs-f-string
    # appended after the caller's params: -echo4 with the text "=${status}=post<signal_num>", then the numbered -execute
    cmd_params.extend(
        (b"-echo4",
        f"{SEQ_ERR_STATUS_DELIM}{seq_err_status}{SEQ_ERR_STATUS_DELIM}{seq_err_post}".encode(self._encoding),
        seq_execute.encode(self._encoding))
    )

    # one parameter per line; seq_execute already carries the final newline
    cmd_bytes = b"\n".join(cmd_params)


    # ---------- write to the pipe connected with exiftool process ----------

    self._process.stdin.write(cmd_bytes)
    self._process.stdin.flush()

    if self._logger: self._logger.info("Method 'execute': Command sent = {}".format(cmd_params[:-1]))  # logs without the -execute (it would confuse people to include that)


    # ---------- read output from exiftool process until special sequences reached ----------

    # NOTE:
    #
    # while subprocess recommends: "Use communicate() rather than .stdin.write, .stdout.read or .stderr.read to avoid deadlocks due to any of the other OS pipe buffers filling up and blocking the child process."
    #
    # this raw reading is used instead of Popen.communicate() due to the note:
    #     https://docs.python.org/3/library/subprocess.html#subprocess.Popen.communicate
    #
    #     "The data read is buffered in memory, so do not use this method if the data size is large or unlimited."
    #
    # The data that comes back from exiftool falls into this, and so unbuffered reads are done with os.read()

    fdout = self._process.stdout.fileno()
    raw_stdout = _read_fd_endswith(fdout, seq_ready.encode(self._encoding), self._block_size)

    # when it's ready, we can safely read all of stderr out, as the command is already done
    fderr = self._process.stderr.fileno()
    raw_stderr = _read_fd_endswith(fderr, seq_err_post.encode(self._encoding), self._block_size)


    if not raw_bytes:
        # decode if not returning bytes
        raw_stdout = raw_stdout.decode(self._encoding)
        raw_stderr = raw_stderr.decode(self._encoding)


    # ---------- parse output ----------

    # save the outputs to some variables first
    # (the sentinel is cut off by its str length; when raw_bytes=True the same slice is applied to bytes)
    cmd_stdout = raw_stdout.strip()[:-len(seq_ready)]
    cmd_stderr = raw_stderr.strip()[:-len(seq_err_post)]  # save it in case the error below happens and output can be checked easily


    # if raw_bytes is True, the check has to become bytes rather than str
    err_status_delim = SEQ_ERR_STATUS_DELIM if not raw_bytes else SEQ_ERR_STATUS_DELIM.encode(self._encoding)


    # sanity check the status code from the stderr output
    delim_len = len(err_status_delim)
    if cmd_stderr[-delim_len:] != err_status_delim:
        # exiftool is expected to dump out the status code within the delims... if it doesn't, the class doesn't match expected exiftool output for current version
        raise ExifToolVersionError(f"Exiftool expected to return status on stderr, but got unexpected character: {cmd_stderr[-delim_len:]} != {err_status_delim}")

    # look for the previous delim (we could use regex here to do all this in one step, but it's probably overkill, and could slow down the code significantly)
    # the other simplification that can be done is that, as of this writing: Exiftool is expected to only return 0, 1, or 2 as per documentation
    # you could just lop the last 3 characters off... but if the return status changes in the future, then this code would break
    err_delim_1 = cmd_stderr.rfind(err_status_delim, 0, -delim_len)
    cmd_status = cmd_stderr[err_delim_1 + delim_len : -delim_len]


    # ---------- save the output to class vars for later retrieval ----------

    # lop off the actual status code from stderr
    self._last_stderr = cmd_stderr[:err_delim_1]
    self._last_stdout = cmd_stdout
    # can check .isnumeric() here, but best just to duck-type cast it
    # (a non-numeric status makes int() raise ValueError, which is not caught here)
    self._last_status = int(cmd_status)



    if self._logger:
        self._logger.debug(f"{self.__class__.__name__}.execute: IN  params = {params}")
        self._logger.debug(f"{self.__class__.__name__}.execute: OUT stdout = \"{self._last_stdout}\"")
        self._logger.debug(f"{self.__class__.__name__}.execute: OUT stderr = \"{self._last_stderr}\"")
        self._logger.debug(f"{self.__class__.__name__}.execute: OUT status = {self._last_status}")


    # the standard return: just stdout
    # if you need other output, retrieve from properties
    return self._last_stdout
It + automatically adds the parameter ``-j`` to request JSON output + from ``exiftool`` and parses the output. + + The return value is + a list of dictionaries, mapping tag names to the corresponding + values. All keys are strings. + The values can have multiple types. Each dictionary contains the + name of the file it corresponds to in the key ``"SourceFile"``. + + .. note:: + By default, the tag names include the group name in the format : (if using the ``-G`` option). + + You can adjust the output structure with various *exiftool* options. + + .. warning:: + This method does not verify the exit status code returned by *exiftool*. That is left up to the caller. + + This will mimic exiftool's default method of operation "continue on error" and "best attempt" to complete commands given. + + If you want automated error checking, use :py:class:`exiftool.ExifToolHelper` + + :param params: One or more parameters to send to the ``exiftool`` subprocess. + + Typically passed in via `Unpacking Argument Lists`_ + + .. note:: + The parameters to this function must be type ``str`` or ``bytes``. + + :type params: one or more string/bytes parameters + + :return: Valid JSON parsed into a Python list of dicts + :raises ExifToolOutputEmptyError: If *exiftool* did not return any STDOUT + + .. note:: + This is not necessarily an *exiftool* error, but rather a programmer error. + + For example, setting tags can cause this behavior. + + If you are executing a command and expect no output, use :py:meth:`execute()` instead. + + :raises ExifToolJSONInvalidError: If *exiftool* returned STDOUT which is invalid JSON. + + .. note:: + This is not necessarily an *exiftool* error, but rather a programmer error. + + For example, ``-w`` can cause this behavior. + + If you are executing a command and expect non-JSON output, use :py:meth:`execute()` instead. + + + .. 
_Unpacking Argument Lists: https://docs.python.org/3/tutorial/controlflow.html#unpacking-argument-lists + """ + + result = self.execute("-j", *params) # stdout + + # NOTE: I have decided NOT to check status code + # There are quite a few use cases where it's desirable to have continue-on-error behavior, + # as that is exiftool's default mode of operation. exiftool normally just does what it can + # and tells you that it did all this and that, but some files didn't process. In this case + # exit code is non-zero, but exiftool did SOMETHING. I leave it up to the caller to figure + # out what was done or not done. + + + if len(result) == 0: + # the output from execute() can be empty under many relatively ambiguous situations + # * command has no files it worked on + # * a file specified or files does not exist + # * some other type of error + # * a command that does not return anything (like metadata manipulation/setting tags) + # + # There's no easy way to check which params are files, or else we have to reproduce the parser exiftool does (so it's hard to detect to raise a FileNotFoundError) + + # Returning [] could be ambiguous if Exiftool changes the returned JSON structure in the future + # Returning None was preferred, because it's the safest as it clearly indicates that nothing came back from execute(), but it means execute_json() doesn't always return JSON + # Raising an error is the current solution, as that clearly indicates that you used execute_json() expecting output, but got nothing + raise ExifToolOutputEmptyError(self._last_status, self._last_stdout, self._last_stderr, params) + + + try: + parsed = self._json_loads(result, **self._json_loads_kwargs) + except ValueError as e: + # most known JSON libraries return ValueError or a subclass. 
+ # built-in json.JSONDecodeError is a subclass of ValueError -- https://docs.python.org/3/library/json.html#json.JSONDecodeError + + # if `-w` flag is specified in common_args or params, stdout will not be JSON parseable + # + # which will return something like: + # x image files read + # x output files created + + # the user is expected to know this ahead of time, and if -w exists in common_args or as a param, this error will be thrown + + # explicit chaining https://www.python.org/dev/peps/pep-3134/ + raise ExifToolJSONInvalidError(self._last_status, self._last_stdout, self._last_stderr, params) from e + + return parsed + + + ######################################################################################### + #################################### PRIVATE METHODS #################################### + ######################################################################################### + + # ---------------------------------------------------------------------------------------------------------------------- + def _flag_running_false(self) -> None: + """ private method that resets the "running" state + It used to be that there was only self._running to unset, but now it's a trio of variables + + This method makes it less likely a maintainer will leave off a variable if other ones are added in the future + """ + self._process = None # don't delete, just leave as None + self._ver = None # unset the version + self._running = False + + # as an FYI, as per the last_* properties, they are intentionally not cleared when process closes + + + # ---------------------------------------------------------------------------------------------------------------------- + def _parse_ver(self): + """ private method to run exiftool -ver + and parse out the information + """ + if not self.running: + raise ExifToolNotRunning("Cannot get version") + + + # -ver is just the version + # -v gives you more info (perl version, platform, libraries) but isn't helpful for this library 
+ # -v2 gives you even more, but it's less useful at that point + return self.execute("-ver").strip() + + # ---------------------------------------------------------------------------------------------------------------------- + """ + def _exiftool_version_check(self) -> bool: + "" " private method to check the minimum required version of ExifTool + + returns false if the version check fails + returns true if it's OK + + "" " + + # parse (major, minor) with integers... so far Exiftool versions are all ##.## with no exception + # this isn't entirely tested... possibly a version with more "." or something might break this parsing + arr: List = self._ver.split(".", 1) # split to (major).(whatever) + + version_nums: List = [] + try: + for v in arr: + res.append(int(v)) + except ValueError: + raise ValueError(f"Error parsing ExifTool version for version check: '{self._ver}'") + + if len(version_nums) != 2: + raise ValueError(f"Expected Major.Minor len()==2, got: {version_nums}") + + curr_major, curr_minor = version_nums + + + # same logic above except on one line + req_major, req_minor = [int(x) for x in constants.EXIFTOOL_MINIMUM_VERSION.split(".", 1)] + + if curr_major > req_major: + # major version is bigger + return True + elif curr_major < req_major: + # major version is smaller + return False + elif curr_minor >= req_minor: + # major version is equal + # current minor is equal or better + return True + else: + # anything else is False + return False + """ + + # ---------------------------------------------------------------------------------------------------------------------- diff --git a/exiftool/experimental.py b/exiftool/experimental.py new file mode 100644 index 0000000..db8a13d --- /dev/null +++ b/exiftool/experimental.py @@ -0,0 +1,328 @@ +# -*- coding: utf-8 -*- +# +# This file is part of PyExifTool. 
# string helper
def strip_nl(s):
    """Collapse a multi-line string into one line, joining the lines with single spaces."""
    pieces = s.splitlines()
    return ' '.join(pieces)


# Error checking functions
# very rudimentary checking
# Note: They are quite fragile, because they just parse the output text from exiftool
def check_ok(result):
    """Judge the output of an exiftool write operation (e.g. `set_tags`).

    ``result`` is what the execute method returned.

    Returns False when ``result`` is None or contains the text "due to errors",
    True otherwise.
    """
    if result is None:
        return False
    return "due to errors" not in result


def format_error(result):
    """Describe the output of an exiftool write operation (e.g. `set_tags`).

    ``result`` is what the execute method returned.

    Returns a human readable one-line string.
    """
    if result is None:
        return "exiftool operation can't be evaluated: No result given"

    if check_ok(result):
        return f'exiftool probably finished properly. ("{strip_nl(result)}")'

    return f'exiftool finished with error: "{strip_nl(result)}"'
+ + The class heirarchy: ExifTool -> ExifToolHelper -> ExifToolAlpha + + * ExifTool - stable base class with CORE functionality + * ExifToolHelper - user friendly class that extends the base class with general functionality not found in the core + * ExifToolAlpha - alpha-quality code which extends the ExifToolHelper to add functionality that is niche, brittle, or not well tested + + Because of this heirarchy, you could always use/extend the :py:class:`exiftool.ExifToolAlpha` class to have all functionality, + or at your discretion, use one of the more stable classes above. + + Please issue PR to this class to add functionality, even if not tested well. This class is for experimental code after all! + """ + + # ---------------------------------------------------------------------------------------------------------------------- + # i'm not sure if the verification works, but related to pull request (#11) + def execute_json_wrapper(self, filenames, params=None, retry_on_error=True): + # make sure the argument is a list and not a single string + # which would lead to strange errors + if isinstance(filenames, basestring): + raise TypeError("The argument 'filenames' must be an iterable of strings") + + execute_params = [] + + if params: + execute_params.extend(params) + execute_params.extend(filenames) + + result = self.execute_json(execute_params) + + if result: + try: + ExifToolAlpha._check_result_filelist(filenames, result) + except IOError as error: + # Restart the exiftool child process in these cases since something is going wrong + self.terminate() + self.run() + + if retry_on_error: + result = self.execute_json_filenames(filenames, params, retry_on_error=False) + else: + raise error + else: + # Reasons for exiftool to provide an empty result, could be e.g. file not found, etc. + # What should we do in these cases? We don't have any information what went wrong, therefore + # we just return empty dictionaries. 
+ result = [{} for _ in filenames] + + return result + + # ---------------------------------------------------------------------------------------------------------------------- + # allows adding additional checks (#11) + def get_metadata_batch_wrapper(self, filenames, params=None): + return self.execute_json_wrapper(filenames=filenames, params=params) + + # ---------------------------------------------------------------------------------------------------------------------- + # (#11) + def get_metadata_wrapper(self, filename, params=None): + return self.execute_json_wrapper(filenames=[filename], params=params)[0] + + # ---------------------------------------------------------------------------------------------------------------------- + # (#11) + def get_tags_batch_wrapper(self, tags, filenames, params=None): + params = (params if params else []) + ["-" + t for t in tags] + return self.execute_json_wrapper(filenames=filenames, params=params) + + # ---------------------------------------------------------------------------------------------------------------------- + # (#11) + def get_tags_wrapper(self, tags, filename, params=None): + return self.get_tags_batch_wrapper(tags, [filename], params=params)[0] + + # ---------------------------------------------------------------------------------------------------------------------- + # (#11) + def get_tag_batch_wrapper(self, tag, filenames, params=None): + data = self.get_tags_batch_wrapper([tag], filenames, params=params) + result = [] + for d in data: + d.pop("SourceFile") + result.append(next(iter(d.values()), None)) + return result + + # ---------------------------------------------------------------------------------------------------------------------- + # this was a method with good intentions by the original author, but returns some inconsistent results in some cases + # for example, if you passed in a single tag, or a group name, it would return the first tag back instead of the whole group + # try calling 
get_tag_batch("*.mp4", "QuickTime") or "QuickTime:all" ... the expected results is a dictionary but a single tag is returned + def get_tag_batch(self, filenames, tag): + """Extract a single tag from the given files. + + The first argument is a single tag name, as usual in the + format :. + + The second argument is an iterable of file names. + + The return value is a list of tag values or ``None`` for + non-existent tags, in the same order as ``filenames``. + """ + data = self.get_tags(filenames, [tag]) + result = [] + for d in data: + d.pop("SourceFile") + result.append(next(iter(d.values()), None)) + return result + + # ---------------------------------------------------------------------------------------------------------------------- + # (#11) + def get_tag_wrapper(self, tag, filename, params=None): + return self.get_tag_batch_wrapper(tag, [filename], params=params)[0] + + # ---------------------------------------------------------------------------------------------------------------------- + def get_tag(self, filename, tag): + """ + Extract a single tag from a single file. + + The return value is the value of the specified tag, or + ``None`` if this tag was not found in the file. 
+ + Does existence checks + """ + + #return self.get_tag_batch([filename], tag)[0] + + p = Path(filename) + + if not p.exists(): + raise FileNotFoundError + + data = self.get_tags(p, tag) + if len(data) > 1: + raise RuntimeError("one file requested but multiple returned?") + + d = data[0] + d.pop("SourceFile") + + if len(d.values()) > 1: + raise RuntimeError("multiple tag values returned, invalid use case") + + return next(iter(d.values()), None) + + + + # ---------------------------------------------------------------------------------------------------------------------- + def copy_tags(self, from_filename, to_filename): + """Copy all tags from one file to another.""" + params = ["-overwrite_original", "-TagsFromFile", str(from_filename), str(to_filename)] + self.execute(*params) + + # ---------------------------------------------------------------------------------------------------------------------- + def set_keywords_batch(self, files, mode, keywords): + """Modifies the keywords tag for the given files. + + The first argument is the operation mode: + + * KW_REPLACE: Replace (i.e. set) the full keywords tag with `keywords`. + * KW_ADD: Add `keywords` to the keywords tag. + If a keyword is present, just keep it. + * KW_REMOVE: Remove `keywords` from the keywords tag. + If a keyword wasn't present, just leave it. + + The second argument is an iterable of key words. + + The third argument is an iterable of file names. + + The format of the return value is the same as for + :py:meth:`execute()`. + + It can be passed into `check_ok()` and `format_error()`. 
+ """ + # Explicitly ruling out strings here because passing in a + # string would lead to strange and hard-to-find errors + if isinstance(keywords, basestring): + raise TypeError("The argument 'keywords' must be " + "an iterable of strings") + + # allow the files argument to be a str, and process it into a list of str + filenames = self.__class__._parse_arg_files(files) + + params = [] + + kw_operation = {KW_REPLACE: "-%s=%s", + KW_ADD: "-%s+=%s", + KW_REMOVE: "-%s-=%s"}[mode] + + kw_params = [kw_operation % (KW_TAGNAME, w) for w in keywords] + + params.extend(kw_params) + params.extend(filenames) + if self._logger: self._logger.debug(params) + + return self.execute(*params) + + # ---------------------------------------------------------------------------------------------------------------------- + def set_keywords(self, filename, mode, keywords): + """Modifies the keywords tag for the given file. + + This is a convenience function derived from `set_keywords_batch()`. + Only difference is that it takes as last argument only one file name + as a string. + """ + return self.set_keywords_batch([filename], mode, keywords) + + + # ---------------------------------------------------------------------------------------------------------------------- + @staticmethod + def _check_result_filelist(file_paths, result): + """ + Checks if the given file paths matches the 'SourceFile' entries in the result returned by + exiftool. This is done to find possible mix ups in the streamed responses. 
+ """ + # do some sanity checks on the results to make sure nothing was mixed up during reading from stdout + if len(result) != len(file_paths): + raise IOError(f"exiftool returned {len(result)} results, but expected was {len(file_paths)}") + + for i in range(len(file_paths)): + returned_source_file = result[i].get('SourceFile') + requested_file = file_paths[i] + + if returned_source_file != requested_file: + raise IOError(f"exiftool returned data for file {returned_source_file}, but expected was {requested_file}") + + # ---------------------------------------------------------------------------------------------------------------------- diff --git a/exiftool/helper.py b/exiftool/helper.py new file mode 100644 index 0000000..504deb4 --- /dev/null +++ b/exiftool/helper.py @@ -0,0 +1,585 @@ +# -*- coding: utf-8 -*- +# +# This file is part of PyExifTool. +# +# PyExifTool +# +# Copyright 2019-2023 Kevin M (sylikc) +# Copyright 2012-2014 Sven Marnach +# +# Community contributors are listed in the CHANGELOG.md for the PRs +# +# PyExifTool is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the licence, or +# (at your option) any later version, or the BSD licence. +# +# PyExifTool is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING.GPL or COPYING.BSD for more details. + + +""" +This submodule contains the ``ExifToolHelper`` class, which makes the core ``ExifTool`` class easier, and safer to use. + +.. 
note:: + :py:class:`exiftool.helper.ExifToolHelper` class of this submodule is available in the ``exiftool`` namespace as :py:class:`exiftool.ExifToolHelper` + +""" + +import re + +from .exiftool import ExifTool +from .exceptions import ExifToolOutputEmptyError, ExifToolJSONInvalidError, ExifToolExecuteError, ExifToolTagNameError + +# basestring makes no sense in Python 3, so renamed tuple to this const +TUPLE_STR_BYTES: tuple = (str, bytes,) + +from typing import Any, Union, Optional, List, Dict + + + + +# ====================================================================================================================== + + +def _is_iterable(in_param: Any, ignore_str_bytes: bool = False) -> bool: + """ + Checks if this item is iterable, instead of using isinstance(list), anything iterable can be ok + + .. note:: + STRINGS ARE CONSIDERED ITERABLE by Python + + if you need to consider a code path for strings first, check that before checking if a parameter is iterable via this function + + or specify ``ignore_str_bytes=True`` + + :param in_param: Something to check if iterable or not + :param ignore_str_bytes: str/bytes are iterable. But usually we don't want to check that. 
set ``ignore_str_bytes`` to ``True`` to ignore strings on check + """ + + if ignore_str_bytes and isinstance(in_param, TUPLE_STR_BYTES): + return False + + # a different type of test of iterability, instead of using isinstance(list) + # https://stackoverflow.com/questions/1952464/in-python-how-do-i-determine-if-an-object-is-iterable + try: + iterator = iter(in_param) + except TypeError: + return False + + return True + + + +# ====================================================================================================================== + +class ExifToolHelper(ExifTool): + """ + This class extends the low-level :py:class:`exiftool.ExifTool` class with 'wrapper'/'helper' functionality + + It keeps low-level core functionality with the base class but extends helper functions in a separate class + """ + + ########################################################################################## + #################################### OVERRIDE METHODS #################################### + ########################################################################################## + + # ---------------------------------------------------------------------------------------------------------------------- + def __init__(self, auto_start: bool = True, check_execute: bool = True, check_tag_names: bool = True, **kwargs) -> None: + """ + :param bool auto_start: Will automatically start the exiftool process on first command run, defaults to True + :param bool check_execute: Will check the exit status (return code) of all commands. This catches some invalid commands passed to exiftool subprocess, defaults to True. See :py:attr:`check_execute` for more info. + :param bool check_tag_names: Will check the tag names provided to methods which work directly with tag names. This catches unintended uses and bugs, default to True. See :py:attr:`check_tag_names` for more info. 
+ + :param kwargs: All other parameters are passed directly to the super-class constructor: :py:meth:`exiftool.ExifTool.__init__()` + """ + # call parent's constructor + super().__init__(**kwargs) + + self._auto_start: bool = auto_start + self._check_execute: bool = check_execute + self._check_tag_names: bool = check_tag_names + + + # ---------------------------------------------------------------------------------------------------------------------- + def execute(self, *params: Any, **kwargs) -> Union[str, bytes]: + """ + Override the :py:meth:`exiftool.ExifTool.execute()` method + + Adds logic to auto-start if not running, if :py:attr:`auto_start` == True + + Adds logic to str() any parameter which is not a str or bytes. (This allows passing in objects like Path _without_ casting before passing it in.) + + :raises ExifToolExecuteError: If :py:attr:`check_execute` == True, and exit status was non-zero + """ + if self._auto_start and not self.running: + self.run() + + # by default, any non-(str/bytes) would throw a TypeError from ExifTool.execute(), so they're casted to a string here + # + # duck-type any given object to string + # this was originally to support Path() but it's now generic enough to support any object that str() to something useful + # + # Thanks @jangop for the single line contribution! + str_bytes_params: Union[str, bytes] = [x if isinstance(x, TUPLE_STR_BYTES) else str(x) for x in params] + # TODO: this list copy could be expensive if the input is a very huge list. Perhaps in the future have a flag that takes the lists in verbatim without any processing? + + + result: Union[str, bytes] = super().execute(*str_bytes_params, **kwargs) + + # imitate the subprocess.run() signature. 
check=True will check non-zero exit status + if self._check_execute and self._last_status: + raise ExifToolExecuteError(self._last_status, self._last_stdout, self._last_stderr, str_bytes_params) + + return result + + # ---------------------------------------------------------------------------------------------------------------------- + def run(self) -> None: + """ + override the :py:meth:`exiftool.ExifTool.run()` method + + Will not attempt to run if already running (so no warning about 'ExifTool already running' will trigger) + """ + if self.running: + return + + super().run() + + + # ---------------------------------------------------------------------------------------------------------------------- + def terminate(self, **opts) -> None: + """ + Overrides the :py:meth:`exiftool.ExifTool.terminate()` method. + + Will not attempt to terminate if not running (so no warning about 'ExifTool not running' will trigger) + + :param opts: passed directly to the parent call :py:meth:`exiftool.ExifTool.terminate()` + """ + if not self.running: + return + + super().terminate(**opts) + + + ######################################################################################## + #################################### NEW PROPERTIES #################################### + ######################################################################################## + + # ---------------------------------------------------------------------------------------------------------------------- + @property + def auto_start(self) -> bool: + """ + Read-only property. Gets the current setting passed into the constructor as to whether auto_start is enabled or not. + + (There's really no point to having this a read-write property, but allowing a read can be helpful at runtime to detect expected behavior.) 
+ """ + return self._auto_start + + + + # ---------------------------------------------------------------------------------------------------------------------- + @property + def check_execute(self) -> bool: + """ + Flag to enable/disable checking exit status (return code) on execute + + If enabled, will raise :py:exc:`exiftool.exceptions.ExifToolExecuteError` if a non-zero exit status is returned during :py:meth:`execute()` + + .. warning:: + While this property is provided to give callers an option to enable/disable error checking, it is generally **NOT** recommended to disable ``check_execute``. + + **If disabled, exiftool will fail silently, and hard-to-catch bugs may arise.** + + That said, there may be some use cases where continue-on-error behavior is desired. (Example: dump all exif in a directory with files which don't all have the same tags, exiftool returns exit code 1 for unknown files, but results are valid for other files with those tags) + + :getter: Returns current setting + :setter: Enable or Disable the check + + .. note:: + This settings can be changed any time and will only affect subsequent calls + + :type: bool + """ + return self._check_execute + + @check_execute.setter + def check_execute(self, new_setting: bool) -> None: + self._check_execute = new_setting + + + # ---------------------------------------------------------------------------------------------------------------------- + @property + def check_tag_names(self) -> bool: + """ + Flag to enable/disable checking of tag names + + If enabled, will raise :py:exc:`exiftool.exceptions.ExifToolTagNameError` if an invalid tag name is detected. + + .. warning:: + ExifToolHelper only checks the validity of the Tag **NAME** based on a simple regex pattern. + + * It *does not* validate whether the tag name is actually valid on the file type(s) you're accessing. 
+ * It *does not* validate whether the tag you passed in that "looks like" a tag is actually an option + * It does support a "#" at the end of the tag name to disable print conversion + + Please refer to `ExifTool Tag Names`_ documentation for a complete list of valid tags recognized by ExifTool. + + .. warning:: + While this property is provided to give callers an option to enable/disable tag names checking, it is generally **NOT** recommended to disable ``check_tag_names``. + + **If disabled, you could accidentally edit a file when you meant to read it.** + + Example: ``get_tags("a.jpg", "tag=value")`` will call ``execute_json("-tag=value", "a.jpg")`` which will inadvertently write to a.jpg instead of reading it! + + That said, if PH's exiftool changes its tag name regex and tag names are being erroneously rejected because of this flag, disabling this could be used as a workaround (more importantly, if this is happening, please `file an issue`_!). + + :getter: Returns current setting + :setter: Enable or Disable the check + + .. note:: + This settings can be changed any time and will only affect subsequent calls + + :type: bool + + + .. _file an issue: https://github.com/sylikc/pyexiftool/issues + .. _ExifTool Tag Names: https://exiftool.org/TagNames/ + """ + return self._check_tag_names + + @check_tag_names.setter + def check_tag_names(self, new_setting: bool) -> None: + self._check_tag_names = new_setting + + + # ---------------------------------------------------------------------------------------------------------------------- + + + + + + + ##################################################################################### + #################################### NEW METHODS #################################### + ##################################################################################### + + + # all generic helper functions will follow a convention of + # function(files to be worked on, ... 
+    #     , params=)
+
+
+    # ----------------------------------------------------------------------------------------------------------------------
+    def get_metadata(self, files: Union[str, List], params: Optional[Union[str, List]] = None) -> List:
+        """
+        Return all metadata for the given files.
+
+        .. note::
+
+            This is a convenience method.
+
+            The implementation calls :py:meth:`get_tags()` with ``tags=None``
+
+        :param files: Files parameter matches :py:meth:`get_tags()`
+
+        :param params: Optional parameters to send to *exiftool*
+        :type params: list or None
+
+        :return: The return value will have the format described in the documentation of :py:meth:`get_tags()`.
+        """
+        return self.get_tags(files, None, params=params)
+
+
+    # ----------------------------------------------------------------------------------------------------------------------
+    def get_tags(self, files: Union[Any, List[Any]], tags: Optional[Union[str, List]], params: Optional[Union[str, List]] = None) -> List:
+        """
+        Return only specified tags for the given files.
+
+        :param files: File(s) to be worked on.
+
+            * If a non-iterable is provided, it will get tags for a single item (str(non-iterable))
+            * If an iterable is provided, the list is passed into :py:meth:`execute_json` verbatim.
+
+            .. note::
+                Any files/params which are not bytes/str will be cast to a str in :py:meth:`execute()`.
+
+            .. warning::
+                Currently, filenames are NOT checked for existence!  That is left up to the caller.
+
+            .. warning::
+                Wildcard strings are valid and passed verbatim to exiftool.
+
+                However, exiftool's wildcard matching/globbing may be different than Python's matching/globbing,
+                which may cause unexpected behavior if you're using one and comparing the result to the other.
+                Read `ExifTool Common Mistakes - Over-use of Wildcards in File Names`_ for some related info.
+
+        :type files: Any or List(Any) - see Note
+
+
+        :param tags: Tag(s) to read.  If tags is None, or [], method will return all tags
+
+            .. note::
+                The tag names may include group names, as usual in the format ``Group:TagName``.
+
+            .. note::
+                An iterable of tags is copied into a new list before use, so one-shot iterables
+                (generators, iterators) are safe to pass and the caller's object is never aliased.
+
+        :type tags: str, list, or None
+
+
+        :param params: Optional parameter(s) to send to *exiftool*
+        :type params: Any, List[Any], or None
+
+
+        :return: The format of the return value is the same as for :py:meth:`exiftool.ExifTool.execute_json()`.
+
+
+        :raises ValueError: Invalid Parameter (``files`` is empty)
+        :raises TypeError: Invalid Parameter (``tags`` is not None, str/bytes, or an iterable)
+        :raises ExifToolTagNameError: If :py:attr:`check_tag_names` == True, and a tag name fails the name check
+        :raises ExifToolExecuteError: If :py:attr:`check_execute` == True, and exit status was non-zero
+
+
+        .. _ExifTool Common Mistakes - Over-use of Wildcards in File Names: https://exiftool.org/mistakes.html#M2
+
+        """
+
+        # files are validated first, so an empty 'files' is reported before any problem with 'tags'
+        final_files: List = self.__class__._parse_arg_files(files)
+
+        final_tags: List
+        if tags is None:
+            # all tags
+            final_tags = []
+        elif isinstance(tags, TUPLE_STR_BYTES):
+            final_tags = [tags]
+        elif _is_iterable(tags):
+            # Copy into a fresh list.  final_tags is iterated twice below (once by the
+            # tag name check, once to build the "-TAG" arguments); keeping a reference to a
+            # one-shot iterator/generator would leave the second pass empty, which makes
+            # exiftool silently return ALL tags instead of the requested ones.
+            final_tags = list(tags)
+        else:
+            raise TypeError(f"{self.__class__.__name__}.get_tags: argument 'tags' must be a str/bytes or a list")
+
+        if self._check_tag_names:
+            # run check if enabled
+            self.__class__._check_tag_list(final_tags)
+
+        exec_params: List = []
+
+        # we extend an empty list to avoid modifying any referenced inputs
+        if params:
+            if _is_iterable(params, ignore_str_bytes=True):
+                exec_params.extend(params)
+            else:
+                exec_params.append(params)
+
+        # tags is always a list by this point.  It will always be iterable... don't have to check for None
+        exec_params.extend([f"-{t}" for t in final_tags])
+
+        exec_params.extend(final_files)
+
+        # Errors from execute_json() (ExifToolOutputEmptyError, ExifToolJSONInvalidError,
+        # ExifToolExecuteError) are deliberately not handled here; they propagate to the caller unchanged.
+        # (open question kept from the original: if last_status is <> 0, raise an error that one or more files failed?)
+        return self.execute_json(*exec_params)
+
+
+    # ----------------------------------------------------------------------------------------------------------------------
+    def set_tags(self, files: Union[Any, List[Any]], tags: Dict, params: Optional[Union[str, List]] = None):
+        """
+        Writes the values of the specified tags for the given file(s).
+
+        :param files: File(s) to be worked on.
+
+            * If a non-iterable is provided, it will set tags for a single item (str(non-iterable))
+            * If an iterable is provided, the list is passed into :py:meth:`execute` verbatim.
+
+            .. note::
+                Any files/params which are not bytes/str will be cast to a str in :py:meth:`execute()`.
+
+            .. warning::
+                Currently, filenames are NOT checked for existence!  That is left up to the caller.
+
+            .. warning::
+                Wildcard strings are valid and passed verbatim to exiftool.
+
+                However, exiftool's wildcard matching/globbing may be different than Python's matching/globbing,
+                which may cause unexpected behavior if you're using one and comparing the result to the other.
+                Read `ExifTool Common Mistakes - Over-use of Wildcards in File Names`_ for some related info.
+
+        :type files: Any or List(Any) - see Note
+
+
+        :param tags: Tag(s) to write.
+
+            Dictionary keys = tags, values = tag values (str or list)
+
+            * If a value is a str, will set key=value
+            * If a value is a list, will iterate over list and set each individual value to the same tag
+
+            .. note::
+                The tag names may include group names, as usual in the format ``Group:TagName``.
+
+            .. note::
+                Value of the dict can be a list, in which case, the tag will be passed with each item in the list, in the order given
+
+                This allows setting things like ``-Keywords=a -Keywords=b -Keywords=c`` by passing in ``tags={"Keywords": ['a', 'b', 'c']}``
+
+        :type tags: dict
+
+
+        :param params: Optional parameter(s) to send to *exiftool*
+        :type params: str, list, or None
+
+
+        :return: The format of the return value is the same as for :py:meth:`execute()`.
+
+
+        :raises ValueError: Invalid Parameter (``files`` or ``tags`` is empty)
+        :raises TypeError: Invalid Parameter (``tags`` is not a dict)
+        :raises ExifToolTagNameError: If :py:attr:`check_tag_names` == True, and a tag name fails the name check
+        :raises ExifToolExecuteError: If :py:attr:`check_execute` == True, and exit status was non-zero
+
+
+        .. _ExifTool Common Mistakes - Over-use of Wildcards in File Names: https://exiftool.org/mistakes.html#M2
+
+        """
+        final_files: List = self.__class__._parse_arg_files(files)
+
+        # NOTE: emptiness is tested before the type, so any falsy non-dict (None, [], "") raises ValueError, not TypeError
+        if not tags:
+            raise ValueError(f"{self.__class__.__name__}.set_tags: argument 'tags' cannot be empty")
+        elif not isinstance(tags, dict):
+            raise TypeError(f"{self.__class__.__name__}.set_tags: argument 'tags' must be a dict")
+
+
+        if self._check_tag_names:
+            # run check if enabled
+            self.__class__._check_tag_list(list(tags))  # gets only the keys (tag names)
+
+        exec_params: List = []
+
+        # we extend an empty list to avoid modifying any referenced inputs
+        if params:
+            if _is_iterable(params, ignore_str_bytes=True):
+                exec_params.extend(params)
+            else:
+                exec_params.append(params)
+
+        for tag, value in tags.items():
+            # contributed by @daviddorme in https://github.com/sylikc/pyexiftool/issues/12#issuecomment-821879234
+            # allows setting things like Keywords which require separate directives
+            # > exiftool -Keywords=keyword1 -Keywords=keyword2 -Keywords=keyword3 file.jpg
+            # which are not supported as duplicate keys in a dictionary
+            if isinstance(value, list):
+                for item in value:
+                    exec_params.append(f"-{tag}={item}")
+            else:
+                exec_params.append(f"-{tag}={value}")
+
+        exec_params.extend(final_files)
+
+        # ExifToolExecuteError (last status non-zero) is deliberately not handled here; it propagates to the caller.
+        #TODO if execute returns data, then error?
+        return self.execute(*exec_params)
+
+
+    # ----------------------------------------------------------------------------------------------------------------------
+
+
+
+
+
+
+
+    #########################################################################################
+    #################################### PRIVATE METHODS ####################################
+    #########################################################################################
+
+
+
+    # ----------------------------------------------------------------------------------------------------------------------
+    @staticmethod
+    def _parse_arg_files(files: Union[str, List]) -> List:
+        """
+        This logic to process the files argument is common across most ExifToolHelper methods
+
+        It can be used by a developer to process the files argument the same way if this class is extended
+
+        :param files: File(s) to be worked on.
+        :type files: str or list
+
+        :return: A list of one or more elements containing strings of files.
+            An iterable input is returned as the same object (it is not copied);
+            anything else is wrapped in a new single-element list.
+
+        :raises ValueError: Files parameter is empty
+        """
+
+        if not files:
+            # Exiftool process would return an error anyways
+            raise ValueError("ERROR: Argument 'files' cannot be empty")
+
+        if not _is_iterable(files, ignore_str_bytes=True):
+            # a single item: wrap it so callers can always extend() with the result
+            # (presumably ignore_str_bytes=True makes str/bytes count as a single item too -- confirm against _is_iterable)
+            return [files]
+
+        return files
+
+
+    # ----------------------------------------------------------------------------------------------------------------------
+    @staticmethod
+    def _check_tag_list(tags: List) -> None:
+        """
+        Private method.  This method is used to check the validity of a tag list passed in.
+
+        See any notes/warnings in the property :py:attr:`check_tag_names` to get a better understanding of what this is for and not for.
+
+        :param list tags: List of tags to check
+
+        :return: None if checks passed.  Raises an error otherwise.  (Think of it like an assert statement)
+
+        :raises ExifToolTagNameError: On the first tag which does not fully match the tag name pattern
+        """
+        # In the future if a specific version changed the match pattern,
+        # we can check self.version ... then this method will no longer
+        # be static and requires the underlying exiftool process to be running to get the self.version
+        #
+        # This is not done right now because the odds of the tag name format changing is very low, and requiring
+        # exiftool to be running during this tag check could introduce unnecessary overhead at this time
+
+
+
+        # According to the exiftool source code, the valid regex on tags is (/^([-\w*]+:)*([-\w*?]+)#?$/)
+        # However, it appears that "-" may be allowed within a tag name/group (i.e. https://exiftool.org/TagNames/XMP.html Description tags)
+        #
+        # \w in Perl => https://perldoc.perl.org/perlrecharclass#Backslash-sequences
+        # \w in Python => https://docs.python.org/3/library/re.html#regular-expression-syntax
+        #
+        # Perl vs Python's "\w" seem to mean slightly different things, so we write our own regex / matching algorithm
+
+
+        # * make sure the first character is not a special one
+        # * "#" can only appear at the end
+        # * Tag:Tag:tag is not valid, but passes the simple regex (it's ok, this is not supposed to be a catch-all)... exiftool subprocess accepts it anyways, even if invalid.
+        # * *wildcard* tags are permitted by exiftool
+        tag_regex = r"[\w\*][\w\:\-\*]*(#|)"
+
+        for t in tags:
+            # fullmatch: the WHOLE tag must match, so anything like "tag=value" is rejected
+            if re.fullmatch(tag_regex, t) is None:
+                raise ExifToolTagNameError(t)
+
+        # returns nothing, if no error was raised, the tags passed
+
+        # considering making this...
+        # * can't begin with -
+        # * can't have "=" anywhere, and that's it...
+ # there's a lot of variations which might make this code buggy for some edge use cases + + + + # ---------------------------------------------------------------------------------------------------------------------- diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..613a598 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,2 @@ +;[mypy-json.*] +;ignore_no_redef = True diff --git a/scripts/README.txt b/scripts/README.txt new file mode 100644 index 0000000..280443d --- /dev/null +++ b/scripts/README.txt @@ -0,0 +1,10 @@ +These are standardized scripts/batch files which run tests, code reviews, or other maintenance tasks in a repeatable way. + + +While scripts could automatically install requirements, it is left up to the caller: + +