diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 0000000..2fe42c9 --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,71 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +name: "CodeQL" + +on: + push: + branches: [master] + pull_request: + # The branches below must be a subset of the branches above + branches: [master] + schedule: + - cron: '0 1 * * 4' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + # Override automatic language detection by changing the below list + # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python'] + language: ['python'] + # Learn more... + # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + # We must fetch at least the immediate parents so that if this is + # a pull request then we can checkout the head. + fetch-depth: 2 + + # If this run was triggered by a pull request event, then checkout + # the head of the pull request instead of the merge commit. + - run: git checkout HEAD^2 + if: ${{ github.event_name == 'pull_request' }} + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. 
+ # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v1 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 https://git.io/JvXDl + + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 diff --git a/.gitignore b/.gitignore index ebb5ea6..6bc9168 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,29 @@ - +# Python files *.pyc +*.pyo +__pycache__/ .cache +# Test files +.pytest_cache/ +temp/ +.tox +.coverage .testmondata +.benchmarks + +# Deb files +debian/files +debian/*debhelper* +debian/*substvars +# Build files +.eggs/ +*.egg-info/ build/ _build/ -deb_dist/ dist/ -*.tar.gz -*egg-info -MANIFEST +venv/ + +# Editors +.vscode diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..017ca86 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "JSON-Schema-Test-Suite"] + path = JSON-Schema-Test-Suite + url = https://github.com/json-schema-org/JSON-Schema-Test-Suite.git diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..721bda3 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,11 @@ +MAINTAINER +Michal Hořejšek + +CONTRIBUTORS +anentropic +Antti Jokipii +bcaller +Frederik Petersen +Guillaume Desvé +Kris Molendyke +David Majda diff --git a/CHANGELOG.txt b/CHANGELOG.txt new file mode 100644 index 0000000..b8e3038 --- /dev/null +++ b/CHANGELOG.txt @@ -0,0 +1,155 @@ +=== 2.14.5 (2020-08-17) + +* Fix missing dependencies +* Fix schema cache + + +=== 2.14.4 (2020-03-19) + +* Fix $id property +* Add extra properties to error message when 
additionalProperties are set to False +* Better exception message when referencing schema is not valid JSON + + +=== 2.14.3 (2020-02-27) + +* Tuple is also valid array + + +=== 2.14.2 (2019-12-11) + +* Fix of `additionalProperties=true` for JSON schema 4 +* Use decimal for multipleOf implementation and add respective tests +* Better escaping of definition names + + +=== 2.14.1 (2019-10-09) + +* Fix of undefined format exception message + + +=== 2.14.0 (2019-10-08) + +* Optimization: do not do the same type checks, keep it in one block if possible +* More context in JsonSchemaException (value, variable_name, variable_path, definition, rule and rule_definition) +* Possibility to pass custom formats +* Raise JsonSchemaDefinitionException when definition of property is not valid +* Fix of uniqueItems when used with other than array type +* Fix of date-time regexp (time zone is required by RFC 3339) + + +=== 2.13 (2019-06-10) + +* Resolved Python 3.7 warnings +* Updated JSON Schema test suites + * Fix of date-time regexp (allow small T and Z). + * Fix escaping (proper handling of \n, \r or " everywhere). + + +=== 2.12 (2019-05-24) + +* Fix of properties (local variable referenced before assignment). + + +=== 2.11 (2019-04-16) + +* Fix of additionalProperties (colliding variable names). + + +=== 2.10 (2019-04-15) + +* Fix pattern regexps with a space. + + +=== 2.9 (2019-03-04) + +* Use of urllib instead of requests for smaller memory usage. + + +=== 2.8 (2019-01-05) + +* Fix quotes in enum generating invalid code + + +=== 2.7 (2018-11-16) + +* Fix regexps for Python 3.7 + + +=== 2.6 (2018-11-01) + +* Swap $ in regexps to \Z to follow ECMA 262 ($ matches really the end of the string, not the end or new line and the end). Because of that your regular expressions have to escape dollar when used as regular character. 
+ + +=== 2.5 (2018-10-22) + +* E-mail regex allows any e-mail with @ + + +=== 2.4 (2018-09-27) + +* Fix overriding variables (in pattern properties, property names, unique items and contains) +* Fix string in const +* Improve security: not generating code from any definition +* Added validate function for lazy programmers + + +=== 2.3 (2018-09-14) === + +* Fix regex of hostname + + +=== 2.2 (2018-09-12) === + +* Fix code generation with long regex patterns +* Fix regex of date-time (allow time without milliseconds) + + +=== 2.1 (2018-09-12) === + +* Fix code generation with regex patterns + + +=== 2.0 (2018-09-07) === + +* Support of draft-06 +* Support of draft-07 +* Code generation to a file + + +=== 1.6 (2018-06-21) === + +* Bugfixing + + +=== 1.5 (2018-06-20) === + +* Support of definitions +* Support of references + + +=== 1.4 (2018-06-11) === + +* Better date-time regex +* Support of dependencies + + +=== 1.3 (2018-04-25) === + +* Fix pattern inside of anyOf + + +=== 1.2 (2018-04-24) === + +* Support of formats +* Support of properties + + +=== 1.1 (2017-01-03) === + +* Support of float numbers + + +=== 1.0 (2016-09-23) === + +* First version diff --git a/JSON-Schema-Test-Suite b/JSON-Schema-Test-Suite new file mode 160000 index 0000000..1bd999a --- /dev/null +++ b/JSON-Schema-Test-Suite @@ -0,0 +1 @@ +Subproject commit 1bd999ac16bd8d3fdb5c44ef13a0759aefb4ab73 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1d77bbf --- /dev/null +++ b/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2018, Michal Horejsek +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ + Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + + Neither the name of the {organization} nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..1aba38f --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include LICENSE diff --git a/Makefile b/Makefile index f65acca..26b6d1f 100644 --- a/Makefile +++ b/Makefile @@ -1,30 +1,74 @@ +.PHONY: all venv lint jsonschemasuitcases test test-lf benchmark benchmark-save performance printcode doc upload deb clean +SHELL=/bin/bash + +VENV_NAME?=venv +VENV_BIN=$(shell pwd)/${VENV_NAME}/bin + +PYTHON=${VENV_BIN}/python3 + all: - @echo "make install - Install on local system" @echo "make test - Run tests during development" @echo "make performance - Run performance test of this and other implementation" @echo "make doc - Make documentation" @echo "make clean - Get rid of scratch and byte files" -deb: - python3 setup.py --command-packages=stdeb.command bdist_deb +venv: $(VENV_NAME)/bin/activate +$(VENV_NAME)/bin/activate: setup.py + test -d $(VENV_NAME) || virtualenv -p python3 $(VENV_NAME) + # Some problem in latest version of setuptools during extracting translations. + ${PYTHON} -m pip install -U pip setuptools==39.1.0 + ${PYTHON} -m pip install -e .[devel] + touch $(VENV_NAME)/bin/activate + +lint: venv + ${PYTHON} -m pylint fastjsonschema -upload: - python3 setup.py register sdist upload +jsonschemasuitcases: + git submodule init + git submodule update -install: - python3 setup.py install +test: venv jsonschemasuitcases + ${PYTHON} -m pytest -W default --benchmark-skip +test-lf: venv jsonschemasuitcases + ${PYTHON} -m pytest -W default --benchmark-skip --last-failed -test: - python3 -m pytest tests +# Call make benchmark-save before change and then make benchmark to compare results. 
+benchmark: venv jsonschemasuitcases + ${PYTHON} -m pytest \ + -W default \ + --benchmark-only \ + --benchmark-sort=name \ + --benchmark-group-by=fullfunc \ + --benchmark-disable-gc \ + --benchmark-compare \ + --benchmark-compare-fail='min:5%' +benchmark-save: venv jsonschemasuitcases + ${PYTHON} -m pytest \ + -W default \ + --benchmark-only \ + --benchmark-sort=name \ + --benchmark-group-by=fullfunc \ + --benchmark-disable-gc \ + --benchmark-save=benchmark \ + --benchmark-save-data -performance: - python3 performance.py +performance: venv + ${PYTHON} performance.py + +printcode: venv + cat schema.json | python3 -m fastjsonschema doc: cd docs; make +upload: venv + ${PYTHON} setup.py register sdist bdist_wheel upload + +deb: venv + ${PYTHON} setup.py --command-packages=stdeb.command bdist_deb + clean: - python3 setup.py clean - find . -name '*.pyc' -delete + find . -name '*.pyc' -exec rm --force {} + + rm -rf $(VENV_NAME) *.eggs *.egg-info dist build docs/_build .mypy_cache .cache diff --git a/README.markdown b/README.markdown deleted file mode 100644 index 46b367b..0000000 --- a/README.markdown +++ /dev/null @@ -1,11 +0,0 @@ -# Fast JSON schema for Python - -## Install - -`pip install fastjsonschema` - -Support for Python 3.3 and higher. - -## Documentation - -Documentation: http://opensource.seznam.cz/python-fastjsonschema/ diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..428a9dc --- /dev/null +++ b/README.rst @@ -0,0 +1,15 @@ +=========================== +Fast JSON schema for Python +=========================== + +|PyPI| |Pythons| + +.. |PyPI| image:: https://img.shields.io/pypi/v/fastjsonschema.svg + :alt: PyPI version + :target: https://pypi.python.org/pypi/fastjsonschema + +.. |Pythons| image:: https://img.shields.io/pypi/pyversions/fastjsonschema.svg + :alt: Supported Python versions + :target: https://pypi.python.org/pypi/fastjsonschema + +See `documentation `_. 
diff --git a/docs/Makefile b/docs/Makefile index fefaf1a..3ba9e91 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -8,5 +8,6 @@ SPHINXOPTS=-n -W -d $(BUILDDIR)/doctrees . sphinx: sphinx-build -b html $(SPHINXOPTS) $(BUILDDIR)/html +.PHONY: clean clean: rm -rf build diff --git a/docs/conf.py b/docs/conf.py index 8a8f693..a83394f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,7 +23,7 @@ # General information about the project. project = u'fastjsonschema' -copyright = u'2016-{}, Seznam.cz'.format(time.strftime("%Y")) +copyright = u'2016-{}, Michal Horejsek'.format(time.strftime("%Y")) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docs/index.rst b/docs/index.rst index c47e933..40c5af9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,15 +1,5 @@ -fastjsonschema documentation -############################ - -Installation -************ - -.. code-block:: bash - - pip install python-fastjsonschema - -Documentation -************* +Fast JSON schema for Python +########################### .. automodule:: fastjsonschema :members: diff --git a/fastjsonschema/__init__.py b/fastjsonschema/__init__.py index 1e2f45c..f7715b8 100644 --- a/fastjsonschema/__init__.py +++ b/fastjsonschema/__init__.py @@ -1,63 +1,115 @@ -""" -This project is there because commonly used JSON schema libraries in Python -are really slow which was problem at out project. Just let's see some numbers first: +# ___ +# \./ DANGER: This project implements some code generation +# .--.O.--. techniques involving string concatenation. +# \/ \/ If you look at it, you might die. +# + +r""" +Installation +************ + +.. code-block:: bash + + pip install fastjsonschema + +Support only for Python 3.3 and higher. + +About +***** + +``fastjsonschema`` implements validation of JSON documents by JSON schema. +The library implements JSON schema drafts 04, 06 and 07. 
The main purpose is +to have a really fast implementation. See some numbers: - * Probalby most popular ``jsonschema`` can take in tests up to 11 seconds for valid inputs - and 2.5 seconds for invalid inputs. - * Secondly most popular ``json-spec`` is even worse with up to 12 and 3 seconds. - * Lastly ``validictory`` is much better with 800 or 50 miliseconds, but it does not - follow all standards and it can be still slow for some purposes. + * Probably the most popular ``jsonschema`` can take up to 5 seconds for valid inputs + and 1.2 seconds for invalid inputs. + * The second most popular ``json-spec`` is even worse with up to 7.2 and 1.7 seconds. + * Last, ``validictory``, now deprecated, is much better with 370 or 23 milliseconds, + but it does not follow all standards and it can still be slow for some purposes. -That's why there is this project which compiles definition into Python most stupid code -which people would had hard time to write by themselfs because of not-written-rule DRY -(don't repeat yourself). When you compile definition, then times are 60 miliseconds for -valid inputs and 3 miliseconds for invalid inputs. Pretty amazing, right? :-) +With this library you can gain big improvements as ``fastjsonschema`` takes +only about 25 milliseconds for valid inputs and 2 milliseconds for invalid ones. +Pretty amazing, right? :-) -You can try it for yourself with included script: +Technically it works by generating the most stupid code on the fly which is fast but +is hard to write by hand. The best efficiency is achieved when compiled once and used +many times, of course. It works similarly to regular expressions. But you can also +generate the code to a file, which is even slightly faster. + +You can run the performance test on your computer or server with the included script: .. 
code-block:: bash $ make performance - fast_compiled valid ==> 0.06058199889957905 - fast_compiled invalid ==> 0.0028909146785736084 - fast_not_compiled valid ==> 7.054106639698148 - fast_not_compiled invalid ==> 1.6773221027106047 - jsonschema valid ==> 11.189393147826195 - jsonschema invalid ==> 2.642645660787821 - jsonspec valid ==> 11.942349303513765 - jsonspec invalid ==> 2.9887414034456015 - validictory valid ==> 0.7500483158230782 - validictory invalid ==> 0.03606216423213482 - -This library follows and implements `JSON schema `_. Sometimes -it's not perfectly clear so I recommend also check out this `understaning json schema -`_. + fast_compiled valid ==> 0.0464646 + fast_compiled invalid ==> 0.0030227 + fast_file valid ==> 0.0461219 + fast_file invalid ==> 0.0030608 + fast_not_compiled valid ==> 11.4627202 + fast_not_compiled invalid ==> 2.5726230 + jsonschema valid ==> 7.5844927 + jsonschema invalid ==> 1.9204665 + jsonschema_compiled valid ==> 0.6938364 + jsonschema_compiled invalid ==> 0.0359244 + jsonspec valid ==> 9.0715843 + jsonspec invalid ==> 2.1650488 + validictory valid ==> 0.4874793 + validictory invalid ==> 0.0232244 + +This library follows and implements `JSON schema draft-04, draft-06, and draft-07 +`_. Sometimes it's not perfectly clear so I recommend also +check out this `understanding JSON schema `_. Note that there are some differences compared to JSON schema standard: - * ``dependency`` for objects are not implemented yet. Future implementation will not change speed. - * ``patternProperty`` for objects are not implemented yet. Future implementation can little bit - slow down validation of object properties. Of course only for those who uses ``properties``. - * ``definitions`` for sharing JSON schema are not implemented yet. Future implementation will - not change speed. - * Regular expressions are full what Python provides, not only what JSON schema allows. 
It's easier + * Regular expressions are full Python ones, not only what JSON schema allows. It's easier to allow everything and also it's faster to compile without limits. So keep in mind that when - you will use more advanced regular expression, it may not work with other library. + you will use a more advanced regular expression, it may not work with other library or in + other languages. + * Because Python matches new line for a dollar in regular expressions (``a$`` matches ``a`` and ``a\\n``), + instead of ``$`` is used ``\Z`` and all dollars in your regular expression are changed to ``\\Z`` + as well. When you want to use dollar as regular character, you have to escape it (``\$``). * JSON schema says you can use keyword ``default`` for providing default values. This implementation uses that and always returns transformed input data. -Support only for Python 3.3 and higher. +API +*** """ -from .exceptions import JsonSchemaException -from .generator import CodeGenerator +from .draft04 import CodeGeneratorDraft04 +from .draft06 import CodeGeneratorDraft06 +from .draft07 import CodeGeneratorDraft07 +from .exceptions import JsonSchemaException, JsonSchemaDefinitionException +from .ref_resolver import RefResolver +from .version import VERSION + +__all__ = ('VERSION', 'JsonSchemaException', 'JsonSchemaDefinitionException', 'validate', 'compile', 'compile_to_code') -__all__ = ('JsonSchemaException', 'compile') +def validate(definition, data, handlers={}, formats={}): + """ + Validation function for lazy programmers or for use cases, when you need + to call validation only once, so you do not have to compile it first. + Use it only when you do not care about performance (even thought it will + be still faster than alternative implementations). + + .. 
code-block:: python + + import fastjsonschema + + fastjsonschema.validate({'type': 'string'}, 'hello') + # same as: compile({'type': 'string'})('hello') -def compile(definition): + Using the :any:`compile` function is preferred. """ - Generates validation function for validating JSON schema by ``definition``. Example: + return compile(definition, handlers, formats)(data) + + +# pylint: disable=redefined-builtin,dangerous-default-value,exec-used +def compile(definition, handlers={}, formats={}): + """ + Generates validation function for validating JSON schema passed in ``definition``. + Example: .. code-block:: python @@ -80,9 +132,88 @@ def compile(definition): data = validate({}) assert data == {'a': 42} - Exception :any:`JsonSchemaException` is thrown when validation fails. + Supported implementations are draft-04, draft-06 and draft-07. Which version + should be used is determined by `$schema` in your ``definition``. When not + specified, the latest implementation is used (draft-07). + + .. code-block:: python + + validate = fastjsonschema.compile({ + '$schema': 'http://json-schema.org/draft-04/schema', + 'type': 'number', + }) + + You can pass mapping from URI to function that should be used to retrieve + remote schemas used in your ``definition`` in parameter ``handlers``. + + Also, you can pass mapping for custom formats. Key is the name of your + formatter and value can be regular expression which will be compiled or + callback returning `bool` (or you can raise your own exception). + + .. code-block:: python + + validate = fastjsonschema.compile(definition, formats={ + 'foo': r'foo|bar', + 'bar': lambda value: value in ('foo', 'bar'), + }) + + Exception :any:`JsonSchemaDefinitionException` is raised when generating the + code fails (bad definition). + + Exception :any:`JsonSchemaException` is raised from the generated function when + validation fails (data do not follow the definition). 
+ """ + resolver, code_generator = _factory(definition, handlers, formats) + global_state = code_generator.global_state + # Do not pass local state so it can recursively call itself. + exec(code_generator.func_code, global_state) + return global_state[resolver.get_scope_name()] + + +# pylint: disable=dangerous-default-value +def compile_to_code(definition, handlers={}, formats={}): + """ + Generates validation code for validating JSON schema passed in ``definition``. + Example: + + .. code-block:: python + + import fastjsonschema + + code = fastjsonschema.compile_to_code({'type': 'string'}) + with open('your_file.py', 'w') as f: + f.write(code) + + You can also use it as a script: + + .. code-block:: bash + + echo "{'type': 'string'}" | python3 -m fastjsonschema > your_file.py + python3 -m fastjsonschema "{'type': 'string'}" > your_file.py + + Exception :any:`JsonSchemaDefinitionException` is raised when generating the + code fails (bad definition). """ - code_generator = CodeGenerator(definition) - local_state = {} - exec(code_generator.func_code, code_generator.global_state, local_state) - return local_state['func'] + _, code_generator = _factory(definition, handlers, formats) + return ( + 'VERSION = "' + VERSION + '"\n' + + code_generator.global_state_code + '\n' + + code_generator.func_code + ) + + +def _factory(definition, handlers, formats={}): + resolver = RefResolver.from_schema(definition, handlers=handlers) + code_generator = _get_code_generator_class(definition)(definition, resolver=resolver, formats=formats) + return resolver, code_generator + + +def _get_code_generator_class(schema): + # Schema in from draft-06 can be just the boolean value. 
+ if isinstance(schema, dict): + schema_version = schema.get('$schema', '') + if 'draft-04' in schema_version: + return CodeGeneratorDraft04 + if 'draft-06' in schema_version: + return CodeGeneratorDraft06 + return CodeGeneratorDraft07 diff --git a/fastjsonschema/__main__.py b/fastjsonschema/__main__.py new file mode 100644 index 0000000..e5f3aa7 --- /dev/null +++ b/fastjsonschema/__main__.py @@ -0,0 +1,19 @@ +import json +import sys + +from . import compile_to_code + + +def main(): + if len(sys.argv) == 2: + definition = sys.argv[1] + else: + definition = sys.stdin.read() + + definition = json.loads(definition) + code = compile_to_code(definition) + print(code) + + +if __name__ == '__main__': + main() diff --git a/fastjsonschema/draft04.py b/fastjsonschema/draft04.py new file mode 100644 index 0000000..bfa8049 --- /dev/null +++ b/fastjsonschema/draft04.py @@ -0,0 +1,579 @@ +import decimal +import re + +from .exceptions import JsonSchemaDefinitionException +from .generator import CodeGenerator, enforce_list + + +JSON_TYPE_TO_PYTHON_TYPE = { + 'null': 'NoneType', + 'boolean': 'bool', + 'number': 'int, float, Decimal', + 'integer': 'int', + 'string': 'str', + 'array': 'list, tuple', + 'object': 'dict', +} + +DOLLAR_FINDER = re.compile(r"(? {maxLength}:'): + self.exc('{name} must be shorter than or equal to {maxLength} characters', rule='maxLength') + + def generate_pattern(self): + with self.l('if isinstance({variable}, str):'): + pattern = self._definition['pattern'] + safe_pattern = pattern.replace('\\', '\\\\').replace('"', '\\"') + end_of_string_fixed_pattern = DOLLAR_FINDER.sub(r'\\Z', pattern) + self._compile_regexps[pattern] = re.compile(end_of_string_fixed_pattern) + with self.l('if not REGEX_PATTERNS[{}].search({variable}):', repr(pattern)): + self.exc('{name} must match pattern {}', safe_pattern, rule='pattern') + + def generate_format(self): + """ + Means that value have to be in specified format. For example date, email or other. + + .. 
code-block:: python + + {'format': 'email'} + + Valid value for this definition is user@example.com but not @username + """ + with self.l('if isinstance({variable}, str):'): + format_ = self._definition['format'] + # Checking custom formats - user is allowed to override default formats. + if format_ in self._custom_formats: + custom_format = self._custom_formats[format_] + if isinstance(custom_format, str): + self._generate_format(format_, format_ + '_re_pattern', custom_format) + else: + with self.l('if not custom_formats["{}"]({variable}):', format_): + self.exc('{name} must be {}', format_, rule='format') + elif format_ in self.FORMAT_REGEXS: + format_regex = self.FORMAT_REGEXS[format_] + self._generate_format(format_, format_ + '_re_pattern', format_regex) + # Format regex is used only in meta schemas. + elif format_ == 'regex': + with self.l('try:', optimize=False): + self.l('re.compile({variable})') + with self.l('except Exception:'): + self.exc('{name} must be a valid regex', rule='format') + else: + raise JsonSchemaDefinitionException('Unknown format: {}'.format(format_)) + + + def _generate_format(self, format_name, regexp_name, regexp): + if self._definition['format'] == format_name: + if not regexp_name in self._compile_regexps: + self._compile_regexps[regexp_name] = re.compile(regexp) + with self.l('if not REGEX_PATTERNS["{}"].match({variable}):', regexp_name): + self.exc('{name} must be {}', format_name, rule='format') + + def generate_minimum(self): + with self.l('if isinstance({variable}, (int, float)):'): + if not isinstance(self._definition['minimum'], (int, float)): + raise JsonSchemaDefinitionException('minimum must be a number') + if self._definition.get('exclusiveMinimum', False): + with self.l('if {variable} <= {minimum}:'): + self.exc('{name} must be bigger than {minimum}', rule='minimum') + else: + with self.l('if {variable} < {minimum}:'): + self.exc('{name} must be bigger than or equal to {minimum}', rule='minimum') + + def 
generate_maximum(self): + with self.l('if isinstance({variable}, (int, float)):'): + if not isinstance(self._definition['maximum'], (int, float)): + raise JsonSchemaDefinitionException('maximum must be a number') + if self._definition.get('exclusiveMaximum', False): + with self.l('if {variable} >= {maximum}:'): + self.exc('{name} must be smaller than {maximum}', rule='maximum') + else: + with self.l('if {variable} > {maximum}:'): + self.exc('{name} must be smaller than or equal to {maximum}', rule='maximum') + + def generate_multiple_of(self): + with self.l('if isinstance({variable}, (int, float)):'): + if not isinstance(self._definition['multipleOf'], (int, float)): + raise JsonSchemaDefinitionException('multipleOf must be a number') + # For proper multiplication check of floats we need to use decimals, + # because for example 19.01 / 0.01 = 1901.0000000000002. + if isinstance(self._definition['multipleOf'], float): + self._extra_imports_lines.append('from decimal import Decimal') + self._extra_imports_objects['Decimal'] = decimal.Decimal + self.l('quotient = Decimal(repr({variable})) / Decimal(repr({multipleOf}))') + else: + self.l('quotient = {variable} / {multipleOf}') + with self.l('if int(quotient) != quotient:'): + self.exc('{name} must be multiple of {multipleOf}', rule='multipleOf') + + def generate_min_items(self): + self.create_variable_is_list() + with self.l('if {variable}_is_list:'): + if not isinstance(self._definition['minItems'], int): + raise JsonSchemaDefinitionException('minItems must be a number') + self.create_variable_with_length() + with self.l('if {variable}_len < {minItems}:'): + self.exc('{name} must contain at least {minItems} items', rule='minItems') + + def generate_max_items(self): + self.create_variable_is_list() + with self.l('if {variable}_is_list:'): + if not isinstance(self._definition['maxItems'], int): + raise JsonSchemaDefinitionException('maxItems must be a number') + self.create_variable_with_length() + with self.l('if 
{variable}_len > {maxItems}:'): + self.exc('{name} must contain less than or equal to {maxItems} items', rule='maxItems') + + def generate_unique_items(self): + """ + With Python 3.4 module ``timeit`` recommended this solutions: + + .. code-block:: python + + >>> timeit.timeit("len(x) > len(set(x))", "x=range(100)+range(100)", number=100000) + 0.5839540958404541 + >>> timeit.timeit("len({}.fromkeys(x)) == len(x)", "x=range(100)+range(100)", number=100000) + 0.7094449996948242 + >>> timeit.timeit("seen = set(); any(i in seen or seen.add(i) for i in x)", "x=range(100)+range(100)", number=100000) + 2.0819358825683594 + >>> timeit.timeit("np.unique(x).size == len(x)", "x=range(100)+range(100); import numpy as np", number=100000) + 2.1439831256866455 + """ + self.create_variable_is_list() + with self.l('if {variable}_is_list:'): + self.create_variable_with_length() + with self.l('if {variable}_len > len(set(str({variable}_x) for {variable}_x in {variable})):'): + self.exc('{name} must contain unique items', rule='uniqueItems') + + def generate_items(self): + """ + Means array is valid only when all items are valid by this definition. + + .. code-block:: python + + { + 'items': [ + {'type': 'integer'}, + {'type': 'string'}, + ], + } + + Valid arrays are those with integers or strings, nothing else. + + Since draft 06 definition can be also boolean. True means nothing, False + means everything is invalid. 
+ """ + items_definition = self._definition['items'] + if items_definition is True: + return + + self.create_variable_is_list() + with self.l('if {variable}_is_list:'): + self.create_variable_with_length() + if items_definition is False: + with self.l('if {variable}:'): + self.exc('{name} must not be there', rule='items') + elif isinstance(items_definition, list): + for idx, item_definition in enumerate(items_definition): + with self.l('if {variable}_len > {}:', idx): + self.l('{variable}__{0} = {variable}[{0}]', idx) + self.generate_func_code_block( + item_definition, + '{}__{}'.format(self._variable, idx), + '{}[{}]'.format(self._variable_name, idx), + ) + if isinstance(item_definition, dict) and 'default' in item_definition: + self.l('else: {variable}.append({})', repr(item_definition['default'])) + + if 'additionalItems' in self._definition: + if self._definition['additionalItems'] is False: + with self.l('if {variable}_len > {}:', len(items_definition)): + self.exc('{name} must contain only specified items', rule='items') + else: + with self.l('for {variable}_x, {variable}_item in enumerate({variable}[{0}:], {0}):', len(items_definition)): + self.generate_func_code_block( + self._definition['additionalItems'], + '{}_item'.format(self._variable), + '{}[{{{}_x}}]'.format(self._variable_name, self._variable), + ) + else: + if items_definition: + with self.l('for {variable}_x, {variable}_item in enumerate({variable}):'): + self.generate_func_code_block( + items_definition, + '{}_item'.format(self._variable), + '{}[{{{}_x}}]'.format(self._variable_name, self._variable), + ) + + def generate_min_properties(self): + self.create_variable_is_dict() + with self.l('if {variable}_is_dict:'): + if not isinstance(self._definition['minProperties'], int): + raise JsonSchemaDefinitionException('minProperties must be a number') + self.create_variable_with_length() + with self.l('if {variable}_len < {minProperties}:'): + self.exc('{name} must contain at least {minProperties} 
def generate_required(self):
    """
    Emit the ``required`` check: an object must contain every listed property.

    Raises ``JsonSchemaDefinitionException`` when ``required`` is neither a
    list nor a tuple.
    """
    self.create_variable_is_dict()
    with self.l('if {variable}_is_dict:'):
        required_props = self._definition['required']
        if isinstance(required_props, (list, tuple)):
            self.create_variable_with_length()
            with self.l('if not all(prop in {variable} for prop in {required}):'):
                self.exc(
                    '{name} must contain {} properties',
                    self.e(required_props),
                    rule='required',
                )
        else:
            raise JsonSchemaDefinitionException('required must be an array')
def generate_pattern_properties(self):
    """
    Emit validation for object values whose keys match a regex pattern.

    .. code-block:: python

        {
            'patternProperties': {
                '^x': {'type': 'number'},
            },
        }

    With this definition every key starting with ``x`` must hold a number.

    Each pattern is compiled into ``self._compile_regexps`` and looked up in
    the generated code through ``REGEX_PATTERNS``. A key that matches a
    pattern is removed from ``{variable}_keys`` in the generated code.
    """
    self.create_variable_is_dict()
    with self.l('if {variable}_is_dict:'):
        self.create_variable_keys()
        pattern_schemas = self._definition['patternProperties']
        for regex in pattern_schemas:
            self._compile_regexps[regex] = re.compile(regex)

        # Same value variable and display name for every pattern.
        value_variable = '{}_val'.format(self._variable)
        value_name = '{}.{{{}_key}}'.format(self._variable_name, self._variable)

        with self.l('for {variable}_key, {variable}_val in {variable}.items():'):
            for regex, schema in pattern_schemas.items():
                with self.l('if REGEX_PATTERNS[{}].search({variable}_key):', repr(regex)):
                    with self.l('if {variable}_key in {variable}_keys:'):
                        self.l('{variable}_keys.remove({variable}_key)')
                    self.generate_func_code_block(
                        schema,
                        value_variable,
                        value_name,
                        clear_variables=True,
                    )
def generate_dependencies(self):
    """
    Emit the ``dependencies`` check: when an object has a property, it must
    also satisfy what that property depends on.

    .. code-block:: python

        {
            'dependencies': {
                'bar': ['foo'],
            },
        }

    A valid object contains only foo, both bar and foo, or neither of them,
    but not bar alone.

    Since draft 06 a dependency can be a boolean or an empty array. ``True``
    and an empty array mean nothing; ``False`` means the key cannot be there
    at all. Any other value is treated as a schema the whole object must
    satisfy when the key is present.
    """
    self.create_variable_is_dict()
    with self.l('if {variable}_is_dict:'):
        is_empty = True
        for key, values in self._definition["dependencies"].items():
            if values == [] or values is True:
                continue
            is_empty = False
            with self.l('if "{}" in {variable}:', self.e(key)):
                if values is False:
                    # The key lands inside a double-quoted literal of the
                    # generated code, so it has to be escaped like the
                    # other user-supplied values below.
                    self.exc('{} in {name} must not be there', self.e(key), rule='dependencies')
                elif isinstance(values, list):
                    for value in values:
                        with self.l('if "{}" not in {variable}:', self.e(value)):
                            self.exc(
                                '{name} missing dependency {} for {}',
                                self.e(value),
                                self.e(key),
                                rule='dependencies',
                            )
                else:
                    self.generate_func_code_block(
                        values,
                        self._variable,
                        self._variable_name,
                        clear_variables=True,
                    )
        if is_empty:
            # Keep the generated ``if`` block syntactically valid.
            self.l('pass')
def generate_exclusive_minimum(self):
    """
    Emit the draft-06 ``exclusiveMinimum`` check: a number must be strictly
    greater than the limit.

    Raises ``JsonSchemaDefinitionException`` when the limit is neither an
    int nor a float.
    """
    with self.l('if isinstance({variable}, (int, float)):'):
        limit = self._definition['exclusiveMinimum']
        if isinstance(limit, (int, float)):
            with self.l('if {variable} <= {exclusiveMinimum}:'):
                self.exc(
                    '{name} must be bigger than {exclusiveMinimum}',
                    rule='exclusiveMinimum',
                )
        else:
            raise JsonSchemaDefinitionException('exclusiveMinimum must be an integer or a float')
def generate_property_names(self):
    """
    Emit the ``propertyNames`` check: every key of an object must be valid
    against the given definition.

    .. code-block:: python

        {
            'propertyNames': {
                'maxLength': 3,
            },
        }

    Valid keys for this definition are foo, bar, ... but not foobar.

    The definition may also be a boolean: ``True`` generates nothing and
    ``False`` rejects any object that has at least one key. In both the
    boolean and the schema form the check only applies to objects; other
    data types are left alone.
    """
    property_names_definition = self._definition.get('propertyNames', {})
    if property_names_definition is True:
        return

    self.create_variable_is_dict()
    if property_names_definition is False:
        # Guard with the dict check: ``{variable}.keys()`` in the generated
        # code would fail on non-object data.
        with self.l('if {variable}_is_dict:'):
            self.create_variable_keys()
            with self.l('if {variable}_keys:'):
                self.exc('{name} must not be there', rule='propertyNames')
        return

    with self.l('if {variable}_is_dict:'):
        self.create_variable_with_length()
        with self.l('if {variable}_len != 0:'):
            self.l('{variable}_property_names = True')
            with self.l('for {variable}_key in {variable}:'):
                with self.l('try:'):
                    self.generate_func_code_block(
                        property_names_definition,
                        '{}_key'.format(self._variable),
                        self._variable_name,
                        clear_variables=True,
                    )
                with self.l('except JsonSchemaException:'):
                    self.l('{variable}_property_names = False')
            with self.l('if not {variable}_property_names:'):
                self.exc('{name} must be named by propertyName definition', rule='propertyNames')
def generate_const(self):
    """
    Emit the ``const`` check: a value is valid only when it equals the
    const definition.

    .. code-block:: python

        {
            'const': 42,
        }

    Only valid value is 42 in this example.

    The constant is written into the generated code with ``repr`` so that
    strings containing quotes, backslashes or newlines become a valid
    Python literal that compares equal to the original value.
    """
    const = self._definition['const']
    with self.l('if {variable} != {}:', repr(const)):
        self.exc('{name} must be same as const definition', rule='const')
def generate_if_then_else(self):
    """
    Emit the ``if``/``then``/``else`` keywords as a generated
    ``try``/``except``/``else`` statement.

    .. code-block:: python

        {
            'if': {
                'exclusiveMaximum': 0,
            },
            'then': {
                'minimum': -10,
            },
            'else': {
                'multipleOf': 2,
            },
        }

    Valid values are any between -10 and 0 or any multiplication of two.

    The ``if`` definition is checked inside ``try``; when it fails the
    ``else`` definition (or ``pass`` when there is none) runs in the
    ``except`` branch, and when it succeeds the ``then`` definition, if
    given, runs in the ``else`` branch of the ``try``.
    """
    schema = self._definition

    def emit_branch(keyword):
        # Generate the sub-definition against the current variable.
        self.generate_func_code_block(
            schema[keyword],
            self._variable,
            self._variable_name,
            clear_variables=True,
        )

    with self.l('try:'):
        emit_branch('if')
    with self.l('except JsonSchemaException:'):
        if 'else' not in schema:
            self.l('pass')
        else:
            emit_branch('else')
    if 'then' in schema:
        with self.l('else:'):
            emit_branch('then')
# Separators of a data path such as ``data.property[index]``.
SPLIT_RE = re.compile(r'[\.\[\]]+')


class JsonSchemaException(ValueError):
    """
    Exception raised by validation function. Available properties:

     * ``message`` containing human-readable information what is wrong
       (e.g. ``data.property[index] must be smaller than or equal to 42``),
     * invalid ``value`` (e.g. ``60``),
     * ``name`` of a path in the data structure (e.g. ``data.property[index]``),
     * ``path`` as an array in the data structure
       (e.g. ``['data', 'property', 'index']``),
     * the whole ``definition`` which the ``value`` has to fulfil
       (e.g. ``{'type': 'number', 'maximum': 42}``),
     * ``rule`` which the ``value`` is breaking (e.g. ``maximum``)
     * and ``rule_definition`` (e.g. ``42``).

    .. versionchanged:: 2.14.0
        Added all extra properties.
    """

    def __init__(self, message, value=None, name=None, definition=None, rule=None):
        super().__init__(message)
        self.message = message
        self.value = value
        self.name = name
        self.definition = definition
        self.rule = rule

    @property
    def path(self):
        """
        Return ``name`` split on dots and brackets, without empty parts.

        An empty list is returned when the exception carries no ``name``.
        """
        if self.name is None:
            return []
        return [item for item in SPLIT_RE.split(self.name) if item != '']

    @property
    def rule_definition(self):
        """
        Return the value of the broken ``rule`` inside ``definition``.

        ``None`` is returned when the rule or the definition is missing, or
        when the definition is not a mapping (e.g. a boolean schema).
        """
        if not self.rule or not isinstance(self.definition, dict):
            return None
        return self.definition.get(self.rule)


class JsonSchemaDefinitionException(JsonSchemaException):
    """
    Exception raised by generator of validation function.
    """
+ # Lines are imports to be printed in the file and objects + # key-value pair to pass to compile function directly. + self._extra_imports_lines = [] + self._extra_imports_objects = {} + self._variables = set() self._indent = 0 + self._indent_last_line = None self._variable = None self._variable_name = None + self._root_definition = definition self._definition = None - self._json_keywords_to_function = OrderedDict(( - ('type', self.generate_type), - ('enum', self.generate_enum), - ('allOf', self.generate_all_of), - ('anyOf', self.generate_any_of), - ('oneOf', self.generate_one_of), - ('not', self.generate_not), - ('minLength', self.generate_min_length), - ('maxLength', self.generate_max_length), - ('pattern', self.generate_pattern), - ('minimum', self.generate_minimum), - ('maximum', self.generate_maximum), - ('multipleOf', self.generate_multiple_of), - ('minItems', self.generate_min_items), - ('maxItems', self.generate_max_items), - ('uniqueItems', self.generate_unique_items), - ('items', self.generate_items), - ('minProperties', self.generate_min_properties), - ('maxProperties', self.generate_max_properties), - ('required', self.generate_required), - ('properties', self.generate_properties), - )) - - self.generate_func_code(definition) + # map schema URIs to validation function names for functions + # that are not yet generated, but need to be generated + self._needed_validation_functions = {} + # validation function names that are already done + self._validation_functions_done = set() + + if resolver is None: + resolver = RefResolver.from_schema(definition) + self._resolver = resolver + + # add main function to `self._needed_validation_functions` + self._needed_validation_functions[self._resolver.get_uri()] = self._resolver.get_scope_name() + + self._json_keywords_to_function = OrderedDict() @property def func_code(self): """ Returns generated code of whole validation function as string. 
@property
def global_state_code(self):
    """
    Returns global variables for generating function from ``func_code`` as code.
    Includes compiled regular expressions and imports.

    Pattern keys and pattern sources are written with ``repr`` so that
    patterns containing quotes or backslashes still produce valid Python
    source that compiles to the same regular expression.
    """
    self._generate_func_code()

    if not self._compile_regexps:
        return '\n'.join(self._extra_imports_lines + [
            'from fastjsonschema import JsonSchemaException',
            '',
            '',
        ])
    regexs = [
        '{!r}: re.compile({!r})'.format(key, value.pattern)
        for key, value in self._compile_regexps.items()
    ]
    return '\n'.join(self._extra_imports_lines + [
        'import re',
        'from fastjsonschema import JsonSchemaException',
        '',
        '',
        'REGEX_PATTERNS = {',
        '    ' + ',\n    '.join(regexs),
        '}',
        '',
    ])
def generate_func_code_block(self, definition, variable, variable_name, clear_variables=False):
    """
    Creates validation rules for current definition.

    ``definition``, ``variable`` and ``variable_name`` become the current
    generator state while the rules are generated, and the previous state
    is put back afterwards, also when generation raises. With
    ``clear_variables`` the set of already created helper variables starts
    empty for the nested block and is restored afterwards as well.
    """
    backup = self._definition, self._variable, self._variable_name
    backup_variables = self._variables
    self._definition, self._variable, self._variable_name = definition, variable, variable_name
    if clear_variables:
        self._variables = set()

    try:
        self._generate_func_code_block(definition)
    finally:
        # Restore even on failure so the generator is not left pointing at
        # a nested definition.
        self._definition, self._variable, self._variable_name = backup
        if clear_variables:
            self._variables = backup_variables
def e(self, string):
    """
    Short-cut of escape. Used for inserting user values into a string message.

    .. code-block:: python

        self.l('raise JsonSchemaException("Variable: {}")', self.e(variable))

    The value is converted with ``str`` and made safe for a double-quoted
    literal: backslashes are doubled first, then double quotes are escaped.
    """
    # Backslashes go first, otherwise the ones added for quotes would be
    # doubled too.
    return str(string).replace('\\', '\\\\').replace('"', '\\"')
def create_variable_keys(self):
    """
    Append code that stores the keys of the current variable (a dictionary)
    as a set named ``{variable}_keys``.

    Like ``create_variable_with_length``, it can be called repeatedly: the
    line is emitted only while the name is not yet registered in
    ``self._variables``.
    """
    keys_name = '{}_keys'.format(self._variable)
    if keys_name not in self._variables:
        self._variables.add(keys_name)
        self.l('{variable}_keys = set({variable}.keys())')
""" - with self.l('try:'): - self.generate_func_code_block(self._definition['not'], self._variable, self._variable_name) - self.l('except JsonSchemaException: pass') - self.l('else: raise JsonSchemaException("{name} must not be valid by not definition")') - - def generate_min_length(self): - self.create_variable_with_length() - with self.l('if {variable}_len < {minLength}:'): - self.l('raise JsonSchemaException("{name} must be longer than or equal to {minLength} characters")') - - def generate_max_length(self): - self.create_variable_with_length() - with self.l('if {variable}_len > {maxLength}:'): - self.l('raise JsonSchemaException("{name} must be shorter than or equal to {maxLength} characters")') - - def generate_pattern(self): - self._compile_regexps['{}_re'.format(self._variable)] = re.compile(self._definition['pattern']) - with self.l('if not {variable}_re.match({variable}):'): - self.l('raise JsonSchemaException("{name} must match pattern {pattern}")') - - def generate_minimum(self): - if self._definition.get('exclusiveMinimum', False): - with self.l('if {variable} <= {minimum}:'): - self.l('raise JsonSchemaException("{name} must be bigger than {minimum}")') - else: - with self.l('if {variable} < {minimum}:'): - self.l('raise JsonSchemaException("{name} must be bigger than or equal to {minimum}")') - - def generate_maximum(self): - if self._definition.get('exclusiveMaximum', False): - with self.l('if {variable} >= {maximum}:'): - self.l('raise JsonSchemaException("{name} must be smaller than {maximum}")') - else: - with self.l('if {variable} > {maximum}:'): - self.l('raise JsonSchemaException("{name} must be smaller than or equal to {maximum}")') - - def generate_multiple_of(self): - with self.l('if {variable} % {multipleOf} != 0:'): - self.l('raise JsonSchemaException("{name} must be multiple of {multipleOf}")') - - def generate_min_items(self): - self.create_variable_with_length() - with self.l('if {variable}_len < {minItems}:'): - self.l('raise 
def create_variable_is_dict(self):
    """
    Append code that stores, in a variable named ``{variable}_is_dict``,
    whether the current variable is an instance of ``dict``.

    Like ``create_variable_with_length``, it can be called repeatedly: the
    line is emitted only while the name is not yet registered in
    ``self._variables``.
    """
    flag_name = '{}_is_dict'.format(self._variable)
    if flag_name not in self._variables:
        self._variables.add(flag_name)
        self.l('{variable}_is_dict = isinstance({variable}, dict)')
""" - def wrapper(self, *args, **kwds): - func(self, *args, **kwds) - return Indent(self) + def wrapper(self, line, *args, optimize=True, **kwds): + last_line = self._indent_last_line + line = func(self, line, *args, **kwds) + # When two blocks have the same condition (such as value has to be dict), + # do the check only once and keep it under one block. + if optimize and last_line == line: + self._code.pop() + self._indent_last_line = line + return Indent(self, line) return wrapper class Indent: - def __init__(self, instance): + def __init__(self, instance, line): self.instance = instance + self.line = line def __enter__(self): self.instance._indent += 1 - def __exit__(self, type, value, traceback): + def __exit__(self, type_, value, traceback): self.instance._indent -= 1 + self.instance._indent_last_line = self.line diff --git a/fastjsonschema/ref_resolver.py b/fastjsonschema/ref_resolver.py new file mode 100644 index 0000000..38cf7e0 --- /dev/null +++ b/fastjsonschema/ref_resolver.py @@ -0,0 +1,169 @@ +""" +JSON Schema URI resolution scopes and dereferencing + +https://tools.ietf.org/id/draft-zyp-json-schema-04.html#rfc.section.7 + +Code adapted from https://github.com/Julian/jsonschema +""" + +import contextlib +import json +import re +from urllib import parse as urlparse +from urllib.parse import unquote +from urllib.request import urlopen + +from .exceptions import JsonSchemaDefinitionException + + +def get_id(schema): + """ + Originally ID was `id` and since v7 it's `$id`. + """ + return schema.get('$id', schema.get('id', '')) + + +def resolve_path(schema, fragment): + """ + Return definition from path. 
+ + Path is unescaped according https://tools.ietf.org/html/rfc6901 + """ + fragment = fragment.lstrip('/') + parts = unquote(fragment).split('/') if fragment else [] + for part in parts: + part = part.replace('~1', '/').replace('~0', '~') + if isinstance(schema, list): + schema = schema[int(part)] + elif part in schema: + schema = schema[part] + else: + raise JsonSchemaDefinitionException('Unresolvable ref: {}'.format(part)) + return schema + + +def normalize(uri): + return urlparse.urlsplit(uri).geturl() + + +def resolve_remote(uri, handlers): + """ + Resolve a remote ``uri``. + + .. note:: + + urllib library is used to fetch requests from the remote ``uri`` + if handlers does notdefine otherwise. + """ + scheme = urlparse.urlsplit(uri).scheme + if scheme in handlers: + result = handlers[scheme](uri) + else: + req = urlopen(uri) + encoding = req.info().get_content_charset() or 'utf-8' + try: + result = json.loads(req.read().decode(encoding),) + except ValueError as exc: + raise JsonSchemaDefinitionException('{} failed to decode: {}'.format(uri, exc)) + return result + + +class RefResolver: + """ + Resolve JSON References. + """ + + # pylint: disable=dangerous-default-value,too-many-arguments + def __init__(self, base_uri, schema, store={}, cache=True, handlers={}): + """ + `base_uri` is URI of the referring document from the `schema`. + """ + self.base_uri = base_uri + self.resolution_scope = base_uri + self.schema = schema + self.store = store + self.cache = cache + self.handlers = handlers + self.walk(schema) + + @classmethod + def from_schema(cls, schema, handlers={}, **kwargs): + """ + Construct a resolver from a JSON schema object. + """ + return cls( + get_id(schema) if isinstance(schema, dict) else '', + schema, + handlers=handlers, + **kwargs + ) + + @contextlib.contextmanager + def in_scope(self, scope: str): + """ + Context manager to handle current scope. 
+ """ + old_scope = self.resolution_scope + self.resolution_scope = urlparse.urljoin(old_scope, scope) + try: + yield + finally: + self.resolution_scope = old_scope + + @contextlib.contextmanager + def resolving(self, ref: str): + """ + Context manager which resolves a JSON ``ref`` and enters the + resolution scope of this ref. + """ + new_uri = urlparse.urljoin(self.resolution_scope, ref) + uri, fragment = urlparse.urldefrag(new_uri) + + if uri and normalize(uri) in self.store: + schema = self.store[normalize(uri)] + elif not uri or uri == self.base_uri: + schema = self.schema + else: + schema = resolve_remote(uri, self.handlers) + if self.cache: + self.store[normalize(uri)] = schema + + old_base_uri, old_schema = self.base_uri, self.schema + self.base_uri, self.schema = uri, schema + try: + with self.in_scope(uri): + yield resolve_path(schema, fragment) + finally: + self.base_uri, self.schema = old_base_uri, old_schema + + def get_uri(self): + return normalize(self.resolution_scope) + + def get_scope_name(self): + """ + Get current scope and return it as a valid function name. 
+ """ + name = 'validate_' + unquote(self.resolution_scope).replace('~1', '_').replace('~0', '_').replace('"', '') + name = re.sub(r'($[^a-zA-Z]|[^a-zA-Z0-9])', '_', name) + name = name.lower().rstrip('_') + return name + + def walk(self, node: dict): + """ + Walk thru schema and dereferencing ``id`` and ``$ref`` instances + """ + if isinstance(node, bool): + pass + elif '$ref' in node and isinstance(node['$ref'], str): + ref = node['$ref'] + node['$ref'] = urlparse.urljoin(self.resolution_scope, ref) + elif ('$id' in node or 'id' in node) and isinstance(get_id(node), str): + with self.in_scope(get_id(node)): + self.store[normalize(self.resolution_scope)] = node + for _, item in node.items(): + if isinstance(item, dict): + self.walk(item) + else: + for _, item in node.items(): + if isinstance(item, dict): + self.walk(item) diff --git a/fastjsonschema/version.py b/fastjsonschema/version.py new file mode 100644 index 0000000..32f7293 --- /dev/null +++ b/fastjsonschema/version.py @@ -0,0 +1 @@ +VERSION = '2.14.5' diff --git a/performance.py b/performance.py index c72e3b4..31824a3 100644 --- a/performance.py +++ b/performance.py @@ -1,5 +1,7 @@ - +import importlib.util import timeit +import tempfile +from textwrap import dedent # apt-get install jsonschema json-spec validictory import fastjsonschema @@ -11,6 +13,7 @@ NUMBER = 1000 JSON_SCHEMA = { + '$schema': 'http://json-schema.org/draft-04/schema#', 'type': 'array', 'items': [ { @@ -71,8 +74,35 @@ fastjsonschema_validate = fastjsonschema.compile(JSON_SCHEMA) -fast_compiled = lambda value, _: fastjsonschema_validate(value) -fast_not_compiled = lambda value, json_schema: fastjsonschema.compile(json_schema)(value) + + +def fast_compiled(value, _): + fastjsonschema_validate(value) + + +def fast_not_compiled(value, json_schema): + fastjsonschema.compile(json_schema)(value) + + +validator_class = jsonschema.validators.validator_for(JSON_SCHEMA) +validator = validator_class(JSON_SCHEMA) + + +def jsonschema_compiled(value, 
_): + validator.validate(value) + + +with tempfile.NamedTemporaryFile('w', suffix='.py') as tmp_file: + tmp_file.write(fastjsonschema.compile_to_code(JSON_SCHEMA)) + tmp_file.flush() + spec = importlib.util.spec_from_file_location("temp.performance", tmp_file.name) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + +def fast_file(value, _): + module.validate(value) + jsonspec = load(JSON_SCHEMA) @@ -88,26 +118,28 @@ def t(func, valid_values=True): jsonschema, jsonspec, fast_compiled, + fast_file, fast_not_compiled, + jsonschema_compiled, ) """ if valid_values: - code = """ + code = dedent(""" for value in VALUES_OK: {}(value, JSON_SCHEMA) - """.format(func) + """.format(func)) else: - code = """ + code = dedent(""" try: for value in VALUES_BAD: {}(value, JSON_SCHEMA) except: pass - """.format(func) + """.format(func)) res = timeit.timeit(code, setup, number=NUMBER) - print('{:<20} {:<10} ==> {}'.format(module, 'valid' if valid_values else 'invalid', res)) + print('{:<20} {:<10} ==> {:10.7f}'.format(module, 'valid' if valid_values else 'invalid', res)) print('Number: {}'.format(NUMBER)) @@ -115,12 +147,18 @@ def t(func, valid_values=True): t('fast_compiled') t('fast_compiled', valid_values=False) +t('fast_file') +t('fast_file', valid_values=False) + t('fast_not_compiled') t('fast_not_compiled', valid_values=False) t('jsonschema.validate') t('jsonschema.validate', valid_values=False) +t('jsonschema_compiled') +t('jsonschema_compiled', valid_values=False) + t('jsonspec.validate') t('jsonspec.validate', valid_values=False) diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000..c64248d --- /dev/null +++ b/pylintrc @@ -0,0 +1,33 @@ + +[MASTER] +ignore=tests + +[MESSAGES CONTROL] +# missing-docstring can be removed after this issue is deployed https://github.com/PyCQA/pylint/issues/1164 +disable=duplicate-code,missing-docstring + +[REPORTS] +output-format=colorized + +[VARIABLES] +init-import=no 
+dummy-variables-rgx=_|dummy + +[TYPECHECK] +ignore-mixin-members=yes + +[BASIC] +no-docstring-rgx=_.* +docstring-min-length=3 +good-names=_ +bad-names=foo,bar,baz,foobar + +[DESIGN] +min-public-methods=1 +max-public-methods=20 + +[FORMAT] +max-line-length=120 + +[MISCELLANEOUS] +notes=FIXME,XXX,TODO diff --git a/schema.json b/schema.json new file mode 100644 index 0000000..a5babda --- /dev/null +++ b/schema.json @@ -0,0 +1,40 @@ +{ + "type": "array", + "items": [ + { + "type": "number", + "maximum": 10, + "exclusiveMaximum": true + }, + { + "type": "string", + "enum": ["hello", "world"] + }, + { + "type": "array", + "minItems": 1, + "maxItems": 3, + "items": [ + {"type": "number"}, + {"type": "string"}, + {"type": "boolean"} + ] + }, + { + "type": "object", + "required": ["a", "b"], + "minProperties": 3, + "properties": { + "a": {"type": ["null", "string"]}, + "b": {"type": ["null", "string"]}, + "c": {"type": ["null", "string"], "default": "abc"} + }, + "additionalProperties": {"type": "string"} + }, + {"not": {"type": ["null"]}}, + {"oneOf": [ + {"type": "number", "multipleOf": 3}, + {"type": "number", "multipleOf": 5} + ]} + ] +} diff --git a/setup.py b/setup.py index fc8acf0..e7720d1 100644 --- a/setup.py +++ b/setup.py @@ -1,25 +1,55 @@ #!/usr/bin/env python +import os try: from setuptools import setup except ImportError: from distutils.core import setup +with open(os.path.join(os.path.dirname(__file__), "README.rst")) as readme: + LONG_DESCRIPTION = readme.read() + +# https://packaging.python.org/en/latest/single_source_version.html +try: + execfile('fastjsonschema/version.py') +except NameError: + exec(open('fastjsonschema/version.py').read()) + setup( name='fastjsonschema', - version='1.1.1', + version=VERSION, packages=['fastjsonschema'], + extras_require={ + 'devel': [ + 'colorama', + 'jsonschema', + 'json-spec', + 'pylint', + 'pytest', + 'pytest-benchmark', + 'pytest-cache', + 'validictory', + ], + }, 
url='https://github.com/seznam/python-fastjsonschema', author='Michal Horejsek', author_email='horejsekmichal@gmail.com', description='Fastest Python implementation of JSON schema', + long_description=LONG_DESCRIPTION, license='BSD', classifiers=[ 'Programming Language :: Python', 'Programming Language :: Python :: 3', + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: Implementation :: CPython", 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Development Status :: 5 - Production/Stable', diff --git a/tests/benchmarks/test_benchmark.py b/tests/benchmarks/test_benchmark.py new file mode 100644 index 0000000..31098b3 --- /dev/null +++ b/tests/benchmarks/test_benchmark.py @@ -0,0 +1,82 @@ +import pytest + +import fastjsonschema + + +JSON_SCHEMA = { + 'type': 'array', + 'items': [ + { + 'type': 'number', + 'exclusiveMaximum': 10, + }, + { + 'type': 'string', + 'enum': ['hello', 'world'], + }, + { + 'type': 'array', + 'minItems': 1, + 'maxItems': 3, + 'items': [ + {'type': 'number'}, + {'type': 'string'}, + {'type': 'boolean'}, + ], + }, + { + 'type': 'object', + 'required': ['a', 'b'], + 'minProperties': 3, + 'properties': { + 'a': {'type': ['null', 'string']}, + 'b': {'type': ['null', 'string']}, + 'c': {'type': ['null', 'string'], 'default': 'abc'} + }, + 'additionalProperties': {'type': 'string'}, + }, + {'not': {'type': ['null']}}, + {'oneOf': [ + {'type': 'number', 'multipleOf': 3}, + {'type': 'number', 'multipleOf': 5}, + ]}, + ], +} + + +fastjsonschema_validate = fastjsonschema.compile(JSON_SCHEMA) + + +@pytest.mark.benchmark(min_rounds=20) +@pytest.mark.parametrize('value', ( + [9, 'hello', [1, 'a', True], {'a': 'a', 'b': 'b', 'd': 'd'}, 42, 3], + [9, 'world', [1, 'a', True], {'a': 'a', 
'b': 'b', 'd': 'd'}, 42, 3], + [9, 'world', [1, 'a', True], {'a': 'a', 'b': 'b', 'c': 'xy'}, 42, 3], + [9, 'world', [1, 'a', True], {'a': 'a', 'b': 'b', 'c': 'xy'}, 'str', 5], +)) +def test_benchmark_ok_values(benchmark, value): + @benchmark + def f(): + fastjsonschema_validate(value) + + +@pytest.mark.benchmark(min_rounds=20) +@pytest.mark.parametrize('value', ( + [10, 'world', [1, 'a', True], {'a': 'a', 'b': 'b', 'c': 'xy'}, 'str', 5], + [9, 'xxx', [1, 'a', True], {'a': 'a', 'b': 'b', 'c': 'xy'}, 'str', 5], + [9, 'hello', [], {'a': 'a', 'b': 'b', 'c': 'xy'}, 'str', 5], + [9, 'hello', [1, 2, 3], {'a': 'a', 'b': 'b', 'c': 'xy'}, 'str', 5], + [9, 'hello', [1, 'a', True], {'a': 'a', 'x': 'x', 'y': 'y'}, 'str', 5], + [9, 'hello', [1, 'a', True], {}, 'str', 5], + [9, 'hello', [1, 'a', True], {'a': 'a', 'b': 'b', 'x': 'x'}, None, 5], + [9, 'hello', [1, 'a', True], {'a': 'a', 'b': 'b', 'x': 'x'}, 42, 15], +)) +def test_benchmark_bad_values(benchmark, value): + @benchmark + def f(): + try: + fastjsonschema_validate(value) + except fastjsonschema.JsonSchemaException: + pass + else: + pytest.fail('Exception is not raised') diff --git a/tests/conftest.py b/tests/conftest.py index 87bfbb7..756429e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,3 @@ - import os import sys @@ -11,22 +10,29 @@ import pytest from fastjsonschema import JsonSchemaException, compile -from fastjsonschema.generator import CodeGenerator +from fastjsonschema.draft07 import CodeGeneratorDraft07 @pytest.fixture def asserter(): - def f(definition, value, expected): + def f(definition, value, expected, formats={}): # When test fails, it will show up code. - code_generator = CodeGenerator(definition) + code_generator = CodeGeneratorDraft07(definition, formats=formats) print(code_generator.func_code) pprint(code_generator.global_state) - validator = compile(definition) + # By default old tests are written for draft-04. 
+ definition.setdefault('$schema', 'http://json-schema.org/draft-04/schema') + + validator = compile(definition, formats=formats) if isinstance(expected, JsonSchemaException): with pytest.raises(JsonSchemaException) as exc: validator(value) assert exc.value.message == expected.message + assert exc.value.value == (value if expected.value == '{data}' else expected.value) + assert exc.value.name == expected.name + assert exc.value.definition == (definition if expected.definition == '{definition}' else expected.definition) + assert exc.value.rule == expected.rule else: assert validator(value) == expected return f diff --git a/tests/json_schema/__init__.py b/tests/json_schema/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/json_schema/test_draft04.py b/tests/json_schema/test_draft04.py new file mode 100644 index 0000000..7b980b8 --- /dev/null +++ b/tests/json_schema/test_draft04.py @@ -0,0 +1,19 @@ +import pytest + +from .utils import template_test, resolve_param_values_and_ids + + +def pytest_generate_tests(metafunc): + param_values, param_ids = resolve_param_values_and_ids( + schema_version='http://json-schema.org/draft-04/schema', + suite_dir='JSON-Schema-Test-Suite/tests/draft4', + ignored_suite_files=[ + # Optional. + 'ecmascript-regex.json', + ], + ) + metafunc.parametrize(['schema_version', 'schema', 'data', 'is_valid'], param_values, ids=param_ids) + + +# Real test function to be used with parametrization by previous hook function. 
+test = template_test diff --git a/tests/json_schema/test_draft06.py b/tests/json_schema/test_draft06.py new file mode 100644 index 0000000..a62d75d --- /dev/null +++ b/tests/json_schema/test_draft06.py @@ -0,0 +1,19 @@ +import pytest + +from .utils import template_test, resolve_param_values_and_ids + + +def pytest_generate_tests(metafunc): + param_values, param_ids = resolve_param_values_and_ids( + schema_version='http://json-schema.org/draft-06/schema', + suite_dir='JSON-Schema-Test-Suite/tests/draft6', + ignored_suite_files=[ + # Optional. + 'ecmascript-regex.json', + ], + ) + metafunc.parametrize(['schema_version', 'schema', 'data', 'is_valid'], param_values, ids=param_ids) + + +# Real test function to be used with parametrization by previous hook function. +test = template_test diff --git a/tests/json_schema/test_draft07.py b/tests/json_schema/test_draft07.py new file mode 100644 index 0000000..bc7b291 --- /dev/null +++ b/tests/json_schema/test_draft07.py @@ -0,0 +1,21 @@ +import pytest + +from .utils import template_test, resolve_param_values_and_ids + + +def pytest_generate_tests(metafunc): + param_values, param_ids = resolve_param_values_and_ids( + schema_version='http://json-schema.org/draft-07/schema', + suite_dir='JSON-Schema-Test-Suite/tests/draft7', + ignored_suite_files=[ + # Optional. + 'ecmascript-regex.json', + 'idn-hostname.json', + 'iri.json', + ], + ) + metafunc.parametrize(['schema_version', 'schema', 'data', 'is_valid'], param_values, ids=param_ids) + + +# Real test function to be used with parametrization by previous hook function. 
+test = template_test diff --git a/tests/json_schema/utils.py b/tests/json_schema/utils.py new file mode 100644 index 0000000..ef6d516 --- /dev/null +++ b/tests/json_schema/utils.py @@ -0,0 +1,89 @@ +import json +from pathlib import Path + +import pytest +from urllib.request import urlopen + +from fastjsonschema import RefResolver, JsonSchemaException, compile, _get_code_generator_class + + +REMOTES = { + 'http://localhost:1234/integer.json': {'type': 'integer'}, + 'http://localhost:1234/name.json': { + 'type': 'string', + 'definitions': { + 'orNull': {'anyOf': [{'type': 'null'}, {'$ref': '#'}]}, + }, + }, + 'http://localhost:1234/subSchemas.json': { + 'integer': {'type': 'integer'}, + 'refToInteger': {'$ref': '#/integer'}, + }, + 'http://localhost:1234/folder/folderInteger.json': {'type': 'integer'} +} + + +def remotes_handler(uri): + if uri in REMOTES: + return REMOTES[uri] + req = urlopen(uri) + encoding = req.info().get_content_charset() or 'utf-8' + return json.loads(req.read().decode(encoding),) + + +def resolve_param_values_and_ids(schema_version, suite_dir, ignored_suite_files=[], ignore_tests=[]): + suite_dir_path = Path(suite_dir).resolve() + test_file_paths = sorted(set(suite_dir_path.glob("**/*.json"))) + + param_values = [] + param_ids = [] + for test_file_path in test_file_paths: + with test_file_path.open(encoding='UTF-8') as test_file: + test_cases = json.load(test_file) + for test_case in test_cases: + for test_data in test_case['tests']: + param_values.append(pytest.param( + schema_version, + test_case['schema'], + test_data['data'], + test_data['valid'], + marks=pytest.mark.xfail + if test_file_path.name in ignored_suite_files + or test_case['description'] in ignore_tests + else pytest.mark.none, + )) + param_ids.append('{} / {} / {}'.format( + test_file_path.name, + test_case['description'], + test_data['description'], + )) + return param_values, param_ids + + +def template_test(schema_version, schema, data, is_valid): + """ + Test function to 
be used (imported) in final test file to run the tests + which are generated by `pytest_generate_tests` hook. + """ + # For debug purposes. When test fails, it will print stdout. + resolver = RefResolver.from_schema(schema, handlers={'http': remotes_handler}) + + debug_generator = _get_code_generator_class(schema_version)(schema, resolver=resolver) + print(debug_generator.global_state_code) + print(debug_generator.func_code) + + # JSON schema test suits do not contain schema version. + # Our library needs to know that or it would use always the latest implementation. + if isinstance(schema, dict): + schema.setdefault('$schema', schema_version) + + validate = compile(schema, handlers={'http': remotes_handler}) + try: + result = validate(data) + print('Validate result:', result) + except JsonSchemaException: + if is_valid: + raise + else: + if not is_valid: + pytest.fail('Test should not pass') diff --git a/tests/test_array.py b/tests/test_array.py index 9448576..3d77435 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1,10 +1,9 @@ - import pytest from fastjsonschema import JsonSchemaException -exc = JsonSchemaException('data must be array') +exc = JsonSchemaException('data must be array', value='{data}', name='data', definition='{definition}', rule='type') @pytest.mark.parametrize('value, expected', [ (0, exc), (None, exc), @@ -19,7 +18,7 @@ def test_array(asserter, value, expected): asserter({'type': 'array'}, value, expected) -exc = JsonSchemaException('data must contain less than or equal to 1 items') +exc = JsonSchemaException('data must contain less than or equal to 1 items', value='{data}', name='data', definition='{definition}', rule='maxItems') @pytest.mark.parametrize('value, expected', [ ([], []), ([1], [1]), @@ -33,7 +32,7 @@ def test_max_items(asserter, value, expected): }, value, expected) -exc = JsonSchemaException('data must contain at least 2 items') +exc = JsonSchemaException('data must contain at least 2 items', value='{data}', 
name='data', definition='{definition}', rule='minItems') @pytest.mark.parametrize('value, expected', [ ([], exc), ([1], exc), @@ -50,7 +49,7 @@ def test_min_items(asserter, value, expected): @pytest.mark.parametrize('value, expected', [ ([], []), ([1], [1]), - ([1, 1], JsonSchemaException('data must contain unique items')), + ([1, 1], JsonSchemaException('data must contain unique items', value='{data}', name='data', definition='{definition}', rule='uniqueItems')), ([1, 2, 3], [1, 2, 3]), ]) def test_unique_items(asserter, value, expected): @@ -60,10 +59,19 @@ def test_unique_items(asserter, value, expected): }, value, expected) +def test_min_and_unique_items(asserter): + value = None + asserter({ + 'type': ['array', 'null'], + 'minItems': 1, + 'uniqueItems': True, + }, value, value) + + @pytest.mark.parametrize('value, expected', [ ([], []), ([1], [1]), - ([1, 'a'], JsonSchemaException('data[1] must be number')), + ([1, 'a'], JsonSchemaException('data[1] must be number', value='a', name='data[1]', definition={'type': 'number'}, rule='type')), ]) def test_items_all_same(asserter, value, expected): asserter({ @@ -76,7 +84,7 @@ def test_items_all_same(asserter, value, expected): ([], []), ([1], [1]), ([1, 'a'], [1, 'a']), - ([1, 2], JsonSchemaException('data[1] must be string')), + ([1, 2], JsonSchemaException('data[1] must be string', value=2, name='data[1]', definition={'type': 'string'}, rule='type')), ([1, 'a', 2], [1, 'a', 2]), ([1, 'a', 'b'], [1, 'a', 'b']), ]) @@ -94,8 +102,8 @@ def test_different_items(asserter, value, expected): ([], []), ([1], [1]), ([1, 'a'], [1, 'a']), - ([1, 2], JsonSchemaException('data[1] must be string')), - ([1, 'a', 2], JsonSchemaException('data[2] must be string')), + ([1, 2], JsonSchemaException('data[1] must be string', value=2, name='data[1]', definition={'type': 'string'}, rule='type')), + ([1, 'a', 2], JsonSchemaException('data[2] must be string', value=2, name='data[2]', definition={'type': 'string'}, rule='type')), ([1, 'a', 
'b'], [1, 'a', 'b']), ]) def test_different_items_with_additional_items(asserter, value, expected): @@ -113,9 +121,9 @@ def test_different_items_with_additional_items(asserter, value, expected): ([], []), ([1], [1]), ([1, 'a'], [1, 'a']), - ([1, 2], JsonSchemaException('data[1] must be string')), - ([1, 'a', 2], JsonSchemaException('data must contain only spcified items')), - ([1, 'a', 'b'], JsonSchemaException('data must contain only spcified items')), + ([1, 2], JsonSchemaException('data[1] must be string', value=2, name='data[1]', definition={'type': 'string'}, rule='type')), + ([1, 'a', 2], JsonSchemaException('data must contain only specified items', value='{data}', name='data', definition='{definition}', rule='items')), + ([1, 'a', 'b'], JsonSchemaException('data must contain only specified items', value='{data}', name='data', definition='{definition}', rule='items')), ]) def test_different_items_without_additional_items(asserter, value, expected): asserter({ @@ -126,3 +134,35 @@ def test_different_items_without_additional_items(asserter, value, expected): ], 'additionalItems': False, }, value, expected) + + +@pytest.mark.parametrize('value, expected', [ + ((), ()), + (('a',), ('a',)), + (('a', 'b'), ('a', 'b')), + (('a', 'b', 3), JsonSchemaException('data[2] must be string', value=3, name='data[2]', + definition={'type': 'string'}, rule='type')), +]) +def test_tuples_as_arrays(asserter, value, expected): + asserter({ + '$schema': 'http://json-schema.org/draft-06/schema', + 'type': 'array', + 'items': + {'type': 'string'}, + + }, value, expected) + + +@pytest.mark.parametrize('value, expected', [ + ({'a': [], 'b': ()}, {'a': [], 'b': ()}), + ({'a': (1, 2), 'b': (3, 4)}, {'a': (1, 2), 'b': (3, 4)}), +]) +def test_mixed_arrays(asserter, value, expected): + asserter({ + 'type': 'object', + 'properties': { + 'a': {'type': 'array'}, + 'b': {'type': 'array'}, + }, + }, value, expected) + diff --git a/tests/test_boolean.py b/tests/test_boolean.py index 
4a23018..c3c1765 100644 --- a/tests/test_boolean.py +++ b/tests/test_boolean.py @@ -1,10 +1,9 @@ - import pytest from fastjsonschema import JsonSchemaException -exc = JsonSchemaException('data must be boolean') +exc = JsonSchemaException('data must be boolean', value='{data}', name='data', definition='{definition}', rule='type') @pytest.mark.parametrize('value, expected', [ (0, exc), (None, exc), diff --git a/tests/test_common.py b/tests/test_common.py index 6168a61..bb0133b 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -1,10 +1,9 @@ - import pytest from fastjsonschema import JsonSchemaException -exc = JsonSchemaException('data must be one of [1, 2, \'a\']') +exc = JsonSchemaException('data must be one of [1, 2, \'a\', "b\'c"]', value='{data}', name='data', definition='{definition}', rule='enum') @pytest.mark.parametrize('value, expected', [ (1, 1), (2, 2), @@ -13,10 +12,10 @@ ('aa', exc), ]) def test_enum(asserter, value, expected): - asserter({'enum': [1, 2, 'a']}, value, expected) + asserter({'enum': [1, 2, 'a', "b'c"]}, value, expected) -exc = JsonSchemaException('data must be string or number') +exc = JsonSchemaException('data must be string or number', value='{data}', name='data', definition='{definition}', rule='type') @pytest.mark.parametrize('value, expected', [ (0, 0), (None, exc), @@ -31,7 +30,7 @@ def test_types(asserter, value, expected): @pytest.mark.parametrize('value, expected', [ ('qwert', 'qwert'), - ('qwertz', JsonSchemaException('data must be shorter than or equal to 5 characters')), + ('qwertz', JsonSchemaException('data must be shorter than or equal to 5 characters', value='{data}', name='data', definition={'maxLength': 5}, rule='maxLength')), ]) def test_all_of(asserter, value, expected): asserter({'allOf': [ @@ -40,7 +39,7 @@ def test_all_of(asserter, value, expected): ]}, value, expected) -exc = JsonSchemaException('data must be valid by one of anyOf definition') +exc = JsonSchemaException('data must be valid by one of 
anyOf definition', value='{data}', name='data', definition='{definition}', rule='anyOf') @pytest.mark.parametrize('value, expected', [ (0, 0), (None, exc), @@ -56,7 +55,7 @@ def test_any_of(asserter, value, expected): ]}, value, expected) -exc = JsonSchemaException('data must be valid exactly by one of oneOf definition') +exc = JsonSchemaException('data must be valid exactly by one of oneOf definition', value='{data}', name='data', definition='{definition}', rule='oneOf') @pytest.mark.parametrize('value, expected', [ (0, exc), (2, exc), @@ -71,7 +70,7 @@ def test_one_of(asserter, value, expected): ]}, value, expected) -exc = JsonSchemaException('data must be valid exactly by one of oneOf definition') +exc = JsonSchemaException('data must be valid exactly by one of oneOf definition', value='{data}', name='data', definition='{definition}', rule='oneOf') @pytest.mark.parametrize('value, expected', [ (0, exc), (2, exc), @@ -90,7 +89,7 @@ def test_one_of_factorized(asserter, value, expected): @pytest.mark.parametrize('value, expected', [ - (0, JsonSchemaException('data must not be valid by not definition')), + (0, JsonSchemaException('data must not be valid by not definition', value='{data}', name='data', definition='{definition}', rule='not')), (True, True), ('abc', 'abc'), ([], []), diff --git a/tests/test_compile_to_code.py b/tests/test_compile_to_code.py new file mode 100644 index 0000000..509e044 --- /dev/null +++ b/tests/test_compile_to_code.py @@ -0,0 +1,86 @@ +import os +import pytest +import shutil + +from fastjsonschema import compile_to_code, compile as compile_spec + +@pytest.yield_fixture(autouse=True) +def run_around_tests(): + temp_dir = 'temp' + # Code that will run before your test, for example: + if not os.path.isdir(temp_dir): + os.makedirs(temp_dir) + # A test function will be run at this point + yield + # Code that will run after your test, for example: + shutil.rmtree(temp_dir) + + +def test_compile_to_code(): + code = compile_to_code({ + 
'properties': { + 'a': {'type': 'string'}, + 'b': {'type': 'integer'}, + 'c': {'format': 'hostname'}, # Test generation of regex patterns to the file. + } + }) + with open('temp/schema_1.py', 'w') as f: + f.write(code) + from temp.schema_1 import validate + assert validate({ + 'a': 'a', + 'b': 1, + 'c': 'example.com', + }) == { + 'a': 'a', + 'b': 1, + 'c': 'example.com', + } + +def test_compile_to_code_ipv6_regex(): + code = compile_to_code({ + 'properties': { + 'ip': {'format': 'ipv6'}, + } + }) + with open('temp/schema_2.py', 'w') as f: + f.write(code) + from temp.schema_2 import validate + assert validate({ + 'ip': '2001:0db8:85a3:0000:0000:8a2e:0370:7334' + }) == { + 'ip': '2001:0db8:85a3:0000:0000:8a2e:0370:7334' + } + +# https://github.com/horejsek/python-fastjsonschema/issues/74 +def test_compile_complex_one_of_all_of(): + compile_spec({ + "oneOf": [ + { + "required": [ + "schema" + ] + }, + { + "required": [ + "content" + ], + "allOf": [ + { + "not": { + "required": [ + "style" + ] + } + }, + { + "not": { + "required": [ + "explode" + ] + } + } + ] + } + ] + }) diff --git a/tests/test_const.py b/tests/test_const.py new file mode 100644 index 0000000..352df07 --- /dev/null +++ b/tests/test_const.py @@ -0,0 +1,14 @@ +import pytest + + +@pytest.mark.parametrize('value', ( + 'foo', + 42, + False, + [1, 2, 3] +)) +def test_const(asserter, value): + asserter({ + '$schema': 'http://json-schema.org/draft-06/schema', + 'const': value, + }, value, value) diff --git a/tests/test_default.py b/tests/test_default.py index 54e40bb..28449f1 100644 --- a/tests/test_default.py +++ b/tests/test_default.py @@ -1,11 +1,10 @@ - import pytest from fastjsonschema import JsonSchemaException @pytest.mark.parametrize('value, expected', [ - (None, JsonSchemaException('data must be object')), + (None, JsonSchemaException('data must be object', value='{data}', name='data', definition='{definition}', rule='type')), ({}, {'a': '', 'b': 42, 'c': {}, 'd': []}), ({'a': 'abc'}, {'a': 'abc', 
'b': 42, 'c': {}, 'd': []}), ({'b': 123}, {'a': '', 'b': 123, 'c': {}, 'd': []}), @@ -24,7 +23,7 @@ def test_default_in_object(asserter, value, expected): @pytest.mark.parametrize('value, expected', [ - (None, JsonSchemaException('data must be array')), + (None, JsonSchemaException('data must be array', value='{data}', name='data', definition='{definition}', rule='type')), ([], ['', 42]), (['abc'], ['abc', 42]), (['abc', 123], ['abc', 123]), diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py new file mode 100644 index 0000000..8e8d0a6 --- /dev/null +++ b/tests/test_exceptions.py @@ -0,0 +1,30 @@ +import pytest + +from fastjsonschema import JsonSchemaException + + +@pytest.mark.parametrize('value, expected', [ + ('data', ['data']), + ('data[0]', ['data', '0']), + ('data.foo', ['data', 'foo']), + ('data[1].bar', ['data', '1', 'bar']), + ('data.foo[2]', ['data', 'foo', '2']), + ('data.foo.bar[1][2]', ['data', 'foo', 'bar', '1', '2']), + ('data[1][2].foo.bar', ['data', '1', '2', 'foo', 'bar']), +]) +def test_exception_variable_path(value, expected): + exc = JsonSchemaException('msg', name=value) + assert exc.path == expected + + +@pytest.mark.parametrize('definition, rule, expected_rule_definition', [ + (None, None, None), + ({}, None, None), + ({'type': 'string'}, None, None), + ({'type': 'string'}, 'unique', None), + ({'type': 'string'}, 'type', 'string'), + (None, 'type', None), +]) +def test_exception_rule_definition(definition, rule, expected_rule_definition): + exc = JsonSchemaException('msg', definition=definition, rule=rule) + assert exc.rule_definition == expected_rule_definition diff --git a/tests/test_format.py b/tests/test_format.py new file mode 100644 index 0000000..c309b57 --- /dev/null +++ b/tests/test_format.py @@ -0,0 +1,57 @@ +import datetime +import re + +import pytest + +from fastjsonschema import JsonSchemaException + + +exc = JsonSchemaException('data must be date-time', value='{data}', name='data', definition='{definition}', 
rule='format') +@pytest.mark.parametrize('value, expected', [ + ('', exc), + ('bla', exc), + ('2018-02-05T14:17:10.00', exc), + ('2018-02-05T14:17:10.00Z\n', exc), + ('2018-02-05T14:17:10.00Z', '2018-02-05T14:17:10.00Z'), + ('2018-02-05T14:17:10Z', '2018-02-05T14:17:10Z'), +]) +def test_datetime(asserter, value, expected): + asserter({'type': 'string', 'format': 'date-time'}, value, expected) + + +exc = JsonSchemaException('data must be hostname', value='{data}', name='data', definition='{definition}', rule='format') +@pytest.mark.parametrize('value, expected', [ + ('', exc), + ('LDhsjf878&d', exc), + ('bla.bla-', exc), + ('example.example.com-', exc), + ('example.example.com\n', exc), + ('localhost', 'localhost'), + ('example.com', 'example.com'), + ('example.de', 'example.de'), + ('example.fr', 'example.fr'), + ('example.example.com', 'example.example.com'), +]) +def test_hostname(asserter, value, expected): + asserter({'type': 'string', 'format': 'hostname'}, value, expected) + + +exc = JsonSchemaException('data must be custom-format', value='{data}', name='data', definition='{definition}', rule='format') +@pytest.mark.parametrize('value,expected,custom_format', [ + ('', exc, r'^[ab]$'), + ('', exc, lambda value: value in ('a', 'b')), + ('a', 'a', r'^[ab]$'), + ('a', 'a', lambda value: value in ('a', 'b')), + ('c', exc, r'^[ab]$'), + ('c', exc, lambda value: value in ('a', 'b')), +]) +def test_custom_format(asserter, value, expected, custom_format): + asserter({'format': 'custom-format'}, value, expected, formats={ + 'custom-format': custom_format, + }) + + +def test_custom_format_override(asserter): + asserter({'format': 'date-time'}, 'a', 'a', formats={ + 'date-time': r'^[ab]$', + }) diff --git a/tests/test_integration.py b/tests/test_integration.py index 57af252..27135f9 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,9 +1,48 @@ - import pytest from fastjsonschema import JsonSchemaException +definition = { + 'type': 'array', + 
'items': [ + { + 'type': 'number', + 'maximum': 10, + 'exclusiveMaximum': True, + }, + { + 'type': 'string', + 'enum': ['hello', 'world'], + }, + { + 'type': 'array', + 'minItems': 1, + 'maxItems': 3, + 'items': [ + {'type': 'number'}, + {'type': 'string'}, + {'type': 'boolean'}, + ], + }, + { + 'type': 'object', + 'required': ['a', 'b'], + 'minProperties': 3, + 'properties': { + 'a': {'type': ['null', 'string']}, + 'b': {'type': ['null', 'string']}, + 'c': {'type': ['null', 'string'], 'default': 'abc'} + }, + 'additionalProperties': {'type': 'string'}, + }, + {'not': {'type': ['null']}}, + {'oneOf': [ + {'type': 'number', 'multipleOf': 3}, + {'type': 'number', 'multipleOf': 5}, + ]}, + ], +} @pytest.mark.parametrize('value, expected', [ ( [9, 'hello', [1, 'a', True], {'a': 'a', 'b': 'b', 'd': 'd'}, 42, 3], @@ -13,6 +52,10 @@ [9, 'world', [1], {'a': 'a', 'b': 'b', 'd': 'd'}, 42, 3], [9, 'world', [1], {'a': 'a', 'b': 'b', 'c': 'abc', 'd': 'd'}, 42, 3], ), + ( + (9, 'world', (1,), {'a': 'a', 'b': 'b', 'd': 'd'}, 42, 3), + (9, 'world', (1,), {'a': 'a', 'b': 'b', 'c': 'abc', 'd': 'd'}, 42, 3), + ), ( [9, 'world', [1], {'a': 'a', 'b': 'b', 'c': 'xy'}, 42, 3], [9, 'world', [1], {'a': 'a', 'b': 'b', 'c': 'xy'}, 42, 3], @@ -27,75 +70,60 @@ ), ( [10, 'world', [1], {'a': 'a', 'b': 'b', 'c': 'xy'}, 'str', 5], - JsonSchemaException('data[0] must be smaller than 10'), + JsonSchemaException('data[0] must be smaller than 10', value=10, name='data[0]', definition=definition['items'][0], rule='maximum'), ), ( [9, 'xxx', [1], {'a': 'a', 'b': 'b', 'c': 'xy'}, 'str', 5], - JsonSchemaException('data[1] must be one of [\'hello\', \'world\']'), + JsonSchemaException('data[1] must be one of [\'hello\', \'world\']', value='xxx', name='data[1]', definition=definition['items'][1], rule='enum'), ), ( [9, 'hello', [], {'a': 'a', 'b': 'b', 'c': 'xy'}, 'str', 5], - JsonSchemaException('data[2] must contain at least 1 items'), + JsonSchemaException('data[2] must contain at least 1 items', 
value=[], name='data[2]', definition=definition['items'][2], rule='minItems'), ), ( [9, 'hello', [1, 2, 3], {'a': 'a', 'b': 'b', 'c': 'xy'}, 'str', 5], - JsonSchemaException('data[2][1] must be string'), + JsonSchemaException('data[2][1] must be string', value=2, name='data[2][1]', definition={'type': 'string'}, rule='type'), ), ( [9, 'hello', [1], {'a': 'a', 'x': 'x', 'y': 'y'}, 'str', 5], - JsonSchemaException('data[3] must contain [\'a\', \'b\'] properties'), + JsonSchemaException('data[3] must contain [\'a\', \'b\'] properties', value={'a': 'a', 'x': 'x', 'y': 'y'}, name='data[3]', definition=definition['items'][3], rule='required'), ), ( [9, 'hello', [1], {}, 'str', 5], - JsonSchemaException('data[3] must contain at least 3 properties'), + JsonSchemaException('data[3] must contain at least 3 properties', value={}, name='data[3]', definition=definition['items'][3], rule='minProperties'), ), ( [9, 'hello', [1], {'a': 'a', 'b': 'b', 'x': 'x'}, None, 5], - JsonSchemaException('data[4] must not be valid by not definition'), + JsonSchemaException('data[4] must not be valid by not definition', value=None, name='data[4]', definition=definition['items'][4], rule='not'), ), ( [9, 'hello', [1], {'a': 'a', 'b': 'b', 'x': 'x'}, 42, 15], - JsonSchemaException('data[5] must be valid exactly by one of oneOf definition'), + JsonSchemaException('data[5] must be valid exactly by one of oneOf definition', value=15, name='data[5]', definition=definition['items'][5], rule='oneOf'), ), ]) def test_integration(asserter, value, expected): + asserter(definition, value, expected) + + +def test_any_of_with_patterns(asserter): asserter({ - 'type': 'array', - 'items': [ - { - 'type': 'number', - 'maximum': 10, - 'exclusiveMaximum': True, - }, - { - 'type': 'string', - 'enum': ['hello', 'world'], - }, - { - 'type': 'array', - 'minItems': 1, - 'maxItems': 3, - 'items': [ - {'type': 'number'}, - {'type': 'string'}, - {'type': 'boolean'}, - ], - }, - { - 'type': 'object', - 'required': ['a', 
'b'], - 'minProperties': 3, - 'properties': { - 'a': {'type': ['null', 'string']}, - 'b': {'type': ['null', 'string']}, - 'c': {'type': ['null', 'string'], 'default': 'abc'} - }, - 'additionalProperties': {'type': 'string'}, - }, - {'not': {'type': ['null']}}, - {'oneOf': [ - {'type': 'number', 'multipleOf': 3}, - {'type': 'number', 'multipleOf': 5}, - ]}, - ], - }, value, expected) + 'type': 'object', + 'properties': { + 'hash': { + 'anyOf': [ + { + 'type': 'string', + 'pattern': '^AAA' + }, + { + 'type': 'string', + 'pattern': '^BBB' + } + ] + } + } + }, { + 'hash': 'AAAXXX', + }, { + 'hash': 'AAAXXX', + }) diff --git a/tests/test_null.py b/tests/test_null.py index 22db265..969d12e 100644 --- a/tests/test_null.py +++ b/tests/test_null.py @@ -1,10 +1,9 @@ - import pytest from fastjsonschema import JsonSchemaException -exc = JsonSchemaException('data must be null') +exc = JsonSchemaException('data must be null', value='{data}', name='data', definition='{definition}', rule='type') @pytest.mark.parametrize('value, expected', [ (0, exc), (None, None), diff --git a/tests/test_number.py b/tests/test_number.py index 72ee2c3..12f5861 100644 --- a/tests/test_number.py +++ b/tests/test_number.py @@ -1,4 +1,3 @@ - import pytest from fastjsonschema import JsonSchemaException @@ -22,11 +21,11 @@ def number_type(request): ]) def test_number(asserter, number_type, value, expected): if isinstance(expected, JsonSchemaException): - expected = JsonSchemaException(expected.message.format(number_type=number_type)) + expected = JsonSchemaException(expected.message.format(number_type=number_type), value='{data}', name='data', definition='{definition}', rule='type') asserter({'type': number_type}, value, expected) -exc = JsonSchemaException('data must be smaller than or equal to 10') +exc = JsonSchemaException('data must be smaller than or equal to 10', value='{data}', name='data', definition='{definition}', rule='maximum') @pytest.mark.parametrize('value, expected', [ (-5, -5), (5, 5), 
@@ -42,7 +41,7 @@ def test_maximum(asserter, number_type, value, expected): }, value, expected) -exc = JsonSchemaException('data must be smaller than 10') +exc = JsonSchemaException('data must be smaller than 10', value='{data}', name='data', definition='{definition}', rule='maximum') @pytest.mark.parametrize('value, expected', [ (-5, -5), (5, 5), @@ -59,7 +58,7 @@ def test_exclusive_maximum(asserter, number_type, value, expected): }, value, expected) -exc = JsonSchemaException('data must be bigger than or equal to 10') +exc = JsonSchemaException('data must be bigger than or equal to 10', value='{data}', name='data', definition='{definition}', rule='minimum') @pytest.mark.parametrize('value, expected', [ (-5, exc), (9, exc), @@ -74,7 +73,7 @@ def test_minimum(asserter, number_type, value, expected): }, value, expected) -exc = JsonSchemaException('data must be bigger than 10') +exc = JsonSchemaException('data must be bigger than 10', value='{data}', name='data', definition='{definition}', rule='minimum') @pytest.mark.parametrize('value, expected', [ (-5, exc), (9, exc), @@ -90,7 +89,7 @@ def test_exclusive_minimum(asserter, number_type, value, expected): }, value, expected) -exc = JsonSchemaException('data must be multiple of 3') +exc = JsonSchemaException('data must be multiple of 3', value='{data}', name='data', definition='{definition}', rule='multipleOf') @pytest.mark.parametrize('value, expected', [ (-4, exc), (-3, -3), @@ -112,6 +111,34 @@ def test_multiple_of(asserter, number_type, value, expected): }, value, expected) +exc = JsonSchemaException('data must be multiple of 0.0001', value='{data}', name='data', definition='{definition}', rule='multipleOf') +@pytest.mark.parametrize('value, expected', [ + (0.00751, exc), + (0.0075, 0.0075), +]) +def test_multiple_of_float(asserter, value, expected): + asserter({ + 'type': 'number', + 'multipleOf': 0.0001, + }, value, expected) + + +exc = JsonSchemaException('data must be multiple of 0.01', value='{data}', 
name='data', definition='{definition}', rule='multipleOf') +@pytest.mark.parametrize('value, expected', [ + (0, 0), + (0.01, 0.01), + (0.1, 0.1), + (19.01, 19.01), + (0.001, exc), + (19.001, exc), +]) +def test_multiple_of_float_1_5(asserter, value, expected): + asserter({ + 'type': 'number', + 'multipleOf': 0.01, + }, value, expected) + + @pytest.mark.parametrize('value', ( 1.0, 0.1, @@ -121,7 +148,7 @@ def test_multiple_of(asserter, number_type, value, expected): def test_integer_is_not_number(asserter, value): asserter({ 'type': 'integer', - }, value, JsonSchemaException('data must be integer')) + }, value, JsonSchemaException('data must be integer', value='{data}', name='data', definition='{definition}', rule='type')) @pytest.mark.parametrize('value', ( diff --git a/tests/test_object.py b/tests/test_object.py index 493abc5..181b453 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -1,10 +1,10 @@ - import pytest -from fastjsonschema import JsonSchemaException +import fastjsonschema +from fastjsonschema import JsonSchemaDefinitionException, JsonSchemaException -exc = JsonSchemaException('data must be object') +exc = JsonSchemaException('data must be object', value='{data}', name='data', definition='{definition}', rule='type') @pytest.mark.parametrize('value, expected', [ (0, exc), (None, exc), @@ -22,7 +22,7 @@ def test_object(asserter, value, expected): @pytest.mark.parametrize('value, expected', [ ({}, {}), ({'a': 1}, {'a': 1}), - ({'a': 1, 'b': 2}, JsonSchemaException('data must contain less than or equal to 1 properties')), + ({'a': 1, 'b': 2}, JsonSchemaException('data must contain less than or equal to 1 properties', value='{data}', name='data', definition='{definition}', rule='maxProperties')), ]) def test_max_properties(asserter, value, expected): asserter({ @@ -32,7 +32,7 @@ def test_max_properties(asserter, value, expected): @pytest.mark.parametrize('value, expected', [ - ({}, JsonSchemaException('data must contain at least 1 
properties')), + ({}, JsonSchemaException('data must contain at least 1 properties', value='{data}', name='data', definition='{definition}', rule='minProperties')), ({'a': 1}, {'a': 1}), ({'a': 1, 'b': 2}, {'a': 1, 'b': 2}), ]) @@ -43,7 +43,7 @@ def test_min_properties(asserter, value, expected): }, value, expected) -exc = JsonSchemaException('data must contain [\'a\', \'b\'] properties') +exc = JsonSchemaException('data must contain [\'a\', \'b\'] properties', value='{data}', name='data', definition='{definition}', rule='required') @pytest.mark.parametrize('value, expected', [ ({}, exc), ({'a': 1}, exc), @@ -60,7 +60,7 @@ def test_required(asserter, value, expected): ({}, {}), ({'a': 1}, {'a': 1}), ({'a': 1, 'b': ''}, {'a': 1, 'b': ''}), - ({'a': 1, 'b': 2}, JsonSchemaException('data.b must be string')), + ({'a': 1, 'b': 2}, JsonSchemaException('data.b must be string', value=2, name='data.b', definition={'type': 'string'}, rule='type')), ({'a': 1, 'b': '', 'any': True}, {'a': 1, 'b': '', 'any': True}), ]) def test_properties(asserter, value, expected): @@ -73,13 +73,22 @@ def test_properties(asserter, value, expected): }, value, expected) +def test_invalid_properties(asserter): + with pytest.raises(JsonSchemaDefinitionException): + fastjsonschema.compile({ + 'properties': { + 'item': ['wrong'], + }, + }) + + @pytest.mark.parametrize('value, expected', [ ({}, {}), ({'a': 1}, {'a': 1}), ({'a': 1, 'b': ''}, {'a': 1, 'b': ''}), - ({'a': 1, 'b': 2}, JsonSchemaException('data.b must be string')), + ({'a': 1, 'b': 2}, JsonSchemaException('data.b must be string', value=2, name='data.b', definition={'type': 'string'}, rule='type')), ({'a': 1, 'b': '', 'additional': ''}, {'a': 1, 'b': '', 'additional': ''}), - ({'a': 1, 'b': '', 'any': True}, JsonSchemaException('data.any must be string')), + ({'a': 1, 'b': '', 'any': True}, JsonSchemaException('data.any must be string', value=True, name='data.any', definition={'type': 'string'}, rule='type')), ]) def 
test_properties_with_additional_properties(asserter, value, expected): asserter({ @@ -96,8 +105,10 @@ def test_properties_with_additional_properties(asserter, value, expected): ({}, {}), ({'a': 1}, {'a': 1}), ({'a': 1, 'b': ''}, {'a': 1, 'b': ''}), - ({'a': 1, 'b': 2}, JsonSchemaException('data.b must be string')), - ({'a': 1, 'b': '', 'any': True}, JsonSchemaException('data must contain only spcified properties')), + ({'a': 1, 'b': 2}, JsonSchemaException('data.b must be string', value=2, name='data.b', definition={'type': 'string'}, rule='type')), + ({'a': 1, 'b': '', 'any': True}, JsonSchemaException('data must not contain {\'any\'} properties', value='{data}', name='data', definition='{definition}', rule='additionalProperties')), + ({'cd': True}, JsonSchemaException('data must not contain {\'cd\'} properties', value='{data}', name='data', definition='{definition}', rule='additionalProperties')), + ({'c_d': True}, {'c_d': True}), ]) def test_properties_without_additional_properties(asserter, value, expected): asserter({ @@ -105,17 +116,17 @@ def test_properties_without_additional_properties(asserter, value, expected): 'properties': { 'a': {'type': 'number'}, 'b': {'type': 'string'}, + 'c_d': {'type': 'boolean'}, }, 'additionalProperties': False, }, value, expected) -@pytest.mark.xfail @pytest.mark.parametrize('value, expected', [ ({}, {}), ({'a': 1}, {'a': 1}), ({'xa': 1}, {'xa': 1}), - ({'xa': ''}, JsonSchemaException('data.xa must be number')), + ({'xa': ''}, JsonSchemaException('data.xa must be number', value='', name='data.xa', definition={'type': 'number'}, rule='type')), ({'xbx': ''}, {'xbx': ''}), ]) def test_pattern_properties(asserter, value, expected): @@ -127,3 +138,69 @@ def test_pattern_properties(asserter, value, expected): }, 'additionalProperties': False, }, value, expected) + + +@pytest.mark.parametrize('value, expected', [ + ({}, {}), + ({'a': 1}, {'a': 1}), + ({'b': True}, {'b': True}), + ({'c': ''}, {'c': ''}), + ({'d': 1}, 
JsonSchemaException('data.d must be string', value=1, name='data.d', definition={'type': 'string'}, rule='type')), +]) +def test_additional_properties(asserter, value, expected): + asserter({ + 'type': 'object', + "properties": { + "a": {"type": "number"}, + "b": {"type": "boolean"}, + }, + "additionalProperties": {"type": "string"} + }, value, expected) + + +@pytest.mark.parametrize('value, expected', [ + ({'id': 1}, {'id': 1}), + ({'id': 'a'}, JsonSchemaException('data.id must be integer', value='a', name='data.id', definition={'type': 'integer'}, rule='type')), +]) +def test_object_with_id_property(asserter, value, expected): + asserter({ + "type": "object", + "properties": { + "id": {"type": "integer"} + } + }, value, expected) + + +@pytest.mark.parametrize('value, expected', [ + ({'$ref': 'ref://to.somewhere'}, {'$ref': 'ref://to.somewhere'}), + ({'$ref': 1}, JsonSchemaException('data.$ref must be string', value=1, name='data.$ref', definition={'type': 'string'}, rule='type')), +]) +def test_object_with_ref_property(asserter, value, expected): + asserter({ + "type": "object", + "properties": { + "$ref": {"type": "string"} + } + }, value, expected) + + +@pytest.mark.parametrize('value, expected', [ + ({}, {}), + ({'foo': 'foo'}, JsonSchemaException('data missing dependency bar for foo', value={'foo': 'foo'}, name='data', definition='{definition}', rule='dependencies')), + ({'foo': 'foo', 'bar': 'bar'}, {'foo': 'foo', 'bar': 'bar'}), +]) +def test_dependencies(asserter, value, expected): + asserter({ + 'type': 'object', + "properties": { + "foo": { + "type": "string" + }, + "bar": { + "type": "string" + } + }, + "dependencies": { + "foo": ["bar"], + }, + }, value, expected) \ No newline at end of file diff --git a/tests/test_pattern_properties.py b/tests/test_pattern_properties.py new file mode 100644 index 0000000..ae86946 --- /dev/null +++ b/tests/test_pattern_properties.py @@ -0,0 +1,70 @@ +import pytest + + +def test_dont_override_variable_names(asserter): + 
value = { + 'foo:bar': { + 'baz': { + 'bat': {}, + }, + 'bit': {}, + }, + } + asserter({ + 'type': 'object', + 'patternProperties': { + '^foo:': { + 'type': 'object', + 'properties': { + 'baz': { + 'type': 'object', + 'patternProperties': { + '^b': {'type': 'object'}, + }, + }, + 'bit': {'type': 'object'}, + }, + }, + }, + }, value, value) + + +def test_clear_variables(asserter): + value = { + 'bar': {'baz': 'foo'} + } + asserter({ + 'type': 'object', + 'patternProperties': { + 'foo': {'type': 'object', 'required': ['baz']}, + 'bar': {'type': 'object', 'required': ['baz']} + } + }, value, value) + + +def test_pattern_with_escape(asserter): + value = { + '\\n': {} + } + asserter({ + 'type': 'object', + 'patternProperties': { + '\\\\n': {'type': 'object'} + } + }, value, value) + + +def test_pattern_with_escape_no_warnings(asserter): + value = { + 'bar': {} + } + + with pytest.warns(None) as record: + asserter({ + 'type': 'object', + 'patternProperties': { + '\\w+': {'type': 'object'} + } + }, value, value) + + assert len(record) == 0 diff --git a/tests/test_security.py b/tests/test_security.py new file mode 100644 index 0000000..7ee39f1 --- /dev/null +++ b/tests/test_security.py @@ -0,0 +1,63 @@ +import pytest + +from fastjsonschema import JsonSchemaDefinitionException, compile + + +@pytest.mark.parametrize('schema', [ + {'type': 'validate(10)'}, + {'enum': 'validate(10)'}, + {'minLength': 'validate(10)'}, + {'maxLength': 'validate(10)'}, + {'minimum': 'validate(10)'}, + {'maximum': 'validate(10)'}, + {'multipleOf': 'validate(10)'}, + {'minItems': 'validate(10)'}, + {'maxItems': 'validate(10)'}, + {'minProperties': 'validate(10)'}, + {'maxProperties': 'validate(10)'}, + {'required': 'validate(10)'}, + {'exclusiveMinimum': 'validate(10)'}, + {'exclusiveMaximum': 'validate(10)'}, +]) +def test_not_generate_code_from_definition(schema): + with pytest.raises(JsonSchemaDefinitionException): + compile({ + '$schema': 'http://json-schema.org/draft-07/schema', + **schema + 
}) + + +@pytest.mark.parametrize('schema,value', [ + ({'const': 'validate(10)'}, 'validate(10)'), + ({'pattern': '" + validate("10") + "'}, '" validate"10" "'), + ({'pattern': "' + validate('10') + '"}, '\' validate\'10\' \''), + ({'pattern': "' + validate(\"10\") + '"}, '\' validate"10" \''), + ({'properties': { + 'validate(10)': {'type': 'string'}, + }}, {'validate(10)': '10'}), + ({'patternProperties': { + 'validate(10)': {'type': 'string'}, + }}, {'validate(10)': '10'}), +]) +def test_generate_code_with_proper_variable_names(asserter, schema, value): + asserter({ + '$schema': 'http://json-schema.org/draft-07/schema', + **schema + }, value, value) + + +def test_generate_code_without_overriding_variables(asserter): + # Variable names are derived from property names. The generated code also creates + # FOO_keys, which could collide with a property named keys. The variable would then be + # reused and, for example, the additionalProperties feature would not work correctly. + # We need to make sure the names do not collide. 
+ value = { + 'keys': [1, 2, 3], + } + asserter({ + 'type': 'object', + 'properties': { + 'keys': {'type': 'array'}, + }, + 'additionalProperties': False, + }, value, value) diff --git a/tests/test_string.py b/tests/test_string.py index 93bf123..139ec6c 100644 --- a/tests/test_string.py +++ b/tests/test_string.py @@ -1,10 +1,9 @@ - import pytest from fastjsonschema import JsonSchemaException -exc = JsonSchemaException('data must be string') +exc = JsonSchemaException('data must be string', value='{data}', name='data', definition='{definition}', rule='type') @pytest.mark.parametrize('value, expected', [ (0, exc), (None, exc), @@ -18,7 +17,7 @@ def test_string(asserter, value, expected): asserter({'type': 'string'}, value, expected) -exc = JsonSchemaException('data must be shorter than or equal to 5 characters') +exc = JsonSchemaException('data must be shorter than or equal to 5 characters', value='{data}', name='data', definition='{definition}', rule='maxLength') @pytest.mark.parametrize('value, expected', [ ('', ''), ('qwer', 'qwer'), @@ -33,7 +32,7 @@ def test_max_length(asserter, value, expected): }, value, expected) -exc = JsonSchemaException('data must be longer than or equal to 5 characters') +exc = JsonSchemaException('data must be longer than or equal to 5 characters', value='{data}', name='data', definition='{definition}', rule='minLength') @pytest.mark.parametrize('value, expected', [ ('', exc), ('qwer', exc), @@ -48,15 +47,49 @@ def test_min_length(asserter, value, expected): }, value, expected) -exc = JsonSchemaException('data must match pattern ^[ab]*[^ab]+(c{2}|d)$') +exc = JsonSchemaException('data must match pattern ^[ab]*[^ab]+(c{2}|d)$', value='{data}', name='data', definition='{definition}', rule='pattern') @pytest.mark.parametrize('value, expected', [ ('', exc), ('aacc', exc), ('aaccc', 'aaccc'), ('aacd', 'aacd'), + ('aacd\n', exc), ]) def test_pattern(asserter, value, expected): asserter({ 'type': 'string', 'pattern': '^[ab]*[^ab]+(c{2}|d)$', }, 
value, expected) + + +@pytest.mark.parametrize('pattern', [ + ' ', + '\\x20', +]) +def test_pattern_with_space(asserter, pattern): + asserter({ + 'type': 'string', + 'pattern': pattern, + }, ' ', ' ') + + +def test_pattern_with_escape_no_warnings(asserter): + with pytest.warns(None) as record: + asserter({ + 'type': 'string', + 'pattern': '\\s' + }, ' ', ' ') + + assert len(record) == 0 + + +exc = JsonSchemaException('data must be a valid regex', value='{data}', name='data', definition='{definition}', rule='format') +@pytest.mark.parametrize('value, expected', [ + ('[a-z]', '[a-z]'), + ('[a-z', exc), +]) +def test_regex_pattern(asserter, value, expected): + asserter({ + 'format': 'regex', + 'type': 'string' + }, value, expected) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..cd81793 --- /dev/null +++ b/tox.ini @@ -0,0 +1,19 @@ +# tox (https://tox.readthedocs.io/) is a tool for running tests +# in multiple virtualenvs. This configuration file will run the +# test suite on all supported python versions. To use it, "pip install tox" +# and then run "tox" from this directory. + +[tox] +envlist = py{34,35,36,37},lint + +[testenv] +deps = + pytest +commands = + pytest -m "not benchmark" + +[testenv:lint] +deps = + pylint +commands = + pylint fastjsonschema