From c471156458b0c7971f9c5801a3f7d30d06a5457e Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Thu, 9 Jan 2025 10:55:15 -0800 Subject: [PATCH 1/7] fix: Update CI configuration for PyPy compatibility - Add proper GitHub Actions workflow configuration - Simplify tox configuration - Add explicit PyPy support - Fix dependency installation issues --- .github/workflows/ci.yml | 28 ++++++++++++++ tox.ini | 80 ++++++++-------------------------------- 2 files changed, 44 insertions(+), 64 deletions(-) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..36d97cf0 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,28 @@ +name: CI + +on: + push: + branches: [ main, master ] + pull_request: + branches: [ main, master ] + +jobs: + pypy38-cover: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + + - name: Set up PyPy + uses: actions/setup-python@v4 + with: + python-version: "pypy-3.8" + architecture: x64 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade setuptools wheel + python -m pip install tox tox-gh-actions + + - name: Test with tox + run: tox \ No newline at end of file diff --git a/tox.ini b/tox.ini index 8b1227db..9650320d 100644 --- a/tox.ini +++ b/tox.ini @@ -10,72 +10,24 @@ passenv = ; a generative tox configuration, see: https://tox.wiki/en/latest/user_guide.html#generative-environments [tox] -envlist = - clean, - check, - docs, - {py38,py39,py310,py311,py312,pypy38,pypy39,pypy310}-{cover,nocov}, - report -ignore_basepython_conflict = true +envlist = + py{38,39,310,311,312} + pypy{38,39,310} [testenv] -basepython = - pypy38: {env:TOXPYTHON:pypy3.8} - pypy39: {env:TOXPYTHON:pypy3.9} - pypy310: {env:TOXPYTHON:pypy3.10} - py38: {env:TOXPYTHON:python3.8} - py39: {env:TOXPYTHON:python3.9} - py310: {env:TOXPYTHON:python3.10} - py311: {env:TOXPYTHON:python3.11} - py312: {env:TOXPYTHON:python3.12} - {bootstrap,clean,check,report,docs,codecov}: {env:TOXPYTHON:python3} -setenv = - PYTHONPATH={toxinidir}/tests - PYTHONUNBUFFERED=yes -passenv = - * -usedevelop = - cover: true - nocov: false -deps = - pytest - cover: pytest-cov -commands = - nocov: {posargs:pytest -vv --ignore=src} - cover: {posargs:pytest --cov --cov-report=term-missing --cov-report=xml -vv} - -[testenv:check] deps = - docutils - check-manifest - pre-commit - readme-renderer - pygments - isort -skip_install = true + pytest>=6.0 + pytest-cov + # Add any other test dependencies +pip_pre = true commands = - python setup.py check --strict --metadata --restructuredtext - check-manifest . 
- pre-commit run --all-files --show-diff-on-failure + pytest {posargs:tests} -[testenv:docs] -usedevelop = true -deps = - -r{toxinidir}/docs/requirements.txt -commands = - sphinx-build {posargs:-E} -b html docs dist/docs - sphinx-build -b linkcheck docs dist/docs - -[testenv:report] -deps = - coverage -skip_install = true -commands = - coverage report - coverage html - -[testenv:clean] -commands = coverage erase -skip_install = true -deps = - coverage +[gh-actions] +python = + pypy-3.8: pypy38 + 3.8: py38 + 3.9: py39 + 3.10: py310 + 3.11: py311 + 3.12: py312 From 8835e6ab1eddadec49cb9aaaeda06ccbd1a27d9e Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Thu, 9 Jan 2025 10:57:25 -0800 Subject: [PATCH 2/7] fix: Restore full test matrix in CI configuration - Add matrix strategy for all Python versions - Include all operating systems - Restore check and docs jobs - Fix PyPy configuration --- .github/workflows/ci.yml | 63 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 36d97cf0..3d6e2f46 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,15 +7,46 @@ on: branches: [ main, master ] jobs: - pypy38-cover: - runs-on: ubuntu-22.04 + test: + name: ${{ matrix.tox-env }} (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-22.04, windows-latest, macos-latest] + tox-env: + - py38-cover + - py38-nocov + - py39-cover + - py39-nocov + - py310-cover + - py310-nocov + - py311-cover + - py311-nocov + - py312-cover + - py312-nocov + - pypy38-cover + - pypy38-nocov + - pypy39-cover + - pypy39-nocov + - pypy310-cover + - pypy310-nocov + steps: - uses: actions/checkout@v3 - - name: Set up PyPy + - name: Set up Python uses: actions/setup-python@v4 with: - python-version: "pypy-3.8" + python-version: | + pypy-3.8 + pypy-3.9 + pypy-3.10 + 3.8 + 3.9 + 3.10 + 3.11 + 3.12 architecture: x64 - name: Install dependencies @@ -25,4 +56,26 @@ jobs: python -m pip install tox tox-gh-actions - name: Test with tox - run: tox \ No newline at end of file + env: + TOXENV: ${{ matrix.tox-env }} + run: tox + + check: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" + - run: pip install tox + - run: tox -e check + + docs: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" + - run: pip install tox + - run: tox -e docs \ No newline at end of file From 0ff817952dea108e6085fbed57098d0790a6648e Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Thu, 9 Jan 2025 11:04:18 -0800 Subject: [PATCH 3/7] fix: Refine CI matrix configuration - Map each tox environment to specific Python version - Add proper matrix includes - Improve error reporting with verbose tox output - Fix dependency installation steps --- .github/workflows/ci.yml | 59 ++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d6e2f46..5208195e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,39 @@ jobs: - pypy39-nocov - pypy310-cover - pypy310-nocov + include: + - tox-env: py38-cover + python-version: "3.8" + - tox-env: py38-nocov + python-version: "3.8" + - tox-env: py39-cover + python-version: "3.9" + - tox-env: py39-nocov + python-version: "3.9" + - tox-env: py310-cover + python-version: "3.10" + - 
tox-env: py310-nocov + python-version: "3.10" + - tox-env: py311-cover + python-version: "3.11" + - tox-env: py311-nocov + python-version: "3.11" + - tox-env: py312-cover + python-version: "3.12" + - tox-env: py312-nocov + python-version: "3.12" + - tox-env: pypy38-cover + python-version: "pypy-3.8" + - tox-env: pypy38-nocov + python-version: "pypy-3.8" + - tox-env: pypy39-cover + python-version: "pypy-3.9" + - tox-env: pypy39-nocov + python-version: "pypy-3.9" + - tox-env: pypy310-cover + python-version: "pypy-3.10" + - tox-env: pypy310-nocov + python-version: "pypy-3.10" steps: - uses: actions/checkout@v3 @@ -38,15 +71,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: | - pypy-3.8 - pypy-3.9 - pypy-3.10 - 3.8 - 3.9 - 3.10 - 3.11 - 3.12 + python-version: ${{ matrix.python-version }} architecture: x64 - name: Install dependencies @@ -58,7 +83,7 @@ jobs: - name: Test with tox env: TOXENV: ${{ matrix.tox-env }} - run: tox + run: tox -v check: runs-on: ubuntu-22.04 @@ -66,8 +91,11 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: "3.x" - - run: pip install tox + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox - run: tox -e check docs: @@ -76,6 +104,9 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: "3.x" - - run: pip install tox + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox - run: tox -e docs \ No newline at end of file From aee1fa4245d3c8de1b0ef81b4c14a255771f478e Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Thu, 9 Jan 2025 11:08:16 -0800 Subject: [PATCH 4/7] fixed ci/cd --- .github/workflows/ci.yml | 59 ++---------------------------- tox.ini | 79 ++++++++++++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 67 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5208195e..9198eec4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,67 +8,18 @@ on: jobs: test: - name: ${{ matrix.tox-env }} (${{ matrix.os }}) + name: ${{ matrix.python-version }} / ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-22.04, windows-latest, macos-latest] - tox-env: - - py38-cover - - py38-nocov - - py39-cover - - py39-nocov - - py310-cover - - py310-nocov - - py311-cover - - py311-nocov - - py312-cover - - py312-nocov - - pypy38-cover - - pypy38-nocov - - pypy39-cover - - pypy39-nocov - - pypy310-cover - - pypy310-nocov - include: - - tox-env: py38-cover - python-version: "3.8" - - tox-env: py38-nocov - python-version: "3.8" - - tox-env: py39-cover - python-version: "3.9" - - tox-env: py39-nocov - python-version: "3.9" - - tox-env: py310-cover - python-version: "3.10" - - tox-env: py310-nocov - python-version: "3.10" - - tox-env: py311-cover - python-version: "3.11" - - tox-env: py311-nocov - python-version: "3.11" - - tox-env: py312-cover - python-version: "3.12" - - tox-env: py312-nocov - python-version: "3.12" - - tox-env: pypy38-cover - python-version: "pypy-3.8" - - tox-env: pypy38-nocov - python-version: "pypy-3.8" - - tox-env: pypy39-cover - python-version: "pypy-3.9" - - tox-env: pypy39-nocov - python-version: "pypy-3.9" - - tox-env: pypy310-cover - python-version: "pypy-3.10" - - tox-env: pypy310-nocov - python-version: "pypy-3.10" + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "pypy-3.8", 
"pypy-3.9", "pypy-3.10"] steps: - uses: actions/checkout@v3 - - name: Set up Python + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -81,9 +32,7 @@ jobs: python -m pip install tox tox-gh-actions - name: Test with tox - env: - TOXENV: ${{ matrix.tox-env }} - run: tox -v + run: tox --skip-missing-interpreters false check: runs-on: ubuntu-22.04 diff --git a/tox.ini b/tox.ini index 9650320d..d43e7b7a 100644 --- a/tox.ini +++ b/tox.ini @@ -10,24 +10,79 @@ passenv = ; a generative tox configuration, see: https://tox.wiki/en/latest/user_guide.html#generative-environments [tox] -envlist = - py{38,39,310,311,312} - pypy{38,39,310} +envlist = + clean, + check, + docs, + py{38,39,310,311,312}-{cover,nocov}, + pypy{38,39,310}-{cover,nocov}, + report +skip_missing_interpreters = true [testenv] +basepython = + py38: python3.8 + py39: python3.9 + py310: python3.10 + py311: python3.11 + py312: python3.12 + pypy38: pypy3.8 + pypy39: pypy3.9 + pypy310: pypy3.10 + {clean,check,docs,report}: python3 +setenv = + PYTHONPATH={toxinidir}/tests + PYTHONUNBUFFERED=yes + cover: SETUPPY_CFLAGS=-coverage +passenv = + * deps = pytest>=6.0 pytest-cov - # Add any other test dependencies -pip_pre = true commands = - pytest {posargs:tests} + {posargs:pytest --cov --cov-report=term-missing -vv tests} + +[testenv:check] +deps = + docutils + check-manifest + flake8 + readme-renderer + pygments + isort +skip_install = true +commands = + python setup.py check --strict --metadata --restructuredtext + check-manifest {toxinidir} + flake8 src tests setup.py + isort --verbose --check-only --diff --filter-files src tests setup.py + +[testenv:docs] +deps = + -r{toxinidir}/docs/requirements.txt +commands = + sphinx-build {posargs:-E} -b html docs dist/docs + sphinx-build -b linkcheck docs dist/docs + +[testenv:clean] +commands = coverage erase +skip_install = true +deps = coverage + +[testenv:report] +deps = coverage +skip_install = true +commands = + coverage report + coverage html [gh-actions] python = - pypy-3.8: pypy38 - 3.8: py38 - 3.9: py39 - 3.10: py310 - 3.11: py311 - 3.12: py312 + 3.8: py38-cover,py38-nocov + 3.9: py39-cover,py39-nocov + 3.10: py310-cover,py310-nocov + 3.11: py311-cover,py311-nocov + 3.12: py312-cover,py312-nocov + pypy-3.8: pypy38-cover,pypy38-nocov + pypy-3.9: pypy39-cover,pypy39-nocov + pypy-3.10: pypy310-cover,pypy310-nocov From c377c5c3e30752fa0c2a7fbefb55c34e5f596afd Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Thu, 9 Jan 2025 11:17:55 -0800 Subject: [PATCH 5/7] fix: Add gettext installation for macOS - Install gettext using Homebrew on macOS runners - Force link gettext to resolve library dependency - Conditional execution only on macOS --- .github/workflows/ci.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9198eec4..ebee7609 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,6 +19,13 @@ jobs: steps: - uses: actions/checkout@v3 + # Install gettext on macOS + - name: Install gettext on macOS + if: runner.os == 'macOS' + run: | + brew install gettext + brew link gettext --force + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: From d06928433b5b82c1ea23e9a01f084ba02bc595c6 Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Thu, 9 Jan 2025 11:25:01 -0800 Subject: [PATCH 6/7] fixed ci/cd --- .github/workflows/ci.yml | 10 +++++++++- tox.ini | 4 +--- 2 files changed, 10 insertions(+), 4 
deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ebee7609..1f884de3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,15 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04, windows-latest, macos-latest] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "pypy-3.8", "pypy-3.9", "pypy-3.10"] + python-version: [ + "3.8", + "3.9", + "3.10", + "3.11", + "3.12", + "pypy-3.9", + "pypy-3.10" + ] steps: - uses: actions/checkout@v3 diff --git a/tox.ini b/tox.ini index d43e7b7a..c3705df5 100644 --- a/tox.ini +++ b/tox.ini @@ -15,7 +15,7 @@ envlist = check, docs, py{38,39,310,311,312}-{cover,nocov}, - pypy{38,39,310}-{cover,nocov}, + pypy{39,310}-{cover,nocov}, report skip_missing_interpreters = true @@ -26,7 +26,6 @@ basepython = py310: python3.10 py311: python3.11 py312: python3.12 - pypy38: pypy3.8 pypy39: pypy3.9 pypy310: pypy3.10 {clean,check,docs,report}: python3 @@ -83,6 +82,5 @@ python = 3.10: py310-cover,py310-nocov 3.11: py311-cover,py311-nocov 3.12: py312-cover,py312-nocov - pypy-3.8: pypy38-cover,pypy38-nocov pypy-3.9: pypy39-cover,pypy39-nocov pypy-3.10: pypy310-cover,pypy310-nocov From 73bf2690e7c9d0eb3b2635a91305295842accdef Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Thu, 9 Jan 2025 17:11:28 -0800 Subject: [PATCH 7/7] fixed ci/cd --- setup.cfg | 5 ++ setup.py | 6 +- src/datapilot/clients/altimate/utils.py | 14 +++- src/datapilot/core/platforms/dbt/executor.py | 7 +- .../checks/check_macro_args_have_desc.py | 6 +- .../insights/checks/check_macro_has_desc.py | 6 +- .../checks/check_model_has_tests_by_group.py | 11 ++- .../checks/check_model_has_tests_by_name.py | 11 ++- .../checks/check_model_has_tests_by_type.py | 6 +- .../check_model_materialization_by_childs.py | 6 +- .../checks/check_model_name_contract.py | 5 +- .../checks/check_model_parents_and_childs.py | 15 +++- .../checks/check_source_has_freshness.py | 5 +- .../checks/check_source_has_meta_keys.py | 5 +- .../checks/check_source_has_tests_by_group.py | 9 ++- .../checks/check_source_has_tests_by_name.py | 6 +- .../checks/check_source_has_tests_by_type.py | 6 +- .../downstream_models_dependent_on_source.py | 3 +- .../dbt/insights/modelling/root_model.py | 5 +- src/datapilot/core/platforms/dbt/utils.py | 80 +++++++------------ src/datapilot/utils/utils.py | 18 ++++- tests/core/platform/dbt/test_utils.py | 24 +++--- tests/utils/test_utils.py | 12 ++- 23 files changed, 176 insertions(+), 95 deletions(-) create mode 100644 setup.cfg diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..533c9bd6 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,5 @@ +[flake8] +max-line-length = 140 +exclude = .tox,*.egg,build,data +select = E,W,F +ignore = W503 \ No newline at end of file diff --git a/setup.py b/setup.py index be8a6f10..02dcde7f 100755 --- a/setup.py +++ b/setup.py @@ -2,8 +2,10 @@ import re from pathlib import Path -from setuptools import find_packages -from setuptools import setup +from setuptools import ( + find_packages, + setup, +) def read(*names, **kwargs): diff --git a/src/datapilot/clients/altimate/utils.py b/src/datapilot/clients/altimate/utils.py index cc4b4fa5..2edaea59 100644 --- a/src/datapilot/clients/altimate/utils.py +++ b/src/datapilot/clients/altimate/utils.py @@ -55,7 +55,15 @@ def validate_permissions( return api_client.validate_upload_to_integration() -def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integration_environment, file_type, file_path, backend_url) -> Dict: +def onboard_file( + api_token, + 
tenant, + dbt_core_integration_id, + dbt_core_integration_environment, + file_type, + file_path, + backend_url, +) -> Dict: api_client = APIClient(api_token, base_url=backend_url, tenant=tenant) params = { @@ -84,7 +92,7 @@ def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integratio api_client.log("Error getting signed URL.") return { "ok": False, - "message": "Error in uploading the manifest. ", + "message": "Error in uploading the manifest.", } @@ -101,7 +109,7 @@ def start_dbt_ingestion(api_token, tenant, dbt_core_integration_id, dbt_core_int api_client.log("Error starting dbt ingestion worker") return { "ok": False, - "message": "Error starting dbt ingestion worker. ", + "message": "Error starting dbt ingestion worker.", } diff --git a/src/datapilot/core/platforms/dbt/executor.py b/src/datapilot/core/platforms/dbt/executor.py index 8d6d5cfd..f8286818 100644 --- a/src/datapilot/core/platforms/dbt/executor.py +++ b/src/datapilot/core/platforms/dbt/executor.py @@ -88,10 +88,11 @@ def __init__( ) if not self.selected_models: raise AltimateCLIArgumentError( - f"Invalid values provided in the --select argument. Could not find models associated with pattern: --select {' '.join(selected_models)}" + "Invalid values provided in the --select argument. " + f"Could not find models associated with pattern: --select {' '.join(selected_models)}" ) - self.excluded_models = None - self.excluded_models_flag = False + self.excluded_models = None + self.excluded_models_flag = False def _check_if_skipped(self, insight): if self.config.get("disabled_insights", False): diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py b/src/datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py index ee2b3973..70abf1b1 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py @@ -11,7 +11,11 @@ class CheckMacroArgsHaveDesc(ChecksInsight): NAME = "Check macro arguments has description" ALIAS = "check_macro_args_have_desc" DESCRIPTION = "Macro arguments should have a description. " - REASON_TO_FLAG = "Clear descriptions for macro arguments are crucial as they prevent misunderstandings, enhance user comprehension, and simplify maintenance. This leads to more accurate data analysis and efficient workflows." + REASON_TO_FLAG = ( + "Clear descriptions for macro arguments are crucial as they prevent misunderstandings, " + "enhance user comprehension, and simplify maintenance. " + "This leads to more accurate data analysis and efficient workflows." + ) def _build_failure_result( self, diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py b/src/datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py index afc853d2..2103d15c 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py @@ -11,7 +11,11 @@ class CheckMacroHasDesc(ChecksInsight): NAME = "Macro has documentation" ALIAS = "check_macro_has_desc" DESCRIPTION = "Macros should be documented." - REASON_TO_FLAG = "Undocumented macros can cause misunderstandings and inefficiencies in data modeling and analysis, as they make it difficult to understand their purpose and usage. Clear descriptions are vital for accuracy and streamlined workflow." 
+ REASON_TO_FLAG = ( + "Undocumented macros can cause misunderstandings and inefficiencies in data modeling and analysis, " + "as they make it difficult to understand their purpose and usage. " + "Clear descriptions are vital for accuracy and streamlined workflow." + ) def _build_failure_result( self, diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py index 66dd1fdb..2780b08e 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py @@ -50,9 +50,16 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]: def _build_failure_result(self, model_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult: missing_test_group_str = "" for test in missing_test_groups: - missing_test_group_str += f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n" + missing_test_group_str += ( + f"Test Group: {test.get(self.TEST_GROUP_STR)}, " + f"Min Count: {test.get(self.TEST_COUNT_STR)}, " + f"Actual Count: {test.get('actual_count')}\n" + ) - failure_message = f"The model `{model_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}. " + failure_message = ( + f"The model `{model_unique_id}` does not have enough tests for the following groups:\n" + f"{missing_test_group_str}. " + ) recommendation = ( "Add tests with the specified groups for each model listed above. " "Having tests with specific groups ensures proper validation and data integrity." diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py index 3e5eb04d..9b271e4c 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py @@ -48,9 +48,16 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]: def _build_failure_result(self, model_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult: tests_str = "" for test in missing_tests: - tests_str += f"Test Name: {test.get(self.TEST_NAME_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n" + tests_str += ( + f"Test Name: {test.get(self.TEST_NAME_STR)}, " + f"Min Count: {test.get(self.TEST_COUNT_STR)}, " + f"Actual Count: {test.get('actual_count')}\n" + ) - failure_message = f"The model `{model_unique_id}` does not have enough tests:\n{tests_str}. " + failure_message = ( + f"The model `{model_unique_id}` does not have enough tests:\n" + f"{tests_str}. " + ) recommendation = ( "Add tests with the specified names for each model listed above. " "Having tests with specific names ensures proper validation and data integrity." 
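
Note on the wrapped message builders in this patch: the long single-line f-strings are split into adjacent literals inside parentheses so each physical line stays under the flake8 max-line-length of 140 introduced by the new setup.cfg, and Python concatenates adjacent (f-)string literals at compile time, so the resulting message is identical to the pre-wrap version. A minimal standalone sketch of that behaviour, using a hypothetical test dict purely for illustration (the real code reads the keys from class constants such as TEST_NAME_STR and TEST_COUNT_STR):

# Hypothetical sample record; in the repository these dicts come from the insight configuration.
test = {"test_name": "not_null", "test_count": 2, "actual_count": 1}

tests_str = (
    f"Test Name: {test.get('test_name')}, "
    f"Min Count: {test.get('test_count')}, "
    f"Actual Count: {test.get('actual_count')}\n"
)

# Adjacent f-string literals are joined at compile time, so the wrapped form
# produces exactly the same string as the original one-line f-string.
assert tests_str == "Test Name: not_null, Min Count: 2, Actual Count: 1\n"
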
diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py index a5ce95c8..1bc204c2 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py @@ -49,7 +49,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]: def _build_failure_result(self, model_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult: missing_test_type_str = "" for test in missing_tests: - missing_test_type_str += f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n" + missing_test_type_str += ( + f"Test type: {test.get(self.TEST_TYPE_STR)}, " + f"Min Count: {test.get(self.TEST_COUNT_STR)}, " + f"Actual Count: {test.get('actual_count')}\n" + ) failure_message = f"The model `{model_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}. " recommendation = ( diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py index 33d6a445..81831360 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py @@ -11,7 +11,11 @@ class CheckModelMaterializationByChilds(ChecksInsight): NAME = "Model materialization by children" ALIAS = "check_model_materialization_by_childs" DESCRIPTION = "Fewer children than threshold ideally should be view or ephemeral, more or equal should be table or incremental." - REASON_TO_FLAG = "The model is flagged due to inappropriate materialization: models with child counts above the threshold require robust and efficient data processing, hence they should be materialized as tables or incrementals for optimized query performance and data management." + REASON_TO_FLAG = ( + "The model is flagged due to inappropriate materialization: models with child counts above the threshold " + "require robust and efficient data processing, hence they should be materialized as tables or incrementals " + "for optimized query performance and data management." + ) THRESHOLD_CHILDS_STR = "threshold_childs" def _build_failure_result_view_materialization( diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py index 988c5be9..27df82f7 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py @@ -122,7 +122,10 @@ def get_config_schema(cls): }, "required": [cls.PATTERN_STR, cls.FOLDER_STR], }, - "description": "A list of regex patterns to check the model name against. Each pattern is applied to the folder specified. If no pattern is found for the folder, the default pattern is used.", + "description": ( + "A list of regex patterns to check the model name against. Each pattern is applied to the folder specified. " + "If no pattern is found for the folder, the default pattern is used." 
+ ), "default": [], }, }, diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py index 2ab27ed3..bc78cd4f 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py @@ -66,7 +66,8 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]: if not self.max_childs and not self.max_parents: self.logger.info( - "max_children and max_parents are required values in the configuration. Please provide the required values. Skipping the insight." + "max_children and max_parents are required values in the configuration. " + "Please provide the required values. Skipping the insight." ) return insights @@ -98,10 +99,18 @@ def _check_model_parents_and_childs(self, model_unique_id: str) -> Optional[str] parents = node.depends_on.nodes message = "" if len(parents) < self.min_parents or len(parents) > self.max_parents: - message += f"The model:{model_unique_id} doesn't have the required number of parents.\n Min parents: {self.min_parents}, Max parents: {self.max_parents}. It has f{len(parents)} parents\n" + message += ( + f"The model:{model_unique_id} doesn't have the required number of parents.\n" + f"Min parents: {self.min_parents}, Max parents: {self.max_parents}. " + f"It has {len(parents)} parents\n" + ) if len(children) < self.min_childs or len(children) > self.max_childs: - message += f"The model:{model_unique_id} doesn't have the required number of childs.\n Min childs: {self.min_childs}, Max childs: {self.max_childs}. It has f{len(children)} childs\n" + message += ( + f"The model:{model_unique_id} doesn't have the required number of childs.\n" + f"Min childs: {self.min_childs}, Max childs: {self.max_childs}. " + f"It has {len(children)} childs\n" + ) return message diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py index b4f3eaee..b0c7cae9 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py @@ -82,7 +82,10 @@ def get_config_schema(cls): "properties": { cls.FRESHNESS_STR: { "type": "array", - "description": "The freshness options that should be defined for the source. If not provided, all freshness options are allowed.", + "description": ( + "The freshness options that should be defined for the source. " + "If not provided, all freshness options are allowed." + ), "items": { "type": "string", "enum": ["error_after", "warn_after"], diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py index f988e641..ef353104 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py @@ -14,7 +14,10 @@ class CheckSourceHasMetaKeys(ChecksInsight): NAME = "Source has required metadata keys" ALIAS = "check_source_has_meta_keys" DESCRIPTION = "Check if the source has required metadata keys" - REASON_TO_FLAG = "Missing meta keys in the source can lead to inconsistency in metadata management and understanding of the source. 
It's important to ensure that the source includes all the required meta keys as per the configuration." + REASON_TO_FLAG = ( + "Missing meta keys in the source can lead to inconsistency in metadata management and understanding of the source. " + "It's important to ensure that the source includes all the required meta keys as per the configuration." + ) META_KEYS_STR = "meta_keys" ALLOW_EXTRA_KEYS_STR = "allow_extra_keys" diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py index c8a7710d..f25cc87c 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py @@ -48,10 +48,15 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]: def _build_failure_result(self, source_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult: missing_test_group_str = "" for test in missing_test_groups: - missing_test_group_str += f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n" + missing_test_group_str += ( + f"Test Group: {test.get(self.TEST_GROUP_STR)}, " + f"Min Count: {test.get(self.TEST_COUNT_STR)}, " + f"Actual Count: {test.get('actual_count')}\n" + ) failure_message = ( - f"The source `{source_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}. " + f"The source `{source_unique_id}` does not have enough tests for the following groups:\n" + f"{missing_test_group_str}. " ) recommendation = ( "Add tests with the specified groups for each source listed above. " diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py index 0964336f..53ffec92 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py @@ -48,7 +48,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]: def _build_failure_result(self, source_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult: tests_str = "" for test in missing_tests: - tests_str += f"Test Name: {test.get(self.TEST_NAME_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n" + tests_str += ( + f"Test Name: {test.get(self.TEST_NAME_STR)}, " + f"Min Count: {test.get(self.TEST_COUNT_STR)}, " + f"Actual Count: {test.get('actual_count')}\n" + ) failure_message = f"The source `{source_unique_id}` does not have enough tests:\n{tests_str}. 
" recommendation = ( diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py index b78e1140..e7a93bf9 100644 --- a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py +++ b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py @@ -47,7 +47,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]: def _build_failure_result(self, source_unique_id: str, missing_tests) -> DBTInsightResult: missing_test_type_str = "" for test in missing_tests: - missing_test_type_str += f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n" + missing_test_type_str += ( + f"Test type: {test.get(self.TEST_TYPE_STR)}, " + f"Min Count: {test.get(self.TEST_COUNT_STR)}, " + f"Actual Count: {test.get('actual_count')}\n" + ) failure_message = f"The source `{source_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}. " recommendation = ( diff --git a/src/datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py b/src/datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py index 1025fe5a..5d1c8739 100644 --- a/src/datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py +++ b/src/datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py @@ -108,6 +108,7 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]: ) ) self.logger.debug( - f"Finished generating insights for DBTDownstreamModelsDependentOnSource. Found {len(insights)} models with direct source dependencies" + "Finished generating insights for DBTDownstreamModelsDependentOnSource. " + f"Found {len(insights)} models with direct source dependencies" ) return insights diff --git a/src/datapilot/core/platforms/dbt/insights/modelling/root_model.py b/src/datapilot/core/platforms/dbt/insights/modelling/root_model.py index b6c4d04d..9946dc62 100644 --- a/src/datapilot/core/platforms/dbt/insights/modelling/root_model.py +++ b/src/datapilot/core/platforms/dbt/insights/modelling/root_model.py @@ -15,7 +15,10 @@ class DBTRootModel(DBTModellingInsight): NAME = "Root model traceability" ALIAS = "root_model" - DESCRIPTION = "Identifies models in a dbt project with 0 direct parents, meaning these models cannot be traced back to a declared source or model." + DESCRIPTION = ( + "Identifies models in a dbt project with 0 direct parents, " + "meaning these models cannot be traced back to a declared source or model." + ) REASON_TO_FLAG = ( "Best Practice is to ensure all models can be traced back to a source or another model in the project. 
" "Root models with no direct parents can lead to challenges in tracking data lineage and understanding" diff --git a/src/datapilot/core/platforms/dbt/utils.py b/src/datapilot/core/platforms/dbt/utils.py index 6f43dff6..d1875a7b 100644 --- a/src/datapilot/core/platforms/dbt/utils.py +++ b/src/datapilot/core/platforms/dbt/utils.py @@ -265,67 +265,47 @@ def get_hard_coded_references(sql_code): # Define regex patterns to match different types of hard-coded references from_hard_coded_references = { "from_var_1": r"""(?ix) - - # first matching group # from or join followed by at least 1 whitespace character - (from | join)\s + - - # second matching group - # opening {{, 0 or more whitespace character(s), var, 0 or more whitespace character(s), an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark - ({{\s * var\s * \(\s *[\'\"]?) + (from | join)\s+ - # third matching group - # at least 1 of anything except a parenthesis or quotation mark - ([^)\'\"]+) + # second matching group + # opening {{, 0 or more whitespace character(s), var, 0 or more whitespace character(s) + # an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark + ({{\s*var\s*\(\s*[\'\"]?) - # fourth matching group - # 1 or 0 quotation mark, 0 or more whitespace character(s) - ([\'\"]?\s*) + # third matching group + # at least 1 of anything except a parenthesis or quotation mark + ([^)\'\"]+) - # fifth matching group - # a closing parenthesis, 0 or more whitespace character(s), closing }} - (\)\s *}}) + # fourth matching group + # 1 or 0 quotation mark, 0 or more whitespace character(s) + ([\'\"]?\s*) + # fifth matching group + # a closing parenthesis, 0 or more whitespace character(s), closing }} + (\)\s*}}) """, "from_var_2": r"""(?ix) + # first matching group + # from or join followed by at least 1 whitespace character + (from|join)\s+ - # first matching group - # from or join followed by at least 1 whitespace character - ( - from | join)\s + - - # second matching group - # opening {{, 0 or more whitespace character(s), var, 0 or more whitespace character(s), an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark - ({{\s * var\s * \(\s *[\'\"]?) - - # third matching group - # at least 1 of anything except a parenthesis or quotation mark - ([^)\'\"]+) - - # fourth matching group - # 1 or 0 quotation mark, 0 or more whitespace character(s) - ([\'\"]?\s*) - - # fifth matching group - # a comma - (,) - - # sixth matching group - # 0 or more whitespace character(s), 1 or 0 quotation mark - (\s *[\'\"]?) - - # seventh matching group - # at least 1 of anything except a parenthesis or quotation mark - ([^)\'\"]+) + # second matching group + # opening {{, 0 or more whitespace character(s), var, 0 or more whitespace character(s) + # an opening parenthesis, 0 or more whitespace character(s), 1 or 0 quotation mark + ({{\s*var\s*\(\s*[\'\"]?) 
- # eighth matching group - # 1 or 0 quotation mark, 0 or more whitespace character(s) - ([\'\"]?\s*) + # third matching group + # at least 1 of anything except a parenthesis or quotation mark + ([^)\'\"]+) - # ninth matching group - # a closing parenthesis, 0 or more whitespace character(s), closing }} - (\)\s *}}) + # fourth matching group + # 1 or 0 quotation mark, 0 or more whitespace character(s) + ([\'\"]?\s*) + # fifth matching group + # a closing parenthesis, 0 or more whitespace character(s), closing }} + (\)\s*}}) """, "from_table_1": r"""(?ix) diff --git a/src/datapilot/utils/utils.py b/src/datapilot/utils/utils.py index feb0cb7e..16ea3978 100644 --- a/src/datapilot/utils/utils.py +++ b/src/datapilot/utils/utils.py @@ -273,9 +273,21 @@ def generate_partial_manifest_catalog(changed_files, base_path: str = "./"): nodes_str = ",\n".join(json.dumps(data) for data in nodes_data + sources_data) query = ( - "{% set result = {} %}{% set nodes = [" - + nodes_str - + '] %}{% for n in nodes %}{% if n["resource_type"] == "source" %}{% set columns = adapter.get_columns_in_relation(source(n["name"], n["table"])) %}{% else %}{% set columns = adapter.get_columns_in_relation(ref(n["name"])) %}{% endif %}{% set new_columns = [] %}{% for column in columns %}{% do new_columns.append({"column": column.name, "dtype": column.dtype}) %}{% endfor %}{% do result.update({n["unique_id"]:new_columns}) %}{% endfor %}{{ tojson(result) }}' + "{% set result = {} %}" + "{% set nodes = [" + nodes_str + '] %}' + "{% for n in nodes %}" + "{% if n['resource_type'] == 'source' %}" + "{% set columns = adapter.get_columns_in_relation(source(n['name'], n['table'])) %}" + "{% else %}" + "{% set columns = adapter.get_columns_in_relation(ref(n['name'])) %}" + "{% endif %}" + "{% set new_columns = [] %}" + "{% for column in columns %}" + "{% do new_columns.append({'column': column.name, 'dtype': column.dtype}) %}" + "{% endfor %}" + "{% do result.update({n['unique_id']:new_columns}) %}" + "{% endfor %}" + "{{ tojson(result) }}" ) dbt_compile_output = run_macro(query, base_path) diff --git a/tests/core/platform/dbt/test_utils.py b/tests/core/platform/dbt/test_utils.py index 1625b82b..41e82a2b 100644 --- a/tests/core/platform/dbt/test_utils.py +++ b/tests/core/platform/dbt/test_utils.py @@ -2,16 +2,20 @@ import pytest -from datapilot.core.platforms.dbt.constants import BASE -from datapilot.core.platforms.dbt.constants import INTERMEDIATE -from datapilot.core.platforms.dbt.constants import MART -from datapilot.core.platforms.dbt.constants import OTHER -from datapilot.core.platforms.dbt.constants import STAGING -from datapilot.core.platforms.dbt.utils import MODEL_TYPE_PATTERNS -from datapilot.core.platforms.dbt.utils import _check_model_naming_convention -from datapilot.core.platforms.dbt.utils import classify_model_type_by_folder -from datapilot.core.platforms.dbt.utils import classify_model_type_by_name -from datapilot.core.platforms.dbt.utils import get_hard_coded_references +from datapilot.core.platforms.dbt.constants import ( + BASE, + INTERMEDIATE, + MART, + OTHER, + STAGING, +) +from datapilot.core.platforms.dbt.utils import ( + MODEL_TYPE_PATTERNS, + _check_model_naming_convention, + classify_model_type_by_folder, + classify_model_type_by_name, + get_hard_coded_references, +) @pytest.mark.parametrize( diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 5eea78a3..13be3f46 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -2,10 +2,14 @@ import pytest -from 
datapilot.core.platforms.dbt.utils import get_manifest_wrapper -from datapilot.core.platforms.dbt.utils import get_models -from datapilot.utils.utils import extract_folders_in_path -from datapilot.utils.utils import is_superset_path +from datapilot.core.platforms.dbt.utils import ( + get_manifest_wrapper, + get_models, +) +from datapilot.utils.utils import ( + extract_folders_in_path, + is_superset_path, +) test_cases = [ (Path("/home/user/documents/file.txt"), ["home", "user", "documents"]),
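
The (input, expected) pairs in test_cases above are ordinarily consumed through pytest's parametrize decorator; the excerpt ends before the test functions themselves, so the wiring below is an assumed sketch rather than the repository's actual test body. It only assumes that extract_folders_in_path returns the list of folder names for a given Path, excluding the file name, as the pair shown in the hunk suggests:

from pathlib import Path

import pytest

from datapilot.utils.utils import extract_folders_in_path

# Single case taken from the excerpt above; the real test_cases list is longer.
cases = [
    (Path("/home/user/documents/file.txt"), ["home", "user", "documents"]),
]


@pytest.mark.parametrize("path, expected", cases)
def test_extract_folders_in_path(path, expected):
    # Assumed behaviour: the folder components of the path, without the file name.
    assert extract_folders_in_path(path) == expected
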