From e89d81f249dc2a3b2bd891c6e65e16e651405e05 Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Mon, 9 Feb 2026 23:33:29 -0600 Subject: [PATCH 1/4] refactor: move from deprecated pkg_resources --- dataprofiler/labelers/base_data_labeler.py | 20 +++++++++++++------ dataprofiler/labelers/data_labelers.py | 7 +++++-- dataprofiler/labelers/data_processing.py | 8 ++++++-- .../tests/labelers/test_char_tf_load_model.py | 11 ++++++---- .../test_character_level_cnn_model.py | 5 +++-- .../tests/labelers/test_column_name_model.py | 6 ++++-- .../tests/labelers/test_data_labelers.py | 8 +++++--- .../tests/labelers/test_data_processing.py | 6 ++++-- ...st_integration_column_name_data_labeler.py | 5 +++-- .../test_integration_regex_data_labeler.py | 5 +++-- .../tests/labelers/test_regex_model.py | 5 +++-- 11 files changed, 57 insertions(+), 29 deletions(-) diff --git a/dataprofiler/labelers/base_data_labeler.py b/dataprofiler/labelers/base_data_labeler.py index 201f78998..c80754560 100644 --- a/dataprofiler/labelers/base_data_labeler.py +++ b/dataprofiler/labelers/base_data_labeler.py @@ -1,15 +1,17 @@ """Contains abstract classes from which labeler classes will inherit.""" + from __future__ import annotations +import importlib.resources import json import os import sys import warnings +from pathlib import Path from typing import cast import numpy as np import pandas as pd -import pkg_resources from dataprofiler._typing import DataArray @@ -17,7 +19,8 @@ from . 
import data_processing from .base_model import BaseModel -default_labeler_dir = pkg_resources.resource_filename("resources", "labelers") +with importlib.resources.as_file(importlib.resources.files("resources")) as base: + default_labeler_dir = Path(base) / "labelers" class BaseDataLabeler: @@ -246,7 +249,8 @@ def set_params(self, params: dict) -> None: self._postprocessor.set_params(**params["postprocessor"]) self.check_pipeline( - skip_postprocessor=self._postprocessor is None, error_on_mismatch=False + skip_postprocessor=self._postprocessor is None, + error_on_mismatch=False, ) def add_label(self, label: str, same_as: str = None) -> None: @@ -438,7 +442,9 @@ def get_parameter_overlap_mismatches( messages.append( "Preprocessor and postprocessor value for `{}` do not " "match. {} != {}".format( - param, preprocessor_params[param], postprocessor_params[param] + param, + preprocessor_params[param], + postprocessor_params[param], ) ) if messages: @@ -490,7 +496,8 @@ def _load_parameters(dirpath: str, load_options: dict = None) -> dict[str, dict] "The load_options preprocessor class does not " "match the required DataLabeler preprocessor." 
"\n {} != {}".format( - processor_class.__class__.__name__, param_processor_class + processor_class.__class__.__name__, + param_processor_class, ) ) params["preprocessor"]["class"] = load_options.get("preprocessor_class") @@ -505,7 +512,8 @@ def _load_parameters(dirpath: str, load_options: dict = None) -> dict[str, dict] raise ValueError( "The load_options postprocessor class does not match " "the required DataLabeler postprocessor.\n {} != {}".format( - processor_class.__class__.__name__, param_processor_class + processor_class.__class__.__name__, + param_processor_class, ) ) params["postprocessor"]["class"] = load_options.get("postprocessor_class") diff --git a/dataprofiler/labelers/data_labelers.py b/dataprofiler/labelers/data_labelers.py index a6d9932b7..2cbe2dccc 100644 --- a/dataprofiler/labelers/data_labelers.py +++ b/dataprofiler/labelers/data_labelers.py @@ -1,17 +1,20 @@ """Module to train and choose between structured and unstructured data labelers.""" + from __future__ import annotations +import importlib.resources import os +from pathlib import Path import pandas as pd -import pkg_resources from .. 
import data_readers from .base_data_labeler import BaseDataLabeler, TrainableDataLabeler from .base_model import BaseModel from .data_processing import BaseDataPostprocessor, BaseDataPreprocessor -default_labeler_dir = pkg_resources.resource_filename("resources", "labelers") +with importlib.resources.as_file(importlib.resources.files("resources")) as base: + default_labeler_dir = Path(base) / "labelers" def train_structured_labeler( diff --git a/dataprofiler/labelers/data_processing.py b/dataprofiler/labelers/data_processing.py index d53980a35..6fa439380 100644 --- a/dataprofiler/labelers/data_processing.py +++ b/dataprofiler/labelers/data_processing.py @@ -1,8 +1,10 @@ """Contains pre-built processors for data labeling/processing.""" + from __future__ import annotations import abc import copy +import importlib import inspect import json import math @@ -11,13 +13,15 @@ import types import warnings from collections import Counter +from pathlib import Path from typing import Any, Generator, Iterable, TypeVar, cast import numpy as np import numpy.typing as npt -import pkg_resources -default_labeler_dir = pkg_resources.resource_filename("resources", "labelers") +with importlib.resources.as_file(importlib.resources.files("resources")) as base: + default_labeler_dir = Path(base) / "labelers" + Processor = TypeVar("Processor", bound="BaseDataProcessor") diff --git a/dataprofiler/tests/labelers/test_char_tf_load_model.py b/dataprofiler/tests/labelers/test_char_tf_load_model.py index c6d70f740..35ad1311f 100644 --- a/dataprofiler/tests/labelers/test_char_tf_load_model.py +++ b/dataprofiler/tests/labelers/test_char_tf_load_model.py @@ -1,19 +1,20 @@ +import importlib.resources import json import os import unittest from io import StringIO +from pathlib import Path from unittest import mock import numpy as np import pandas as pd -import pkg_resources import tensorflow as tf from dataprofiler.labelers.char_load_tf_model import CharLoadTFModel _file_dir = 
os.path.dirname(os.path.abspath(__file__)) -_resource_labeler_dir = pkg_resources.resource_filename("resources", "labelers") - +with importlib.resources.as_file(importlib.resources.files("resources")) as base: + _resource_labeler_dir = Path(base) / "labelers" mock_model_parameters = { "model_path": "project/example/path/fake_model.h5", @@ -303,7 +304,9 @@ def test_param_validation(self, *mocks): "fake_extra_param": "fails", } model = CharLoadTFModel( - self.model_path, label_mapping=self.label_mapping, parameters=parameters + self.model_path, + label_mapping=self.label_mapping, + parameters=parameters, ) model._construct_model() self.assertDictEqual(parameters, model._parameters) diff --git a/dataprofiler/tests/labelers/test_character_level_cnn_model.py b/dataprofiler/tests/labelers/test_character_level_cnn_model.py index e120a9754..ee99809d4 100644 --- a/dataprofiler/tests/labelers/test_character_level_cnn_model.py +++ b/dataprofiler/tests/labelers/test_character_level_cnn_model.py @@ -1,12 +1,13 @@ +import importlib import json import os import unittest from io import StringIO +from pathlib import Path from unittest import mock import numpy as np import pandas as pd -import pkg_resources import tensorflow as tf from dataprofiler.labelers.character_level_cnn_model import ( @@ -15,7 +16,7 @@ ) _file_dir = os.path.dirname(os.path.abspath(__file__)) -_resource_labeler_dir = pkg_resources.resource_filename("resources", "labelers") +_resource_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" mock_model_parameters = { diff --git a/dataprofiler/tests/labelers/test_column_name_model.py b/dataprofiler/tests/labelers/test_column_name_model.py index 58f90839e..ca6be0d6c 100644 --- a/dataprofiler/tests/labelers/test_column_name_model.py +++ b/dataprofiler/tests/labelers/test_column_name_model.py @@ -1,18 +1,20 @@ +import importlib import json import os import sys import unittest from io import StringIO +from pathlib import Path from unittest import mock 
import numpy as np -import pkg_resources import dataprofiler as dp from dataprofiler.labelers.column_name_model import ColumnNameModel _file_dir = os.path.dirname(os.path.abspath(__file__)) -_resource_labeler_dir = pkg_resources.resource_filename("resources", "labelers") +_resource_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" + mock_model_parameters = { "true_positive_dict": [ diff --git a/dataprofiler/tests/labelers/test_data_labelers.py b/dataprofiler/tests/labelers/test_data_labelers.py index bbde1c506..1ac6f277a 100644 --- a/dataprofiler/tests/labelers/test_data_labelers.py +++ b/dataprofiler/tests/labelers/test_data_labelers.py @@ -149,11 +149,13 @@ def test_load_from_library(self, *mocks): @mock.patch("tensorflow.keras.models.load_model") def test_load_from_disk(self, *mocks): - import pkg_resources + import importlib + from pathlib import Path - default_labeler_dir = pkg_resources.resource_filename( - "resources", "labelers/structured_model" + default_labeler_dir = ( + Path(importlib.resources.files("resources")) / "labelers/structured_model" ) + data_labeler = dp.DataLabeler.load_from_disk(default_labeler_dir) self.assertIsInstance(data_labeler, BaseDataLabeler) diff --git a/dataprofiler/tests/labelers/test_data_processing.py b/dataprofiler/tests/labelers/test_data_processing.py index 00b4b088b..5ee50996c 100644 --- a/dataprofiler/tests/labelers/test_data_processing.py +++ b/dataprofiler/tests/labelers/test_data_processing.py @@ -1,13 +1,14 @@ +import importlib import json import os import random import re import unittest from io import StringIO +from pathlib import Path from unittest import mock import numpy as np -import pkg_resources from dataprofiler.labelers.data_processing import ( BaseDataProcessor, @@ -224,7 +225,8 @@ def test_load_from_library(self, mocked_load, *mocks): BaseDataProcessor.load_from_library("default") # assert called with proper load_processor dirpath - default_labeler_dir = 
pkg_resources.resource_filename("resources", "labelers") + default_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" + mocked_load.assert_called_with(os.path.join(default_labeler_dir, "default")) @mock.patch("builtins.open") diff --git a/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py b/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py index bcc136ae3..f20dfd99d 100644 --- a/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py +++ b/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py @@ -1,7 +1,8 @@ +import importlib import unittest +from pathlib import Path import numpy as np -import pkg_resources import dataprofiler as dp from dataprofiler.labelers.column_name_model import ColumnNameModel @@ -11,7 +12,7 @@ DirectPassPreprocessor, ) -default_labeler_dir = pkg_resources.resource_filename("resources", "labelers") +default_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" class TestColumnNameDataLabeler(unittest.TestCase): diff --git a/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py b/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py index 1cb753723..0c3c7bb70 100644 --- a/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py +++ b/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py @@ -1,12 +1,13 @@ +import importlib import os import unittest +from pathlib import Path import numpy as np -import pkg_resources from dataprofiler.labelers.data_labelers import BaseDataLabeler -default_labeler_dir = pkg_resources.resource_filename("resources", "labelers") +default_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" class TestRegexDataLabeler(unittest.TestCase): diff --git a/dataprofiler/tests/labelers/test_regex_model.py b/dataprofiler/tests/labelers/test_regex_model.py index 6a279307e..4534572a9 100644 --- a/dataprofiler/tests/labelers/test_regex_model.py 
+++ b/dataprofiler/tests/labelers/test_regex_model.py @@ -1,16 +1,17 @@ +import importlib import json import os import unittest from io import StringIO +from pathlib import Path from unittest import mock import numpy as np -import pkg_resources from dataprofiler.labelers.regex_model import RegexModel _file_dir = os.path.dirname(os.path.abspath(__file__)) -_resource_labeler_dir = pkg_resources.resource_filename("resources", "labelers") +_resource_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" mock_model_parameters = { From e11fe175cf31c27d0489a3f5a1bcbfe89ee58d7f Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Mon, 9 Feb 2026 23:40:12 -0600 Subject: [PATCH 2/4] fix: base str traversal --- .../tests/labelers/test_character_level_cnn_model.py | 3 ++- dataprofiler/tests/labelers/test_column_name_model.py | 3 ++- dataprofiler/tests/labelers/test_data_labelers.py | 7 ++++--- dataprofiler/tests/labelers/test_data_processing.py | 5 ++++- .../labelers/test_integration_column_name_data_labeler.py | 3 ++- .../tests/labelers/test_integration_regex_data_labeler.py | 3 ++- dataprofiler/tests/labelers/test_regex_model.py | 3 ++- 7 files changed, 18 insertions(+), 9 deletions(-) diff --git a/dataprofiler/tests/labelers/test_character_level_cnn_model.py b/dataprofiler/tests/labelers/test_character_level_cnn_model.py index ee99809d4..7a0ce5465 100644 --- a/dataprofiler/tests/labelers/test_character_level_cnn_model.py +++ b/dataprofiler/tests/labelers/test_character_level_cnn_model.py @@ -16,7 +16,8 @@ ) _file_dir = os.path.dirname(os.path.abspath(__file__)) -_resource_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" +with importlib.resources.as_file(importlib.resources.files("resources")) as base: + _resource_labeler_dir = Path(base) / "labelers" mock_model_parameters = { diff --git a/dataprofiler/tests/labelers/test_column_name_model.py b/dataprofiler/tests/labelers/test_column_name_model.py index ca6be0d6c..7b3e81422 100644 --- 
a/dataprofiler/tests/labelers/test_column_name_model.py +++ b/dataprofiler/tests/labelers/test_column_name_model.py @@ -13,7 +13,8 @@ from dataprofiler.labelers.column_name_model import ColumnNameModel _file_dir = os.path.dirname(os.path.abspath(__file__)) -_resource_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" +with importlib.resources.as_file(importlib.resources.files("resources")) as base: + _resource_labeler_dir = Path(base) / "labelers" mock_model_parameters = { diff --git a/dataprofiler/tests/labelers/test_data_labelers.py b/dataprofiler/tests/labelers/test_data_labelers.py index 1ac6f277a..bcc833434 100644 --- a/dataprofiler/tests/labelers/test_data_labelers.py +++ b/dataprofiler/tests/labelers/test_data_labelers.py @@ -152,9 +152,10 @@ def test_load_from_disk(self, *mocks): import importlib from pathlib import Path - default_labeler_dir = ( - Path(importlib.resources.files("resources")) / "labelers/structured_model" - ) + with importlib.resources.as_file( + importlib.resources.files("resources") + ) as base: + default_labeler_dir = Path(base) / "labelers/structured_model" data_labeler = dp.DataLabeler.load_from_disk(default_labeler_dir) self.assertIsInstance(data_labeler, BaseDataLabeler) diff --git a/dataprofiler/tests/labelers/test_data_processing.py b/dataprofiler/tests/labelers/test_data_processing.py index 5ee50996c..562c67dd9 100644 --- a/dataprofiler/tests/labelers/test_data_processing.py +++ b/dataprofiler/tests/labelers/test_data_processing.py @@ -225,7 +225,10 @@ def test_load_from_library(self, mocked_load, *mocks): BaseDataProcessor.load_from_library("default") # assert called with proper load_processor dirpath - default_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" + with importlib.resources.as_file( + importlib.resources.files("resources") + ) as base: + default_labeler_dir = Path(base) / "labelers" mocked_load.assert_called_with(os.path.join(default_labeler_dir, "default")) diff --git 
a/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py b/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py index f20dfd99d..df774fc50 100644 --- a/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py +++ b/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py @@ -12,7 +12,8 @@ DirectPassPreprocessor, ) -default_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" +with importlib.resources.as_file(importlib.resources.files("resources")) as base: + default_labeler_dir = Path(base) / "labelers" class TestColumnNameDataLabeler(unittest.TestCase): diff --git a/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py b/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py index 0c3c7bb70..c817a8a7f 100644 --- a/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py +++ b/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py @@ -7,7 +7,8 @@ from dataprofiler.labelers.data_labelers import BaseDataLabeler -default_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" +with importlib.resources.as_file(importlib.resources.files("resources")) as base: + default_labeler_dir = Path(base) / "labelers" class TestRegexDataLabeler(unittest.TestCase): diff --git a/dataprofiler/tests/labelers/test_regex_model.py b/dataprofiler/tests/labelers/test_regex_model.py index 4534572a9..507483710 100644 --- a/dataprofiler/tests/labelers/test_regex_model.py +++ b/dataprofiler/tests/labelers/test_regex_model.py @@ -11,7 +11,8 @@ from dataprofiler.labelers.regex_model import RegexModel _file_dir = os.path.dirname(os.path.abspath(__file__)) -_resource_labeler_dir = Path(importlib.resources.files("resources")) / "labelers" +with importlib.resources.as_file(importlib.resources.files("resources")) as base: + _resource_labeler_dir = Path(base) / "labelers" mock_model_parameters = { From 83ccf099b99c23274aedafdf030f7f30915185cf Mon Sep 17 
00:00:00 2001 From: Jeremy Goodsitt Date: Tue, 10 Feb 2026 00:45:55 -0600 Subject: [PATCH 3/4] fix: to use func --- dataprofiler/labelers/base_data_labeler.py | 7 ++---- dataprofiler/labelers/data_labelers.py | 6 ++--- dataprofiler/labelers/utils.py | 23 +++++++++++++++++++ .../tests/labelers/test_char_tf_load_model.py | 6 ++--- .../test_character_level_cnn_model.py | 6 ++--- .../tests/labelers/test_column_name_model.py | 6 ++--- .../tests/labelers/test_data_labelers.py | 8 ++----- .../tests/labelers/test_data_processing.py | 12 +++++----- ...st_integration_column_name_data_labeler.py | 6 ++--- .../test_integration_regex_data_labeler.py | 15 ++++++++---- .../tests/labelers/test_regex_model.py | 9 ++++---- 11 files changed, 57 insertions(+), 47 deletions(-) diff --git a/dataprofiler/labelers/base_data_labeler.py b/dataprofiler/labelers/base_data_labeler.py index c80754560..871c050ef 100644 --- a/dataprofiler/labelers/base_data_labeler.py +++ b/dataprofiler/labelers/base_data_labeler.py @@ -2,12 +2,10 @@ from __future__ import annotations -import importlib.resources import json import os import sys import warnings -from pathlib import Path from typing import cast import numpy as np @@ -16,11 +14,10 @@ from dataprofiler._typing import DataArray from .. import data_readers -from . import data_processing +from . import data_processing, utils from .base_model import BaseModel -with importlib.resources.as_file(importlib.resources.files("resources")) as base: - default_labeler_dir = Path(base) / "labelers" +default_labeler_dir = utils.find_resources_dir() / "labelers" class BaseDataLabeler: diff --git a/dataprofiler/labelers/data_labelers.py b/dataprofiler/labelers/data_labelers.py index 2cbe2dccc..5d69fb1a6 100644 --- a/dataprofiler/labelers/data_labelers.py +++ b/dataprofiler/labelers/data_labelers.py @@ -2,19 +2,17 @@ from __future__ import annotations -import importlib.resources import os -from pathlib import Path import pandas as pd from .. import data_readers +from . 
import utils from .base_data_labeler import BaseDataLabeler, TrainableDataLabeler from .base_model import BaseModel from .data_processing import BaseDataPostprocessor, BaseDataPreprocessor -with importlib.resources.as_file(importlib.resources.files("resources")) as base: - default_labeler_dir = Path(base) / "labelers" +default_labeler_dir = utils.find_resources_dir() / "labelers" def train_structured_labeler( diff --git a/dataprofiler/labelers/utils.py b/dataprofiler/labelers/utils.py index 2d587f7b4..ad4e7fcc8 100644 --- a/dataprofiler/labelers/utils.py +++ b/dataprofiler/labelers/utils.py @@ -1,6 +1,9 @@ """Contains functions for checking for installations/dependencies.""" + import sys +import sysconfig import warnings +from pathlib import Path from typing import Any, Callable, List @@ -50,3 +53,23 @@ def new_f(*args: Any, **kwds: Any) -> Any: return new_f return check_module + + +def find_resources_dir() -> Path: + """Return the ``resources`` directory that contains the bundled labelers.""" + # 1) Installed location from data_files: <data prefix>/resources + prefix = Path(sysconfig.get_path("data")) + installed = prefix / "resources" + if (installed / "labelers").is_dir(): + return installed + + # 2) Source tree fallback (works in editable installs / tests) + # Anchor the search at this module so it works from any working directory. 
+ here = Path(__file__).resolve() + # Walk upwards to find repo root that contains "resources/labelers" + for parent in here.parents: + candidate = parent / "resources" + if (candidate / "labelers").is_dir(): + return candidate + + raise FileNotFoundError("Could not locate resources (installed or source tree).") diff --git a/dataprofiler/tests/labelers/test_char_tf_load_model.py b/dataprofiler/tests/labelers/test_char_tf_load_model.py index 35ad1311f..61173e571 100644 --- a/dataprofiler/tests/labelers/test_char_tf_load_model.py +++ b/dataprofiler/tests/labelers/test_char_tf_load_model.py @@ -1,20 +1,18 @@ -import importlib.resources import json import os import unittest from io import StringIO -from pathlib import Path from unittest import mock import numpy as np import pandas as pd import tensorflow as tf +from dataprofiler.labelers import utils as labeler_utils from dataprofiler.labelers.char_load_tf_model import CharLoadTFModel _file_dir = os.path.dirname(os.path.abspath(__file__)) -with importlib.resources.as_file(importlib.resources.files("resources")) as base: - _resource_labeler_dir = Path(base) / "labelers" +_resource_labeler_dir = labeler_utils.find_resources_dir() / "labelers" mock_model_parameters = { "model_path": "project/example/path/fake_model.h5", diff --git a/dataprofiler/tests/labelers/test_character_level_cnn_model.py b/dataprofiler/tests/labelers/test_character_level_cnn_model.py index 7a0ce5465..530dda2ac 100644 --- a/dataprofiler/tests/labelers/test_character_level_cnn_model.py +++ b/dataprofiler/tests/labelers/test_character_level_cnn_model.py @@ -1,23 +1,21 @@ -import importlib import json import os import unittest from io import StringIO -from pathlib import Path from unittest import mock import numpy as np import pandas as pd import tensorflow as tf +from dataprofiler.labelers import utils as labeler_utils from dataprofiler.labelers.character_level_cnn_model import ( CharacterLevelCnnModel, EncodingLayer, ) _file_dir = 
os.path.dirname(os.path.abspath(__file__)) -with importlib.resources.as_file(importlib.resources.files("resources")) as base: - _resource_labeler_dir = Path(base) / "labelers" +_resource_labeler_dir = labeler_utils.find_resources_dir() / "labelers" mock_model_parameters = { diff --git a/dataprofiler/tests/labelers/test_column_name_model.py b/dataprofiler/tests/labelers/test_column_name_model.py index 7b3e81422..e3326a394 100644 --- a/dataprofiler/tests/labelers/test_column_name_model.py +++ b/dataprofiler/tests/labelers/test_column_name_model.py @@ -1,20 +1,18 @@ -import importlib import json import os import sys import unittest from io import StringIO -from pathlib import Path from unittest import mock import numpy as np import dataprofiler as dp +from dataprofiler.labelers import utils as labeler_utils from dataprofiler.labelers.column_name_model import ColumnNameModel _file_dir = os.path.dirname(os.path.abspath(__file__)) -with importlib.resources.as_file(importlib.resources.files("resources")) as base: - _resource_labeler_dir = Path(base) / "labelers" +_resource_labeler_dir = labeler_utils.find_resources_dir() / "labelers" mock_model_parameters = { diff --git a/dataprofiler/tests/labelers/test_data_labelers.py b/dataprofiler/tests/labelers/test_data_labelers.py index bcc833434..c52187bef 100644 --- a/dataprofiler/tests/labelers/test_data_labelers.py +++ b/dataprofiler/tests/labelers/test_data_labelers.py @@ -149,13 +149,9 @@ def test_load_from_library(self, *mocks): @mock.patch("tensorflow.keras.models.load_model") def test_load_from_disk(self, *mocks): - import importlib - from pathlib import Path + from dataprofiler.labelers import utils as labeler_utils - with importlib.resources.as_file( - importlib.resources.files("resources") - ) as base: - default_labeler_dir = Path(base) / "labelers/structured_model" + default_labeler_dir = labeler_utils.find_resources_dir() / "labelers/structured_model" data_labeler = dp.DataLabeler.load_from_disk(default_labeler_dir) 
self.assertIsInstance(data_labeler, BaseDataLabeler) diff --git a/dataprofiler/tests/labelers/test_data_processing.py b/dataprofiler/tests/labelers/test_data_processing.py index 562c67dd9..9d386d8d9 100644 --- a/dataprofiler/tests/labelers/test_data_processing.py +++ b/dataprofiler/tests/labelers/test_data_processing.py @@ -1,15 +1,17 @@ -import importlib +pass import json import os import random import re import unittest from io import StringIO -from pathlib import Path + +pass from unittest import mock import numpy as np +from dataprofiler.labelers import utils as labeler_utils from dataprofiler.labelers.data_processing import ( BaseDataProcessor, CharEncodedPreprocessor, @@ -225,10 +227,8 @@ def test_load_from_library(self, mocked_load, *mocks): BaseDataProcessor.load_from_library("default") # assert called with proper load_processor dirpath - with importlib.resources.as_file( - importlib.resources.files("resources") - ) as base: - default_labeler_dir = Path(base) / "labelers" + + default_labeler_dir = labeler_utils.find_resources_dir() / "labelers" mocked_load.assert_called_with(os.path.join(default_labeler_dir, "default")) diff --git a/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py b/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py index df774fc50..5d2307458 100644 --- a/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py +++ b/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py @@ -1,10 +1,9 @@ -import importlib import unittest -from pathlib import Path import numpy as np import dataprofiler as dp +from dataprofiler.labelers import utils as labeler_utils from dataprofiler.labelers.column_name_model import ColumnNameModel from dataprofiler.labelers.data_labelers import BaseDataLabeler from dataprofiler.labelers.data_processing import ( @@ -12,8 +11,7 @@ DirectPassPreprocessor, ) -with importlib.resources.as_file(importlib.resources.files("resources")) as base: - 
default_labeler_dir = Path(base) / "labelers" +default_labeler_dir = labeler_utils.find_resources_dir() / "labelers" class TestColumnNameDataLabeler(unittest.TestCase): diff --git a/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py b/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py index c817a8a7f..7c729ccaf 100644 --- a/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py +++ b/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py @@ -1,21 +1,26 @@ -import importlib import os import unittest -from pathlib import Path import numpy as np +from dataprofiler.labelers import utils as labeler_utils from dataprofiler.labelers.data_labelers import BaseDataLabeler -with importlib.resources.as_file(importlib.resources.files("resources")) as base: - default_labeler_dir = Path(base) / "labelers" +default_labeler_dir = labeler_utils.find_resources_dir() / "labelers" class TestRegexDataLabeler(unittest.TestCase): @classmethod def setUpClass(cls) -> None: cls.data = np.array( - ["123 Fake St.", "1/2/2020", "nice.", "4/3/22", "abc", "333-44-2341"] + [ + "123 Fake St.", + "1/2/2020", + "nice.", + "4/3/22", + "abc", + "333-44-2341", + ] ).reshape((-1,)) cls.data_labeler = BaseDataLabeler.load_from_disk( os.path.join(default_labeler_dir, "regex_model") diff --git a/dataprofiler/tests/labelers/test_regex_model.py b/dataprofiler/tests/labelers/test_regex_model.py index 507483710..91a2dfff3 100644 --- a/dataprofiler/tests/labelers/test_regex_model.py +++ b/dataprofiler/tests/labelers/test_regex_model.py @@ -1,18 +1,16 @@ -import importlib import json import os import unittest from io import StringIO -from pathlib import Path from unittest import mock import numpy as np +from dataprofiler.labelers import utils as labeler_utils from dataprofiler.labelers.regex_model import RegexModel _file_dir = os.path.dirname(os.path.abspath(__file__)) -with importlib.resources.as_file(importlib.resources.files("resources")) as base: - 
_resource_labeler_dir = Path(base) / "labelers" +_resource_labeler_dir = labeler_utils.find_resources_dir() / "labelers" mock_model_parameters = { @@ -163,7 +161,8 @@ def test_param_validation(self): for invalid_param_set in invalid_parameters: with self.assertRaises(ValueError): RegexModel( - label_mapping=self.label_mapping, parameters=invalid_param_set + label_mapping=self.label_mapping, + parameters=invalid_param_set, ) @mock.patch("sys.stdout", new_callable=StringIO) From 8a0af0bda6d1bf486cf34c7de8079eb04642003b Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Tue, 10 Feb 2026 00:53:29 -0600 Subject: [PATCH 4/4] fix: add missing change --- dataprofiler/labelers/data_processing.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dataprofiler/labelers/data_processing.py b/dataprofiler/labelers/data_processing.py index 6fa439380..c4517a0e3 100644 --- a/dataprofiler/labelers/data_processing.py +++ b/dataprofiler/labelers/data_processing.py @@ -4,7 +4,6 @@ import abc import copy -import importlib import inspect import json import math @@ -13,14 +12,14 @@ import types import warnings from collections import Counter -from pathlib import Path from typing import Any, Generator, Iterable, TypeVar, cast import numpy as np import numpy.typing as npt -with importlib.resources.as_file(importlib.resources.files("resources")) as base: - default_labeler_dir = Path(base) / "labelers" +from . import utils + +default_labeler_dir = utils.find_resources_dir() / "labelers" Processor = TypeVar("Processor", bound="BaseDataProcessor")