capitalone · JGSweets · Aug 21, 2025 · Aug 21, 2025 · Aug 21, 2025 · Aug 22, 2025
@@ -21,7 +21,7 @@ repos:
   # Flake8: complexity and style checking
   # https://flake8.pycqa.org/en/latest/user/using-hooks.html
   - repo: https://github.com/pycqa/flake8
-    rev: 4.0.1
+    rev: 7.3.0
     hooks:
       - id: flake8
         additional_dependencies: [flake8-docstrings]
@@ -82,11 +82,10 @@ repos:
 
             # requirements-ml.txt
             scikit-learn>=0.23.2,
-            'keras>=2.4.3,<=3.4.0',
+            "keras<=3.4.0; python_version <='3.9'",
+            "keras>=3.11.0; python_version > '3.9'",
             rapidfuzz>=2.6.1,
-            "tensorflow>=2.6.4,<2.15.0; sys.platform != 'darwin'",
-            "tensorflow>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine != 'arm64'",
-            "tensorflow-macos>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
+            tensorflow>=2.15.0,
             tqdm>=4.0.0,
 
             # requirements-reports.txt
@@ -101,7 +100,7 @@ repos:
             pytest-xdist>=2.1.0,
             pytest-forked>=1.3.0,
             toolz>=0.10.0,
-            'memray>=1.7.0,<1.12.0',
+            'memray>=1.18.0',
           ]
   # Check-manifest: ensures required non-Python files are included in MANIFEST.in
   # https://github.com/mgedmin/check-manifest/blob/master/.pre-commit-hooks.yaml

@@ -1,4 +1,5 @@
 """Contains abstract classes for labeling data."""
+
 from __future__ import annotations
 
 import abc
@@ -78,7 +79,7 @@ def __eq__(self, other: object) -> bool:
         :rtype: bool
         """
         if (
-            type(self) != type(other)
+            type(self) is not type(other)
             or not isinstance(other, BaseModel)
             or self._parameters != other._parameters
             or self._label_mapping != other._label_mapping

@@ -573,7 +573,7 @@ def _construct_model(self) -> None:
 
         # Compile the model
         softmax_output_layer_name = self._model.output_names[0]
-        losses = {softmax_output_layer_name: "categorical_crossentropy"}
+        losses = ["categorical_crossentropy", None, None]
 
         # use f1 score metric
         f1_score_training = labeler_utils.F1Score(
@@ -635,7 +635,7 @@ def _reconstruct_model(self) -> None:
 
         # Compile the model
         softmax_output_layer_name = self._model.output_names[0]
-        losses = {softmax_output_layer_name: "categorical_crossentropy"}
+        losses = ["categorical_crossentropy", None, None]
 
         # use f1 score metric
         f1_score_training = labeler_utils.F1Score(
@@ -699,13 +699,14 @@ def fit(
         f1_report: dict = {}
 
         self._model.reset_metrics()
-        softmax_output_layer_name = self._model.output_names[0]
+        # y_train is passed positionally, so no output-layer name lookup is needed
 
         start_time = time.time()
         batch_id = 0
         for x_train, y_train in train_data:
             model_results = self._model.train_on_batch(
-                x_train, {softmax_output_layer_name: y_train}
+                x_train,
+                y_train,
             )
             sys.stdout.flush()
             if verbose:

@@ -1,4 +1,5 @@
 """Contains pre-built processors for data labeling/processing."""
+
 from __future__ import annotations
 
 import abc
@@ -70,7 +71,7 @@ def __eq__(self, other: object) -> bool:
         :rtype: bool
         """
         if (
-            type(self) != type(other)
+            type(self) is not type(other)
             or not isinstance(other, BaseDataProcessor)
             or self._parameters != other._parameters
         ):
@@ -173,9 +174,11 @@ def process(
         labels: np.ndarray | None = None,
         label_mapping: dict[str, int] | None = None,
         batch_size: int = 32,
-    ) -> Generator[tuple[np.ndarray, np.ndarray] | np.ndarray, None, None] | tuple[
-        np.ndarray, np.ndarray
-    ] | np.ndarray:
+    ) -> (
+        Generator[tuple[np.ndarray, np.ndarray] | np.ndarray, None, None]
+        | tuple[np.ndarray, np.ndarray]
+        | np.ndarray
+    ):
         """Preprocess data."""
         raise NotImplementedError()
 
@@ -377,7 +380,16 @@ def _find_nearest_sentence_break_before_ind(
         sentence: str,
         start_ind: int,
         min_ind: int = 0,
-        separators: tuple[str, ...] = (" ", "\n", ",", "\t", "\r", "\x00", "\x01", ";"),
+        separators: tuple[str, ...] = (
+            " ",
+            "\n",
+            ",",
+            "\t",
+            "\r",
+            "\x00",
+            "\x01",
+            ";",
+        ),
     ) -> int:
         """
         Find nearest separator before the start_ind and return the index.
@@ -531,7 +543,8 @@ def gen_none() -> Generator[None, None, None]:
 
                     # pad the data until fits maximum length
                     pad_len = max(
-                        max_length - separate_ind + buffer_ind, max_length - sample_len
+                        max_length - separate_ind + buffer_ind,
+                        max_length - sample_len,
                     )
 
                     # Only add the buffer up until maximum length
@@ -891,7 +904,17 @@ def __init__(
         flatten_separator: str = " ",
         use_word_level_argmax: bool = False,
         output_format: str = "character_argmax",
-        separators: tuple[str, ...] = (" ", ",", ";", "'", '"', ":", "\n", "\t", "."),
+        separators: tuple[str, ...] = (
+            " ",
+            ",",
+            ";",
+            "'",
+            '"',
+            ":",
+            "\n",
+            "\t",
+            ".",
+        ),
         word_level_min_percent: float = 0.75,
     ) -> None:
         """
@@ -1185,7 +1208,11 @@ def convert_to_NER_format(
             if begin_idx != -1:
                 # Add last sample
                 sample_output.append(
-                    (begin_idx, curr_idx + 1, reverse_label_mapping[(int(curr_label))])
+                    (
+                        begin_idx,
+                        curr_idx + 1,
+                        reverse_label_mapping[(int(curr_label))],
+                    )
                 )
             # Add to total output list
             output_result.append(sample_output)
@@ -1194,7 +1221,10 @@ def convert_to_NER_format(
 
     @staticmethod
     def match_sentence_lengths(
-        data: np.ndarray, results: dict, flatten_separator: str, inplace: bool = True
+        data: np.ndarray,
+        results: dict,
+        flatten_separator: str,
+        inplace: bool = True,
     ) -> dict:
         """
         Convert results from model into same ragged data shapes as original data.
@@ -1516,7 +1546,10 @@ def process(
             np_unstruct_labels = None
 
         return super().process(
-            np.array(unstructured_data), np_unstruct_labels, label_mapping, batch_size
+            np.array(unstructured_data),
+            np_unstruct_labels,
+            label_mapping,
+            batch_size,
         )
 
 
@@ -1586,7 +1619,7 @@ def __eq__(self, other: object) -> bool:
         :rtype: bool
         """
         if (
-            type(self) != type(other)
+            type(self) is not type(other)
             or not isinstance(other, StructCharPostprocessor)
             or self._parameters["default_label"] != other._parameters["default_label"]
             or self._parameters["pad_label"] != other._parameters["pad_label"]
@@ -1662,7 +1695,10 @@ def help(cls) -> None:
 
     @staticmethod
     def match_sentence_lengths(
-        data: np.ndarray, results: dict, flatten_separator: str, inplace: bool = True
+        data: np.ndarray,
+        results: dict,
+        flatten_separator: str,
+        inplace: bool = True,
     ) -> dict:
         """
         Convert results from model into same ragged data shapes as original data.
@@ -1947,9 +1983,11 @@ def _validate_parameters(self, parameters: dict) -> None:
                 # being changed and is already set
                 aggregation_func = parameters.get(
                     "aggregation_func",
-                    self._parameters.get("aggregation_func")
-                    if hasattr(self, "_parameters")
-                    else None,
+                    (
+                        self._parameters.get("aggregation_func")
+                        if hasattr(self, "_parameters")
+                        else None
+                    ),
                 )
                 if value is None and aggregation_func == "priority":
                     errors.append(

@@ -1,4 +1,5 @@
 """Contains function for generating plugins data."""
+
 from collections import defaultdict
 from typing import Any, DefaultDict, Dict
 
@@ -19,9 +20,9 @@ def __inner_factory_function(fn):
         Actual population of plugin_dict.
 
         :param fn: Plugin function
-        :return: function
+        :return: the registered plugin function
         """
-        global plugins_dict
+        # no `global` needed: plugins_dict is mutated in place, never rebound
         plugins_dict[typ][name] = fn
         return fn
 

@@ -1,7 +1,7 @@
 check-manifest>=0.50
 black>=24.3.0
 isort==5.12.0
-pre-commit==2.19.0
+pre-commit==4.3.0
 tox==3.25.1
 tox-conda==0.10.2
 types-setuptools==67.7.0.1

@@ -1,7 +1,6 @@
 scikit-learn>=0.23.2
-keras<=3.4.0
+keras<=3.4.0; python_version <='3.9'
+keras>=3.11.0; python_version > '3.9'
 rapidfuzz>=2.6.1
-tensorflow>=2.16.0; sys.platform != 'darwin'
-tensorflow>=2.16.0; sys_platform == 'darwin' and platform_machine != 'arm64'
-tensorflow-macos>=2.16.0; sys_platform == 'darwin' and platform_machine == 'arm64'
+tensorflow>=2.16.0
 tqdm>=4.0.0
@@ -6,4 +6,4 @@ pytest-cov>=2.8.1
 pytest-xdist>=2.1.0
 pytest-forked>=1.3.0
 toolz>=0.10.0
-memray>=1.7.0,<1.12.0
+memray>=1.18.0