From 4062dfd61e332b6bd07679850390b8d574ebe255 Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Tue, 20 Jan 2026 15:19:53 +0000
Subject: [PATCH 1/2] Bump the version to 4.2.2

---
 pyproject.toml           | 2 +-
 sphinx-docs/changelog.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 725717e..f5ca849 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "hlink"
-version = "4.2.1"
+version = "4.2.2"
 description = "Fast supervised pyspark record linkage software"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/sphinx-docs/changelog.md b/sphinx-docs/changelog.md
index 427a9c7..b87f89b 100644
--- a/sphinx-docs/changelog.md
+++ b/sphinx-docs/changelog.md
@@ -3,7 +3,7 @@
 The format of this changelog is based on [Keep A Changelog][keep-a-changelog].
 Hlink adheres to semantic versioning as much as possible.
 
-## Not Yet Released
+## v4.2.2 (2026-01-20)
 
 ### Added
 

From adaeef8f129cf23c9600a5477024780692b0cd7b Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Tue, 20 Jan 2026 15:30:04 +0000
Subject: [PATCH 2/2] Reformat with black v26

---
 examples/tutorial/tutorial.py                    |  6 ++----
 hlink/linking/core/column_mapping.py             |  1 -
 .../hh_matching/link_step_block_on_households.py |  1 -
 .../link_step_train_test_models.py               | 16 +++++++---------
 .../link_step_create_comparison_features.py      |  6 ++----
 hlink/linking/util.py                            |  1 -
 hlink/tests/core/column_mapping_test.py          |  1 -
 hlink/tests/core/substitutions_test.py           |  6 ++----
 hlink/tests/core/transforms_test.py              |  1 -
 9 files changed, 13 insertions(+), 26 deletions(-)

diff --git a/examples/tutorial/tutorial.py b/examples/tutorial/tutorial.py
index d52fc2b..47415ac 100644
--- a/examples/tutorial/tutorial.py
+++ b/examples/tutorial/tutorial.py
@@ -9,8 +9,7 @@
 
 
 def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(
-        description="""
+    parser = argparse.ArgumentParser(description="""
         This script links two very small example datasets that live in the data
         subdirectory. It reads in the tutorial_config.toml configuration file
         and runs hlink's preprocessing and matching steps to find some potential
@@ -18,8 +17,7 @@ def parse_args() -> argparse.Namespace:
 
         For a detailed walkthrough of the tutorial, please see the README.md
         file in the same directory as this script.
-        """
-    )
+        """)
 
     parser.add_argument(
         "--clean", action="store_true", help="drop existing Spark tables on startup"
diff --git a/hlink/linking/core/column_mapping.py b/hlink/linking/core/column_mapping.py
index f9506ba..ba58f97 100755
--- a/hlink/linking/core/column_mapping.py
+++ b/hlink/linking/core/column_mapping.py
@@ -79,7 +79,6 @@ def transform_reverse(input_col: Column, transform: Mapping[str, Any], context:
 )
 from pyspark.sql.types import LongType
 
-
 ColumnMappingTransform: TypeAlias = Callable[
     [Column, Mapping[str, Any], Mapping[str, Any]], Column
 ]
diff --git a/hlink/linking/hh_matching/link_step_block_on_households.py b/hlink/linking/hh_matching/link_step_block_on_households.py
index 738af50..f1a75b3 100644
--- a/hlink/linking/hh_matching/link_step_block_on_households.py
+++ b/hlink/linking/hh_matching/link_step_block_on_households.py
@@ -10,7 +10,6 @@
 from hlink.linking.link_step import LinkStep
 from hlink.linking.util import set_job_description
 
-
 logger = logging.getLogger(__name__)
 
 
diff --git a/hlink/linking/model_exploration/link_step_train_test_models.py b/hlink/linking/model_exploration/link_step_train_test_models.py
index 26137d3..a3f9aa3 100644
--- a/hlink/linking/model_exploration/link_step_train_test_models.py
+++ b/hlink/linking/model_exploration/link_step_train_test_models.py
@@ -843,11 +843,11 @@ def _aggregate_per_threshold_results(
     mcc = [r.mcc for r in prediction_results if not math.isnan(r.mcc)]
     f_measure = [r.f_measure for r in prediction_results if not math.isnan(r.f_measure)]
 
-    (precision_mean, precision_sd) = _compute_mean_and_stdev(precision)
-    (recall_mean, recall_sd) = _compute_mean_and_stdev(recall)
-    (pr_auc_mean, pr_auc_sd) = _compute_mean_and_stdev(pr_auc)
-    (mcc_mean, mcc_sd) = _compute_mean_and_stdev(mcc)
-    (f_measure_mean, f_measure_sd) = _compute_mean_and_stdev(f_measure)
+    precision_mean, precision_sd = _compute_mean_and_stdev(precision)
+    recall_mean, recall_sd = _compute_mean_and_stdev(recall)
+    pr_auc_mean, pr_auc_sd = _compute_mean_and_stdev(pr_auc)
+    mcc_mean, mcc_sd = _compute_mean_and_stdev(mcc)
+    f_measure_mean, f_measure_sd = _compute_mean_and_stdev(f_measure)
 
     new_desc = pd.DataFrame(
         {
@@ -962,8 +962,7 @@ def _handle_param_grid_attribute(training_settings: dict[str, Any]) -> dict[str,
 def _get_model_parameters(training_settings: dict[str, Any]) -> list[dict[str, Any]]:
     if "param_grid" in training_settings:
         print(
-            dedent(
-                """\
+            dedent("""\
                 Deprecation Warning: training.param_grid is deprecated.
 
                 Please use training.model_parameter_search instead by replacing
@@ -971,8 +970,7 @@ def _get_model_parameters(training_settings: dict[str, Any]) -> list[dict[str, A
                 `param_grid = True` with `model_parameter_search = {strategy = "grid"}` or
                 `param_grid = False` with `model_parameter_search = {strategy = "explicit"}`
 
-                [deprecated_in_version=4.0.0]"""
-            ),
+                [deprecated_in_version=4.0.0]"""),
             file=sys.stderr,
         )
 
diff --git a/hlink/linking/training/link_step_create_comparison_features.py b/hlink/linking/training/link_step_create_comparison_features.py
index 7e73575..b9727d1 100644
--- a/hlink/linking/training/link_step_create_comparison_features.py
+++ b/hlink/linking/training/link_step_create_comparison_features.py
@@ -42,8 +42,7 @@ def _create_training_features(self):
         dep_var = config[training_conf]["dependent_var"]
         if training_conf == "hh_training":
             hh_col = config[training_conf].get("hh_col", "serialp")
-            tdl = self.task.spark.sql(
-                f"""SELECT
+            tdl = self.task.spark.sql(f"""SELECT
                                     td.{id_col}_a,
                                     td.{id_col}_b,
                                     td.{dep_var},
@@ -57,8 +56,7 @@ def _create_training_features(self):
                                     left join
                                     prepped_df_b pdfb
                                     on pdfb.{id_col} = td.{id_col}_b
-                                """
-            )
+                                """)
         else:
             tdl = self.task.spark.table(f"{table_prefix}training_data").select(
                 f"{id_col}_a", f"{id_col}_b", dep_var
diff --git a/hlink/linking/util.py b/hlink/linking/util.py
index f239be7..cd80d4d 100644
--- a/hlink/linking/util.py
+++ b/hlink/linking/util.py
@@ -1,7 +1,6 @@
 from contextlib import contextmanager
 from math import ceil
 
-
 MIN_PARTITIONS = 200
 MAX_PARTITIONS = 10000
 
diff --git a/hlink/tests/core/column_mapping_test.py b/hlink/tests/core/column_mapping_test.py
index 3653a57..a4f47cc 100644
--- a/hlink/tests/core/column_mapping_test.py
+++ b/hlink/tests/core/column_mapping_test.py
@@ -5,7 +5,6 @@
 
 from hlink.linking.core.column_mapping import apply_transform, select_column_mapping
 
-
 TEST_DF_1 = pd.DataFrame(
     {
         "id": [0, 1, 2, 3, 4, 5],
diff --git a/hlink/tests/core/substitutions_test.py b/hlink/tests/core/substitutions_test.py
index 043d70c..6c122ef 100644
--- a/hlink/tests/core/substitutions_test.py
+++ b/hlink/tests/core/substitutions_test.py
@@ -24,13 +24,11 @@ def test_load_substitutions(tmp_path: Path) -> None:
 
 def test_generate_substitutions(spark: SparkSession, tmp_path: Path) -> None:
     tmp_file = tmp_path / "substitutions.csv"
-    tmp_file.write_text(
-        """rose,rosie
+    tmp_file.write_text("""rose,rosie
         sophia,sophy
         sophia,sofia
         amanda,mandy
-        jane,jean"""
-    )
+        jane,jean""")
 
     df = spark.createDataFrame(
         [("agnes", 2), ("mandy", 2), ("sophy", 2), ("rosie", 2), ("jean", 1)],
diff --git a/hlink/tests/core/transforms_test.py b/hlink/tests/core/transforms_test.py
index 08b8e82..141a119 100644
--- a/hlink/tests/core/transforms_test.py
+++ b/hlink/tests/core/transforms_test.py
@@ -5,7 +5,6 @@
 from hlink.linking.core.transforms import apply_transform, generate_transforms
 from hlink.linking.link_task import LinkTask
 
-
 ignore_apply_transform_dep_warning = pytest.mark.filterwarnings(
     r"ignore:\s*This is a deprecated alias for hlink.linking.core.column_mapping.apply_transform"
 )