ceph · harriscr · Jan 26, 2026
diff --git a/.gitignore b/.gitignore
@@ -3,4 +3,5 @@
 *.venv
 *.code-workspace
 .devcontainer
-*.toml
+*.toml
+.coverage
diff --git a/post_processing/common.py b/post_processing/common.py
@@ -9,6 +9,7 @@
 from logging import Logger, getLogger
 from math import sqrt
 from pathlib import Path
+from re import Pattern
 from typing import Any, Optional, Union
 
 from post_processing.post_processing_types import CommonFormatDataType
@@ -32,6 +33,24 @@
 PLOT_FILE_EXTENSION_WITH_DOT: str = f".{PLOT_FILE_EXTENSION}"
 DATA_FILE_EXTENSION_WITH_DOT: str = f".{DATA_FILE_EXTENSION}"
 
+# Regex patterns for stripping confidential data; IP patterns must not consume adjacent whitespace
+_IPV4_PATTERN: Pattern[str] = re.compile(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b")
+_IPV6_PATTERN: Pattern[str] = re.compile(
+    r"\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|"
+    r"(?<=\s)::(?:[0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4}|"
+    r"\b[0-9a-fA-F]{1,4}::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}|"
+    r"\b[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}::(?:[0-9a-fA-F]{1,4}:){0,4}[0-9a-fA-F]{1,4}|"
+    r"\b(?:[0-9a-fA-F]{1,4}:){0,2}[0-9a-fA-F]{1,4}::(?:[0-9a-fA-F]{1,4}:){0,3}[0-9a-fA-F]{1,4}|"
+    r"\b(?:[0-9a-fA-F]{1,4}:){0,3}[0-9a-fA-F]{1,4}::(?:[0-9a-fA-F]{1,4}:){0,2}[0-9a-fA-F]{1,4}|"
+    r"\b(?:[0-9a-fA-F]{1,4}:){0,4}[0-9a-fA-F]{1,4}::(?:[0-9a-fA-F]{1,4}:)?[0-9a-fA-F]{1,4}|"
+    r"\b(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}::[0-9a-fA-F]{1,4}|"
+    r"\b(?:[0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4}::"
+)
+_HOSTNAME_PATTERN: Pattern[str] = re.compile(
+    r"(?:^|\s)([a-z0-9-]{1,61}\.(?:[a-z0-9-]{1,61}\.){0,6}[a-z0-9-]{1,61})(?=\s|$|[,:\[\]\"'])",
+    re.IGNORECASE | re.MULTILINE,
+)
+
 
 def get_blocksize_percentage_operation_from_file_name(file_name: str) -> tuple[str, str, str]:
     """
@@ -114,10 +133,10 @@ def get_latency_throughput_from_file(file_path: Path) -> tuple[str, str]:
 def get_resource_details_from_file(file_path: Path) -> tuple[str, str]:
     """
     Return the max CPU and max memory value from an intermediate file.
-    
+
     Args:
         file_path: Path to the intermediate format data file
-    
+
     Returns:
         A tuple of (max_cpu, max_memory) as formatted strings
     """
@@ -144,43 +163,25 @@ def strip_confidential_data_from_yaml(yaml_data: str) -> str:
 
     Currently handles hostnames, IPv4 addresses and IPv6 addresses
     """
-    filtered_text: str = yaml_data
-
-    ip_v4_pattern = re.compile(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b")
-    ip_v6_pattern = re.compile(
-        r"\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|\s::(?:[0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4}|$"
-        + r"\b[0-9a-fA-F]{1,4}::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}|$"
-        + r"\b[0-9a-fA-F]{1,4}:[0-9a-fA-F]{1,4}::(?:[0-9a-fA-F]{1,4}:){0,4}[0-9a-fA-F]{1,4}|$"
-        + r"\b(?:[0-9a-fA-F]{1,4}:){0,2}[0-9a-fA-F]{1,4}::(?:[0-9a-fA-F]{1,4}:){0,3}[0-9a-fA-F]{1,4}|$"
-        + r"\b(?:[0-9a-fA-F]{1,4}:){0,3}[0-9a-fA-F]{1,4}::(?:[0-9a-fA-F]{1,4}:){0,2}[0-9a-fA-F]{1,4}|$"
-        + r"\b(?:[0-9a-fA-F]{1,4}:){0,4}[0-9a-fA-F]{1,4}::(?:[0-9a-fA-F]{1,4}:)?[0-9a-fA-F]{1,4}|$"
-        + r"\b(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}::[0-9a-fA-F]{1,4}|$"
-        + r"\b(?:[0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4}::$"
-    )
-    hostname_pattern = re.compile(r"\s(?:[a-z0-9-]{1,61}\.){1,7}[a-z0-9-]{1,61}", re.IGNORECASE)
-
-    ip_addresses_to_replace: list[str] = ip_v4_pattern.findall(yaml_data)
-    ip_addresses_to_replace.extend(ip_v6_pattern.findall(yaml_data))
+    # Replace all IPv4 addresses
+    filtered_text: str = _IPV4_PATTERN.sub("--- IP Address ---", yaml_data)
 
-    unique_ip_addresses_to_replace: list[str] = []
-    for item in ip_addresses_to_replace:
-        if item.strip() not in unique_ip_addresses_to_replace:
-            unique_ip_addresses_to_replace.append(item.strip())
+    # Replace all IPv6 addresses
+    filtered_text = _IPV6_PATTERN.sub("--- IP Address ---", filtered_text)
 
-    for item in unique_ip_addresses_to_replace:
-        filtered_text = filtered_text.replace(item, "--- IP Address --")
+    # Replace hostnames with numbered identifiers using a callback
+    hostname_map: dict[str, str] = {}
 
-    hostnames_to_replace: list[str] = hostname_pattern.findall(yaml_data)
+    def replace_hostname(match: re.Match[str]) -> str:
+        # The whole match may start with one whitespace character that sits
+        # ahead of the captured name (group 1); keep it so layout is unchanged.
+        whole, name = match.group(0), match.group(1)
+        prefix = whole[: match.start(1) - match.start(0)]
+        # First sighting of a name allocates the next "serverN" placeholder.
+        alias = hostname_map.setdefault(name, f"--- server{len(hostname_map) + 1} ---")
+        return prefix + alias
 
-    unique_host_names_to_replace: list[str] = []
-    for item in hostnames_to_replace:
-        if item.strip() not in unique_host_names_to_replace:
-            unique_host_names_to_replace.append(item.strip())
-
-    count: int = 1
-    for value in unique_host_names_to_replace:
-        filtered_text = filtered_text.replace(value.strip(), f"--- server{count} ---")
-        count += 1
+    filtered_text = _HOSTNAME_PATTERN.sub(replace_hostname, filtered_text)
 
     return filtered_text
 
@@ -243,23 +244,27 @@ def recursive_search(data_to_search: dict[str, Any], search_key: str) -> Optiona
         if isinstance(value, list):
             for item in value:  # pyright: ignore[reportUnknownVariableType]
                 if isinstance(item, dict):
-                    return recursive_search(item, search_key)  # pyright: ignore[reportUnknownArgumentType]
+                    result = recursive_search(item, search_key)  # pyright: ignore[reportUnknownArgumentType]
+                    if result is not None:
+                        return result
         if isinstance(value, dict):
-            return recursive_search(value, search_key)  # pyright: ignore[reportUnknownArgumentType]
+            result = recursive_search(value, search_key)  # pyright: ignore[reportUnknownArgumentType]
+            if result is not None:
+                return result
 
     return None
 
 
 def get_blocksize(blocksize_value: str) -> str:
     """
     Extract the numeric blocksize value from a string, removing any unit suffix.
-    
+
     Args:
         blocksize_value: Blocksize string that may include a unit suffix (e.g., "4K", "1024")
-    
+
     Returns:
         The numeric blocksize value as a string without units
-        
+
     Example:
         >>> get_blocksize("4K")
         "4"

diff --git a/post_processing/run_results/benchmarks/benchmark_result.py b/post_processing/run_results/benchmarks/benchmark_result.py
@@ -26,6 +26,8 @@ class BenchmarkResult(ABC):
     def __init__(self, file_path: Path) -> None:
         self._resource_file_path: Path = file_path
         self._data: dict[str, Any] = self._read_results_from_file()
+        if not self._data:
+            raise ValueError(f"File {file_path} is empty")
 
         self._global_options: dict[str, str] = self._get_global_options(self._data["global options"])
         self._iodepth = self._get_iodepth(f"{self._data['global options']['iodepth']}")
@@ -39,18 +41,11 @@ def __init__(self, file_path: Path) -> None:
     def source(self) -> str:
         """
         Get the source/type identifier for the benchmark tool.
-        
+
         Returns:
             A string identifier for the benchmark source (e.g., "fio", "cosbench")
         """
 
-    # @abstractmethod
-    # def _parse(self, data: dict[str, Any]) -> None:
-    #    """
-    #    Read the resource usage data from the read data and return the
-    #    relevant resource usage statistics
-    #    """
-
     @abstractmethod
     def _get_global_options(self, fio_global_options: dict[str, str]) -> dict[str, str]:
         """

diff --git a/tests/test_axis_plotter.py b/tests/test_axis_plotter.py
@@ -0,0 +1,112 @@
+"""
+Unit tests for the AxisPlotter base class in post_processing/plotter/axis_plotter.py
+"""
+
+# pyright: strict, reportPrivateUsage=false
+#
+# We are OK to ignore private use in unit tests as the whole point of the tests
+# is to validate the functions contained in the module
+
+import unittest
+from unittest.mock import MagicMock
+
+from matplotlib.axes import Axes
+
+from post_processing.plotter.axis_plotter import AxisPlotter
+
+
+class ConcreteAxisPlotter(AxisPlotter):
+    """Minimal AxisPlotter subclass that fills in the abstract methods for testing"""
+
+    def plot(self, x_data: list[float], colour: str = "") -> None:
+        """Supply the abstract plot method by passing x_data, the main axes and colour to _plot"""
+        self._plot(x_data, self._main_axes, colour)
+
+    def add_y_data(self, data_value: str) -> None:
+        """Supply the abstract add_y_data method: append data_value to _y_data as a float"""
+        self._y_data.append(float(data_value))
+
+
+class TestAxisPlotter(unittest.TestCase):
+    """Tests for AxisPlotter behaviour, exercised through ConcreteAxisPlotter"""
+
+    def setUp(self) -> None:
+        """Build a plotter around a MagicMock constrained to the Axes interface"""
+        self.mock_axes = MagicMock(spec=Axes)
+        self.plotter = ConcreteAxisPlotter(self.mock_axes)
+
+    def test_initialization(self) -> None:
+        """A new plotter keeps the axes it was given and starts with no y data or labels"""
+        self.assertEqual(self.plotter._main_axes, self.mock_axes)
+        self.assertEqual(self.plotter._y_data, [])
+        self.assertEqual(self.plotter._y_label, "")
+        self.assertEqual(self.plotter._label, "")
+
+    def test_y_label_property_getter(self) -> None:
+        """The y_label property returns the value held in _y_label"""
+        self.plotter._y_label = "Test Label"
+        self.assertEqual(self.plotter.y_label, "Test Label")
+
+    def test_y_label_property_setter(self) -> None:
+        """Assigning to y_label stores the value in _y_label"""
+        self.plotter.y_label = "New Label"
+        self.assertEqual(self.plotter._y_label, "New Label")
+
+    def test_y_label_setter_warning_on_overwrite(self) -> None:
+        """Overwriting y_label logs a WARNING on the 'plotter' logger and still applies the value"""
+        self.plotter.y_label = "First Label"
+
+        with self.assertLogs("plotter", level="WARNING") as log_context:
+            self.plotter.y_label = "Second Label"
+
+        self.assertIn("Y label value already set", log_context.output[0])
+        self.assertEqual(self.plotter._y_label, "Second Label")
+
+    def test_plot_label_property_getter(self) -> None:
+        """The plot_label property returns the value held in _label"""
+        self.plotter._label = "Test Plot Label"
+        self.assertEqual(self.plotter.plot_label, "Test Plot Label")
+
+    def test_plot_label_property_setter(self) -> None:
+        """Assigning to plot_label stores the value in _label"""
+        self.plotter.plot_label = "New Plot Label"
+        self.assertEqual(self.plotter._label, "New Plot Label")
+
+    def test_plot_label_setter_warning_on_overwrite(self) -> None:
+        """Overwriting plot_label logs a WARNING on the 'plotter' logger and still applies the value"""
+        self.plotter.plot_label = "First Label"
+
+        with self.assertLogs("plotter", level="WARNING") as log_context:
+            self.plotter.plot_label = "Second Label"
+
+        self.assertIn("Plot label value already set", log_context.output[0])
+        self.assertEqual(self.plotter._label, "Second Label")
+
+    def test_add_y_data(self) -> None:
+        """Each add_y_data call appends one float to _y_data, in call order"""
+        self.plotter.add_y_data("10.5")
+        self.plotter.add_y_data("20.3")
+        self.plotter.add_y_data("30.7")
+
+        self.assertEqual(len(self.plotter._y_data), 3)
+        self.assertAlmostEqual(self.plotter._y_data[0], 10.5)
+        self.assertAlmostEqual(self.plotter._y_data[1], 20.3)
+        self.assertAlmostEqual(self.plotter._y_data[2], 30.7)
+
+    def test_plot_calls_internal_plot(self) -> None:
+        """plot() reaches the axes: set_ylabel, tick_params and plot are each called once"""
+        self.plotter.y_label = "Test Y"
+        self.plotter.plot_label = "Test Plot"
+        self.plotter.add_y_data("10")
+        self.plotter.add_y_data("20")
+
+        x_data = [1.0, 2.0]
+        self.plotter.plot(x_data, "blue")
+
+        # Each axes call is expected exactly once; only set_ylabel's argument is checked
+        self.mock_axes.set_ylabel.assert_called_once_with("Test Y")
+        self.mock_axes.tick_params.assert_called_once()
+        self.mock_axes.plot.assert_called_once()
+
+
+# Made with Bob