From e00b1c506655c862857515bbba2f9f57fda18005 Mon Sep 17 00:00:00 2001 From: Kristian Evers Date: Tue, 3 Jun 2025 22:06:18 +0200 Subject: [PATCH 1/2] Make all coordinates 4D This affects a larger chunk of the code but is mainly a matter of extending coordinate vectors and matrices a bit and only selecting the spatial parts of the coordinates in most cases where operations are done on the coordinates. I bit more convoluted but essential. Typing extending to include both CoordinateMatrix and Matrix, as well as CoordinateVector and Vector. Some matrices and vectors contain coordinates and can be expected to be either Nx4 or 4x1. Other matrices and vectors can be shaped arbitrarily. --- src/transformo/_typing.py | 8 +++-- src/transformo/core.py | 2 +- src/transformo/datatypes.py | 6 ++-- src/transformo/operators/helmert.py | 37 ++++++++++++++++-------- src/transformo/operators/proj.py | 10 ++++--- src/transformo/presenters/coordinates.py | 2 +- src/transformo/transformer.py | 6 ++-- test/datasources/test_datasource.py | 6 ++-- test/operators/test_helmert.py | 6 +++- test/operators/test_operator.py | 19 +++++++++++- test/test_pipeline.py | 4 +-- 11 files changed, 72 insertions(+), 34 deletions(-) diff --git a/src/transformo/_typing.py b/src/transformo/_typing.py index 4694e67..689dd7b 100644 --- a/src/transformo/_typing.py +++ b/src/transformo/_typing.py @@ -12,8 +12,10 @@ import numpy as np import numpy.typing as npt -Vector = Annotated[npt.NDArray[np.floating], Literal[3, 1]] -CoordinateMatrix = Annotated[npt.NDArray[np.floating], Literal["N", 3]] +Vector = Annotated[npt.NDArray[np.floating], Literal["N", 1]] +Matrix = Annotated[npt.NDArray[np.floating], Literal["M", "N"]] +CoordinateVector = Annotated[npt.NDArray[np.floating], Literal[4, 1]] +CoordinateMatrix = Annotated[npt.NDArray[np.floating], Literal["N", 4]] ParameterValue = str | float | None @@ -46,7 +48,7 @@ def from_str( # pylint: disable=too-many-arguments ) -> CoordinateLike: ... @property - def vector(self) -> np.typing.ArrayLike: ... + def vector(self) -> CoordinateVector: ... @property def stddev(self) -> np.typing.ArrayLike: ... diff --git a/src/transformo/core.py b/src/transformo/core.py index c8f5c43..a7c644b 100644 --- a/src/transformo/core.py +++ b/src/transformo/core.py @@ -81,7 +81,7 @@ class DataSource(pydantic.BaseModel): # station-based overrides overrides: dict[str, CoordinateOverrides] | None = pydantic.Field( - default_factory=dict + default_factory=dict.__call__ ) # coordinates are not included in pipeline serialization diff --git a/src/transformo/datatypes.py b/src/transformo/datatypes.py index 6f0414a..c5cd92e 100644 --- a/src/transformo/datatypes.py +++ b/src/transformo/datatypes.py @@ -8,7 +8,7 @@ import pydantic from pydantic.dataclasses import dataclass -from transformo._typing import ParameterValue +from transformo._typing import CoordinateVector, ParameterValue from transformo.transformer import Transformer @@ -64,9 +64,9 @@ def from_str( # pylint: disable=too-many-arguments ) @property - def vector(self) -> np.typing.ArrayLike: + def vector(self) -> CoordinateVector: """Coordinate given as Numpy vector (1D array).""" - return np.array([self.x, self.y, self.z]) + return np.array([self.x, self.y, self.z, self.t]) @property def stddev(self) -> np.typing.ArrayLike: diff --git a/src/transformo/operators/helmert.py b/src/transformo/operators/helmert.py index c878afb..30d04c6 100644 --- a/src/transformo/operators/helmert.py +++ b/src/transformo/operators/helmert.py @@ -12,7 +12,7 @@ import numpy as np from numpy import cos, sin -from transformo._typing import CoordinateMatrix, Vector +from transformo._typing import CoordinateMatrix, Matrix, Vector from transformo.core import Operator from transformo.datatypes import Parameter @@ -130,13 +130,17 @@ def forward(self, coordinates: CoordinateMatrix) -> CoordinateMatrix: """ Forward method of the 3 parameter Helmert. """ - return coordinates + self.T + coords = coordinates.copy() + coords[:, 0:3] = coords[:, 0:3] + self.T + return coords def inverse(self, coordinates: CoordinateMatrix) -> CoordinateMatrix: """ Inverse method of the 3 parameter Helmert. """ - return coordinates - self.T + coords = coordinates.copy() + coords[:, 0:3] = coords[:, 0:3] - self.T + return coords def estimate( self, @@ -152,8 +156,12 @@ def estimate( this method is called. """ - avg_source = np.average(source_coordinates, axis=0, weights=source_weights) - avg_target = np.average(target_coordinates, axis=0, weights=target_weights) + avg_source = np.average( + source_coordinates[:, 0:3], axis=0, weights=source_weights + ) + avg_target = np.average( + target_coordinates[:, 0:3], axis=0, weights=target_weights + ) mean_translation = avg_target - avg_source @@ -172,7 +180,7 @@ class RotationConvention(enum.Enum): # fmt: off -def R1(rx: float) -> CoordinateMatrix: +def R1(rx: float) -> Matrix: """Rotation matrix for rotating about the x-axis.""" return np.array( [ @@ -182,7 +190,7 @@ def R1(rx: float) -> CoordinateMatrix: ] ) -def R2(ry: float) -> CoordinateMatrix: +def R2(ry: float) -> Matrix: """Rotation matrix for rotating about the y-axis.""" return np.array( [ @@ -192,7 +200,7 @@ def R2(ry: float) -> CoordinateMatrix: ] ) -def R3(rz: float) -> CoordinateMatrix: +def R3(rz: float) -> Matrix: """Rotation matrix for rotating about the z-axis.""" return np.array( [ @@ -296,7 +304,7 @@ def _parameter_list(self) -> list[Parameter]: return params @cached_property - def R(self) -> CoordinateMatrix: # pylint: disable=invalid-name + def R(self) -> Matrix: # pylint: disable=invalid-name """ Rotation matrix. """ @@ -342,13 +350,18 @@ def forward(self, coordinates: CoordinateMatrix) -> CoordinateMatrix: # from the single-point Helmert formulation of B = T + s * R*A. # By transposing the rotation matrix we get the same results # when instead doing B = T + s * A*R^T. - return self.T + self.scale * coordinates @ self.R.T + coords = coordinates.copy() + coords[:, 0:3] = self.T + self.scale * coords[:, 0:3] @ self.R.T + return coords def inverse(self, coordinates: CoordinateMatrix) -> CoordinateMatrix: """ Inverse method of the 7 parameter Helmert. """ - return -self.T + 1 / self.scale * coordinates @ self.R + coords = coordinates.copy() + coords[:, 0:3] = -self.T + 1 / self.scale * coords[:, 0:3] @ self.R + + return coords def estimate( self, @@ -382,7 +395,7 @@ def rad2arcsec(rad) -> float: A[i * 3 : i * 3 + 3, :] = np.column_stack([np.eye(3), c[0:3], R]) - b = target_coordinates.flatten() + b = target_coordinates[:, 0:3].flatten() coeffs, _, _, _ = np.linalg.lstsq(A, b, rcond=None) diff --git a/src/transformo/operators/proj.py b/src/transformo/operators/proj.py index 97b3b2e..64a5164 100644 --- a/src/transformo/operators/proj.py +++ b/src/transformo/operators/proj.py @@ -50,22 +50,24 @@ def forward(self, coordinates: CoordinateMatrix) -> CoordinateMatrix: """ Forward method of the Transformation. """ - x, y, z = self._transformer.transform( + x, y, z, t = self._transformer.transform( coordinates[:, 0], coordinates[:, 1], coordinates[:, 2], + coordinates[:, 3], direction=pyproj.enums.TransformDirection.FORWARD, ) - return np.array([x, y, z]).transpose() + return np.array([x, y, z, t]).transpose() def inverse(self, coordinates: CoordinateMatrix) -> CoordinateMatrix: """ Inverse method of the Transformation. """ - x, y, z = self._transformer.transform( + x, y, z, t = self._transformer.transform( coordinates[:, 0], coordinates[:, 1], coordinates[:, 2], + coordinates[:, 3], direction=pyproj.enums.TransformDirection.INVERSE, ) - return np.array([x, y, z]).transpose() + return np.array([x, y, z, t]).transpose() diff --git a/src/transformo/presenters/coordinates.py b/src/transformo/presenters/coordinates.py index 48912b7..f0f173b 100644 --- a/src/transformo/presenters/coordinates.py +++ b/src/transformo/presenters/coordinates.py @@ -212,7 +212,7 @@ def evaluate( target = target_data.coordinate_matrix model = results[-1].coordinate_matrix - residuals = np.subtract(target, model) + residuals = np.subtract(target[:, 0:3], model[:, 0:3]) # convert to milimeters residuals *= 1000 diff --git a/src/transformo/transformer.py b/src/transformo/transformer.py index c5d4d70..df65717 100644 --- a/src/transformo/transformer.py +++ b/src/transformo/transformer.py @@ -7,7 +7,7 @@ import numpy as np import pyproj -from transformo._typing import CoordinateMatrix, Vector +from transformo._typing import CoordinateMatrix, CoordinateVector class Transformer: @@ -15,7 +15,7 @@ class Transformer: Transform coordinates using PROJ. Interface for pyproj that works using Transformo's datatypes such - as CoordinateMatrix and Vector. + as CoordinateMatrix and CoordinateVector. """ def __init__(self, transformer: pyproj.Transformer | None = None): @@ -50,7 +50,7 @@ def transform_many(self, coordinates: CoordinateMatrix) -> CoordinateMatrix: results = self.transformer.itransform(coordinates) return np.array(list(results)) - def transform_one(self, coordinate: Vector) -> Vector: + def transform_one(self, coordinate: CoordinateVector) -> CoordinateVector: """ Transform a single coordinate. """ diff --git a/test/datasources/test_datasource.py b/test/datasources/test_datasource.py index 30664f6..fb05c34 100644 --- a/test/datasources/test_datasource.py +++ b/test/datasources/test_datasource.py @@ -23,7 +23,7 @@ def test_datasource(coordinate_factory: Callable) -> None: # Is the coordinate matrix property functioning as expected? assert isinstance(ds1.coordinate_matrix, np.ndarray) - assert ds1.coordinate_matrix.shape == (n, 3) + assert ds1.coordinate_matrix.shape == (n, 4) assert (ds1.coordinate_matrix[0, :] == ds1.coordinates[0].vector).all() # Can we add two datasources? @@ -31,7 +31,7 @@ def test_datasource(coordinate_factory: Callable) -> None: ds_combined = ds1 + ds2 assert len(ds_combined.coordinates) == 2 * n - assert ds_combined.coordinate_matrix.shape == (2 * n, 3) + assert ds_combined.coordinate_matrix.shape == (2 * n, 4) assert isinstance(ds_combined, DataSource) ds_combined2 = ds1 @@ -39,7 +39,7 @@ def test_datasource(coordinate_factory: Callable) -> None: ds_combined2 = ds_combined2 + ds assert len(ds_combined2.coordinates) == 3 * n - assert ds_combined2.coordinate_matrix.shape == (3 * n, 3) + assert ds_combined2.coordinate_matrix.shape == (3 * n, 4) assert isinstance(ds_combined2, DataSource) diff --git a/test/operators/test_helmert.py b/test/operators/test_helmert.py index 9b60e23..2019107 100644 --- a/test/operators/test_helmert.py +++ b/test/operators/test_helmert.py @@ -102,7 +102,7 @@ def test_helmerttranslation_as_operator(): op = HelmertTranslation(x=3, y=5, z=10) # Can we roundtrip the `forward` and `inverse` methods - source_coordinates = np.zeros(shape=(10, 3)) + source_coordinates = np.zeros(shape=(10, 4)) roundtripped_coordinates = op.inverse(op.forward(source_coordinates)) print(roundtripped_coordinates) assert np.all(source_coordinates == roundtripped_coordinates) @@ -218,8 +218,12 @@ def test_helmert7param_transformation(source_coordinates): print(projstring) T = Transformer.from_projstring(projstring) + print(source_coordinates[0, :]) transformo_coords = h7.forward(source_coordinates) proj_coords = T.transform_many(source_coordinates) + print(source_coordinates[0, :]) + print(transformo_coords[0, :]) + print(proj_coords[0, :]) # We are not going to get an exact match but it's very close and good enough assert np.allclose(proj_coords, transformo_coords) diff --git a/test/operators/test_operator.py b/test/operators/test_operator.py index cda6ebf..fd3151b 100644 --- a/test/operators/test_operator.py +++ b/test/operators/test_operator.py @@ -9,7 +9,7 @@ from transformo._typing import CoordinateMatrix from transformo.datatypes import Parameter -from transformo.operators import DummyOperator, Operator +from transformo.operators import DummyOperator, Operator, ProjOperator def test_base_operator(source_coordinates): @@ -83,3 +83,20 @@ def test_dummyoperator(source_coordinates, target_coordinates): # The `DummyOperator`` should be able to run the `estimate()` method assert operator.can_estimate is True + + +def test_4d_transformation(source_coordinates): + """ + Test that spatiotemporal transformation works. + """ + # apply an offset in the x-direction (ten meters pr year since 2000) + op = ProjOperator(proj_string="+proj=helmert +dx=10 +t_epoch=2000.0") + transformed = op.forward(source_coordinates) + + print(source_coordinates) + print(transformed) + + # manually remove the applied x-offset + dt = source_coordinates[:, 3] - 2000 + x_offset_removed = transformed[:, 0] - dt * 10 + assert np.all(x_offset_removed == source_coordinates[:, 0]) diff --git a/test/test_pipeline.py b/test/test_pipeline.py index ed4449b..f259ca0 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -29,8 +29,8 @@ def test_pipeline(datasource_factory: Callable) -> None: # Test that source/target_coordinates properties are working as intended n_coordinates = 2 * 10 # 2 datasources with 10 coordinates each - assert pipeline.source_coordinates.shape == (n_coordinates, 3) - assert pipeline.target_coordinates.shape == (n_coordinates, 3) + assert pipeline.source_coordinates.shape == (n_coordinates, 4) + assert pipeline.target_coordinates.shape == (n_coordinates, 4) assert ( pipeline.source_coordinates[0, 0] From 16e58bc506b642fe0b7f1c9d961d365dceb18b1f Mon Sep 17 00:00:00 2001 From: Kristian Evers Date: Thu, 12 Jun 2025 17:14:18 +0200 Subject: [PATCH 2/2] Allow non-overlapping DataSources When processing a Pipeline DataSources are now modified so that they only contain coordinates from the union of the source and target stations. When removing stations from DataSources the original ordering of the data is comprimised. Hence Coordinate lists are sorted (in order of stations) as a final step of DataSource instantiantion. --- src/transformo/core.py | 18 +++++++++++++ src/transformo/pipeline.py | 23 ++++++++++++++++ test/datasources/test_datasource.py | 42 +++++++++++++++++++++++++++++ test/test_pipeline.py | 17 ------------ 4 files changed, 83 insertions(+), 17 deletions(-) diff --git a/src/transformo/core.py b/src/transformo/core.py index a7c644b..fba70ca 100644 --- a/src/transformo/core.py +++ b/src/transformo/core.py @@ -213,6 +213,8 @@ def __post_init__(self) -> None: if (t := self.overrides[key].t) is not None: c.t = t + self.coordinates = sorted(self.coordinates, key=lambda c: c.station) + def __add__(self, other: DataSource) -> DataSource: """ Add two `DataSource`s. @@ -331,6 +333,22 @@ def update_coordinates(self, coordinates: CoordinateMatrix) -> DataSource: return DataSource(coordinates=new_coordinates) + def station_union(self, other: DataSource) -> list[str]: + return list(set(self.stations) & set(other.stations)) + + def limit_to_stations(self, stations: list[str]) -> None: + """ + Limit DataSource to stations given in supplied list. + """ + stations_to_remove = [] + + for c in self.coordinates: + if c.station not in stations: + stations_to_remove.append(c) + + for c in stations_to_remove: + self.coordinates.remove(c) + class CombinedDataSource(DataSource): """Combination of two or more DataSource's.""" diff --git a/src/transformo/pipeline.py b/src/transformo/pipeline.py index 118b1ba..a33a195 100644 --- a/src/transformo/pipeline.py +++ b/src/transformo/pipeline.py @@ -81,6 +81,29 @@ def __init__( self._combined_source_data = sum(self.source_data, DataSource(None)) self._combined_target_data = sum(self.target_data, DataSource(None)) + all_source_stations = self._combined_source_data.stations + all_target_stations = self._combined_target_data.stations + + station_union = self._combined_source_data.station_union( + self._combined_target_data + ) + + self._combined_source_data.limit_to_stations(station_union) + self._combined_target_data.limit_to_stations(station_union) + + limited_source_stations = self._combined_source_data.stations + limited_target_stations = self._combined_target_data.stations + + if len(limited_source_stations) != len(all_source_stations): + n = len(all_source_stations) - len(limited_source_stations) + msg = f"{n} stations removed from source data (not in intersection of source and target data)" + logger.warning(msg) + + if len(limited_target_stations) != len(all_target_stations): + n = len(all_target_stations) - len(limited_target_stations) + msg = f"{n} stations removed from target data (not in intersection of source and target data)" + logger.warning(msg) + self._intermediate_results: list[DataSource] = [] @classmethod diff --git a/test/datasources/test_datasource.py b/test/datasources/test_datasource.py index fb05c34..5c686cd 100644 --- a/test/datasources/test_datasource.py +++ b/test/datasources/test_datasource.py @@ -238,3 +238,45 @@ def test_station_overrides(files) -> None: if c.station == "SULD": assert c.sz == 0.42 assert c.station == "MULD" + + +def test_station_union(files) -> None: + """ + Test DataSource.station_union(). + + In the file `dk_cors_etrs89.csv` we have the following stations: + + BUDP, ESBC, FER5, FYHA, GESR, HABY, HIRS, SMID, SULD, TEJH + + By renaming some of them in the import we can simulate to different + DataSources that includes different stations. + """ + + ds1 = CsvDataSource(filename=files["dk_cors_etrs89.csv"]) + ds2 = CsvDataSource( + filename=files["dk_cors_etrs89.csv"], + overrides={ + "BUDP": CoordinateOverrides(station="BIDP"), + "FYHA": CoordinateOverrides(station="PYHA"), + "HIRS": CoordinateOverrides(station="HALS"), + "SULD": CoordinateOverrides(station="MULD"), + }, + ) + + union = ds1.station_union(ds2) + expected_union = ["ESBC", "FER5", "GESR", "HABY", "SMID", "TEJH"] + + assert set(union) == set(expected_union) + + +def test_limit_to_stations(files) -> None: + """Test DataSource.limit_to_stations().""" + ds = CsvDataSource( + filename=files["dk_cors_etrs89.csv"], + ) + + stations = ["BUDP", "ESBC"] + ds.limit_to_stations(stations) + limited_stations = [c.station for c in ds.coordinates] + + assert stations == limited_stations diff --git a/test/test_pipeline.py b/test/test_pipeline.py index f259ca0..716e3bd 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -32,23 +32,6 @@ def test_pipeline(datasource_factory: Callable) -> None: assert pipeline.source_coordinates.shape == (n_coordinates, 4) assert pipeline.target_coordinates.shape == (n_coordinates, 4) - assert ( - pipeline.source_coordinates[0, 0] - == pipeline.source_data[0].coordinates[0].vector[0] - ) - assert ( - pipeline.source_coordinates[15, 2] - == pipeline.source_data[1].coordinates[5].vector[2] - ) - assert ( - pipeline.target_coordinates[7, 0] - == pipeline.target_data[0].coordinates[7].vector[0] - ) - assert ( - pipeline.target_coordinates[19, 2] - == pipeline.target_data[1].coordinates[9].vector[2] - ) - def test_pipeline_yaml_serilization(files: dict) -> None: """