From d62fe6acd5754590165fa405495df33423bf4ae7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Tue, 26 Nov 2024 11:25:10 +0100 Subject: [PATCH 01/14] add ml model shape --- geoengine/ml.py | 72 ++++++++++++++++++++++++++++++++++++++++-------- setup.cfg | 2 +- tests/test_ml.py | 32 +++++++++++++++------ 3 files changed, 85 insertions(+), 21 deletions(-) diff --git a/geoengine/ml.py b/geoengine/ml.py index d9ef6c01..26806922 100644 --- a/geoengine/ml.py +++ b/geoengine/ml.py @@ -5,16 +5,17 @@ from pathlib import Path import tempfile from dataclasses import dataclass +import geoengine_openapi_client.models from onnx import TypeProto, TensorProto, ModelProto from onnx.helper import tensor_dtype_to_string -from geoengine_openapi_client.models import MlModelMetadata, MlModel, RasterDataType +from geoengine_openapi_client.models import MlModelMetadata, MlModel, RasterDataType, TensorShape3D import geoengine_openapi_client from geoengine.auth import get_session from geoengine.datasets import UploadId from geoengine.error import InputException -@dataclass +@ dataclass class MlModelConfig: '''Configuration for an ml model''' name: str @@ -34,7 +35,8 @@ def register_ml_model(onnx_model: ModelProto, onnx_model, input_type=model_config.metadata.input_type, output_type=model_config.metadata.output_type, - num_input_bands=model_config.metadata.num_input_bands, + input_shape=model_config.metadata.input_shape, + out_shape=model_config.metadata.output_shape ) session = get_session() @@ -62,7 +64,8 @@ def register_ml_model(onnx_model: ModelProto, def validate_model_config(onnx_model: ModelProto, *, input_type: RasterDataType, output_type: RasterDataType, - num_input_bands: int): + input_shape: TensorShape3D, + out_shape: TensorShape3D): '''Validates the model config. 
Raises an exception if the model config is invalid''' def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: 'str'): @@ -80,6 +83,13 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: if domain.version != 9: raise InputException('Only ONNX models with opset version 9 are supported') + if input_shape.x != input_shape.y: + raise InputException('Currently only input shapes with x==y are allowed') + if out_shape.x != out_shape.y: + raise InputException('Currently only output shapes with x==y are allowed') + if out_shape.attributes != 1: + raise InputException('Currently only output shapes with one attribute/band allowed') + model_inputs = onnx_model.graph.input model_outputs = onnx_model.graph.output @@ -87,18 +97,58 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: raise InputException('Models with multiple inputs are not supported') check_data_type(model_inputs[0].type, input_type, 'input') - dims = model_inputs[0].type.tensor_type.shape.dim - if len(dims) != 2: - raise InputException('Only 2D input tensors are supported') - if not dims[1].dim_value: - raise InputException('Dimension 1 of the input tensor must have a length') - if dims[1].dim_value != num_input_bands: - raise InputException(f'Model input has {dims[1].dim_value} bands, but {num_input_bands} bands are expected') + dim = model_inputs[0].type.tensor_type.shape.dim + + if len(dim) == 2: + if not dim[1].dim_value: + raise InputException('Dimension 1 of a 1D input tensor must have a length') + if dim[1].dim_value != input_shape.attributes: + raise InputException(f'Model input has {dim[1].dim_value} bands, but {input_shape.attributes} are expected') + elif len(dim) == 4: + if not dim[1].dim_value: + raise InputException('Dimension 1 of the a 3D input tensor must have a length') + if not dim[2].dim_value: + raise InputException('Dimension 2 of the a 3D input tensor must have a length') + if not dim[3].dim_value: + 
raise InputException('Dimension 3 of the a 3D input tensor must have a length') + if dim[1].dim_value != input_shape.attributes: + raise InputException(f'Model input has {dim[1].dim_value} y size, but {input_shape.y} are expected') + if dim[2].dim_value != input_shape.attributes: + raise InputException(f'Model input has {dim[2].dim_value} x size, but {input_shape.x} are expected') + if dim[3].dim_value != input_shape.attributes: + raise InputException(f'Model input has {dim[3].dim_value} bands, but {input_shape.attributes} are expected') + else: + raise InputException('Only 1D and 3D input tensors are supported') if len(model_outputs) < 1: raise InputException('Models with no outputs are not supported') check_data_type(model_outputs[0].type, output_type, 'output') + dim = model_outputs[0].type.tensor_type.shape.dim + + if len(dim) == 1: + pass # this is a happens if there is only a single out? so shape would be [-1] + elif len(dim) == 2: + if not dim[1].dim_value: + raise InputException('Dimension 1 of a 1D input tensor must have a length') + if dim[1].dim_value != 1: + raise InputException(f'Model output has {dim[1].dim_value} bands, but {out_shape.attributes} are expected') + elif len(dim) == 4: + if not dim[1].dim_value: + raise InputException('Dimension 1 of the a 3D input tensor must have a length') + if not dim[2].dim_value: + raise InputException('Dimension 2 of the a 3D input tensor must have a length') + if not dim[3].dim_value: + raise InputException('Dimension 3 of the a 3D input tensor must have a length') + if dim[1].dim_value != out_shape.attributes: + raise InputException(f'Model output has {dim[1].dim_value} y size, but {out_shape.y} are expected') + if dim[2].dim_value != out_shape.attributes: + raise InputException(f'Model output has {dim[2].dim_value} x size, but {out_shape.x} are expected') + if dim[3].dim_value != out_shape.attributes: + raise InputException(f'Model output has {dim[3].dim_value} bands, but {out_shape.attributes} are expected') 
+ else: + raise InputException('Only 1D and 3D output tensors are supported') + RASTER_TYPE_TO_ONNX_TYPE = { RasterDataType.F32: TensorProto.FLOAT, diff --git a/setup.cfg b/setup.cfg index 675ed7c4..6cdc4ea7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,7 +18,7 @@ package_dir = packages = find: python_requires = >=3.9 install_requires = - geoengine-openapi-client == 0.0.17 + geoengine-openapi-client @ git+https://github.com/geo-engine/openapi-client@ml-model-input-output-shape#subdirectory=python geopandas >=0.9,<0.15 matplotlib >=3.5,<3.8 numpy >=1.21,<2 diff --git a/tests/test_ml.py b/tests/test_ml.py index 88a7865c..d5ef9e20 100644 --- a/tests/test_ml.py +++ b/tests/test_ml.py @@ -4,7 +4,7 @@ from sklearn.ensemble import RandomForestClassifier from skl2onnx import to_onnx import numpy as np -from geoengine_openapi_client.models import MlModelMetadata, RasterDataType +from geoengine_openapi_client.models import MlModelMetadata, RasterDataType, TensorShape3D import geoengine as ge from . 
import UrllibMocker @@ -48,8 +48,17 @@ def test_uploading_onnx_model(self): "metadata": { "fileName": "model.onnx", "inputType": "F32", - "numInputBands": 2, - "outputType": "I64" + "outputType": "I64", + "inputShape": { + "y": 1, + "x": 1, + "attributes": 2 + }, + "outputShape": { + "y": 1, + "x": 1, + "attributes": 1 + } }, "name": "foo", "upload": upload_id @@ -65,8 +74,9 @@ def test_uploading_onnx_model(self): metadata=MlModelMetadata( file_name="model.onnx", input_type=RasterDataType.F32, - num_input_bands=2, output_type=RasterDataType.I64, + input_shape=TensorShape3D(y=1, x=1, attributes=2), + output_shape=TensorShape3D(y=1, x=1, attributes=1) ), display_name="Decision Tree", description="A simple decision tree model", @@ -81,8 +91,9 @@ def test_uploading_onnx_model(self): metadata=MlModelMetadata( file_name="model.onnx", input_type=RasterDataType.F32, - num_input_bands=4, output_type=RasterDataType.I64, + input_shape=TensorShape3D(y=1, x=1, attributes=4), + output_shape=TensorShape3D(y=1, x=1, attributes=1) ), display_name="Decision Tree", description="A simple decision tree model", @@ -90,7 +101,7 @@ def test_uploading_onnx_model(self): ) self.assertEqual( str(exception.exception), - 'Model input has 2 bands, but 4 bands are expected' + 'Model input has 2 bands, but 4 are expected' ) with self.assertRaises(ge.InputException) as exception: @@ -101,8 +112,9 @@ def test_uploading_onnx_model(self): metadata=MlModelMetadata( file_name="model.onnx", input_type=RasterDataType.F64, - num_input_bands=2, output_type=RasterDataType.I64, + input_shape=TensorShape3D(y=1, x=1, attributes=2), + output_shape=TensorShape3D(y=1, x=1, attributes=1) ), display_name="Decision Tree", description="A simple decision tree model", @@ -121,8 +133,9 @@ def test_uploading_onnx_model(self): metadata=MlModelMetadata( file_name="model.onnx", input_type=RasterDataType.F32, - num_input_bands=2, output_type=RasterDataType.I32, + input_shape=TensorShape3D(y=1, x=1, attributes=2), + 
output_shape=TensorShape3D(y=1, x=1, attributes=1) ), display_name="Decision Tree", description="A simple decision tree model", @@ -141,8 +154,9 @@ def test_uploading_onnx_model(self): metadata=MlModelMetadata( file_name="model.onnx", input_type=RasterDataType.F32, - num_input_bands=2, output_type=RasterDataType.I64, + input_shape=TensorShape3D(y=1, x=1, attributes=2), + output_shape=TensorShape3D(y=1, x=1, attributes=1) ), display_name="Decision Tree", description="A simple decision tree model", From b7bd11201dc457c4631139cc122572b40f1c1ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Tue, 26 Nov 2024 11:47:57 +0100 Subject: [PATCH 02/14] update ml_pipeline example --- examples/ml_pipeline.ipynb | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/examples/ml_pipeline.ipynb b/examples/ml_pipeline.ipynb index 1472eb5e..adf52094 100644 --- a/examples/ml_pipeline.ipynb +++ b/examples/ml_pipeline.ipynb @@ -9,14 +9,14 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import geoengine as ge\n", "from geoengine.ml import MlModelConfig\n", "\n", - "from geoengine_openapi_client.models import MlModelMetadata, RasterDataType\n", + "from geoengine_openapi_client.models import MlModelMetadata, RasterDataType, TensorShape3D\n", "\n", "from sklearn.tree import DecisionTreeClassifier\n", "import numpy as np\n", @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -88,8 +88,9 @@ "metadata = MlModelMetadata(\n", " file_name=\"model.onnx\",\n", " input_type=RasterDataType.F32,\n", - " num_input_bands=2,\n", " 
output_type=RasterDataType.I64,\n", + " input_shape=TensorShape3D(y=1, x=1, attributes=2),\n", + " output_shape=TensorShape3D(y=1, x=1, attributes=1)\n", ")\n", "\n", "model_config = MlModelConfig(\n", @@ -113,12 +114,12 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] From d5bc65e043f638316b99f87e1821db2c84e5c9f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Tue, 26 Nov 2024 12:49:14 +0100 Subject: [PATCH 03/14] ml model validierung also allow x,y shaped 1d output --- geoengine/ml.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/geoengine/ml.py b/geoengine/ml.py index 679ab4a9..2b4488e7 100644 --- a/geoengine/ml.py +++ b/geoengine/ml.py @@ -98,9 +98,9 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: raise InputException('Dimension 2 of the a 3D input tensor must have a length') if not dim[3].dim_value: raise InputException('Dimension 3 of the a 3D input tensor must have a length') - if dim[1].dim_value != input_shape.attributes: + if dim[1].dim_value != input_shape.y: raise InputException(f'Model input has {dim[1].dim_value} y size, but {input_shape.y} are expected') - if dim[2].dim_value != input_shape.attributes: + if dim[2].dim_value != input_shape.x: raise InputException(f'Model input has {dim[2].dim_value} x size, but {input_shape.x} are expected') if dim[3].dim_value != input_shape.attributes: raise InputException(f'Model input has {dim[3].dim_value} bands, but {input_shape.attributes} are expected') @@ -112,7 +112,6 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: check_data_type(model_outputs[0].type, output_type, 'output') dim = model_outputs[0].type.tensor_type.shape.dim - if len(dim) == 1: pass # this is a happens if there is only a single out? 
so shape would be [-1] elif len(dim) == 2: @@ -120,6 +119,15 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: raise InputException('Dimension 1 of a 1D input tensor must have a length') if dim[1].dim_value != 1: raise InputException(f'Model output has {dim[1].dim_value} bands, but {out_shape.attributes} are expected') + elif len(dim) == 3: + if not dim[1].dim_value: + raise InputException('Dimension 1 of a 3D input tensor must have a length') + if not dim[2].dim_value: + raise InputException('Dimension 2 of a 3D input tensor must have a length') + if dim[1].dim_value != out_shape.y: + raise InputException(f'Model output has {dim[1].dim_value} y size, but {out_shape.y} are expected') + if dim[2].dim_value != out_shape.x: + raise InputException(f'Model output has {dim[2].dim_value} x size, but {out_shape.x} are expected') elif len(dim) == 4: if not dim[1].dim_value: raise InputException('Dimension 1 of the a 3D input tensor must have a length') @@ -127,9 +135,9 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: raise InputException('Dimension 2 of the a 3D input tensor must have a length') if not dim[3].dim_value: raise InputException('Dimension 3 of the a 3D input tensor must have a length') - if dim[1].dim_value != out_shape.attributes: + if dim[1].dim_value != out_shape.y: raise InputException(f'Model output has {dim[1].dim_value} y size, but {out_shape.y} are expected') - if dim[2].dim_value != out_shape.attributes: + if dim[2].dim_value != out_shape.x: raise InputException(f'Model output has {dim[2].dim_value} x size, but {out_shape.x} are expected') if dim[3].dim_value != out_shape.attributes: raise InputException(f'Model output has {dim[3].dim_value} bands, but {out_shape.attributes} are expected') From 1a5600bd41192caf79fad19088c3c14da7df4bd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Fri, 7 Feb 2025 10:08:34 +0100 Subject: [PATCH 04/14] rename MlModel 
3DTensorShape attrbutes to bands --- examples/ml_pipeline.ipynb | 4 ++-- geoengine/ml.py | 14 +++++++------- tests/test_ml.py | 12 ++++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/examples/ml_pipeline.ipynb b/examples/ml_pipeline.ipynb index adf52094..c955137f 100644 --- a/examples/ml_pipeline.ipynb +++ b/examples/ml_pipeline.ipynb @@ -89,8 +89,8 @@ " file_name=\"model.onnx\",\n", " input_type=RasterDataType.F32,\n", " output_type=RasterDataType.I64,\n", - " input_shape=TensorShape3D(y=1, x=1, attributes=2),\n", - " output_shape=TensorShape3D(y=1, x=1, attributes=1)\n", + " input_shape=TensorShape3D(y=1, x=1, bands=2),\n", + " output_shape=TensorShape3D(y=1, x=1, bands=1)\n", ")\n", "\n", "model_config = MlModelConfig(\n", diff --git a/geoengine/ml.py b/geoengine/ml.py index ff3d7d0d..118de4bf 100644 --- a/geoengine/ml.py +++ b/geoengine/ml.py @@ -123,8 +123,8 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: if len(dim) == 2: if not dim[1].dim_value: raise InputException('Dimension 1 of a 1D input tensor must have a length') - if dim[1].dim_value != input_shape.attributes: - raise InputException(f'Model input has {dim[1].dim_value} bands, but {input_shape.attributes} are expected') + if dim[1].dim_value != input_shape.bands: + raise InputException(f'Model input has {dim[1].dim_value} bands, but {input_shape.bands} are expected') elif len(dim) == 4: if not dim[1].dim_value: raise InputException('Dimension 1 of the a 3D input tensor must have a length') @@ -136,8 +136,8 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: raise InputException(f'Model input has {dim[1].dim_value} y size, but {input_shape.y} are expected') if dim[2].dim_value != input_shape.x: raise InputException(f'Model input has {dim[2].dim_value} x size, but {input_shape.x} are expected') - if dim[3].dim_value != input_shape.attributes: - raise InputException(f'Model input has {dim[3].dim_value} bands, but 
{input_shape.attributes} are expected') + if dim[3].dim_value != input_shape.bands: + raise InputException(f'Model input has {dim[3].dim_value} bands, but {input_shape.bands} are expected') else: raise InputException('Only 1D and 3D input tensors are supported') @@ -152,7 +152,7 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: if not dim[1].dim_value: raise InputException('Dimension 1 of a 1D input tensor must have a length') if dim[1].dim_value != 1: - raise InputException(f'Model output has {dim[1].dim_value} bands, but {out_shape.attributes} are expected') + raise InputException(f'Model output has {dim[1].dim_value} bands, but {out_shape.bands} are expected') elif len(dim) == 3: if not dim[1].dim_value: raise InputException('Dimension 1 of a 3D input tensor must have a length') @@ -173,8 +173,8 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: raise InputException(f'Model output has {dim[1].dim_value} y size, but {out_shape.y} are expected') if dim[2].dim_value != out_shape.x: raise InputException(f'Model output has {dim[2].dim_value} x size, but {out_shape.x} are expected') - if dim[3].dim_value != out_shape.attributes: - raise InputException(f'Model output has {dim[3].dim_value} bands, but {out_shape.attributes} are expected') + if dim[3].dim_value != out_shape.bands: + raise InputException(f'Model output has {dim[3].dim_value} bands, but {out_shape.bands} are expected') else: raise InputException('Only 1D and 3D output tensors are supported') diff --git a/tests/test_ml.py b/tests/test_ml.py index 7c0c603f..df9fe7c2 100644 --- a/tests/test_ml.py +++ b/tests/test_ml.py @@ -79,8 +79,8 @@ def test_uploading_onnx_model(self): file_name="model.onnx", input_type=RasterDataType.F32, output_type=RasterDataType.I64, - input_shape=TensorShape3D(y=1, x=1, attributes=4), - output_shape=TensorShape3D(y=1, x=1, attributes=1) + input_shape=TensorShape3D(y=1, x=1, bands=4), + output_shape=TensorShape3D(y=1, 
x=1, bands=1) ), display_name="Decision Tree", description="A simple decision tree model", @@ -100,8 +100,8 @@ def test_uploading_onnx_model(self): file_name="model.onnx", input_type=RasterDataType.F64, output_type=RasterDataType.I64, - input_shape=TensorShape3D(y=1, x=1, attributes=2), - output_shape=TensorShape3D(y=1, x=1, attributes=1) + input_shape=TensorShape3D(y=1, x=1, bands=2), + output_shape=TensorShape3D(y=1, x=1, bands=1) ), display_name="Decision Tree", description="A simple decision tree model", @@ -121,8 +121,8 @@ def test_uploading_onnx_model(self): file_name="model.onnx", input_type=RasterDataType.F32, output_type=RasterDataType.I32, - input_shape=TensorShape3D(y=1, x=1, attributes=2), - output_shape=TensorShape3D(y=1, x=1, attributes=1) + input_shape=TensorShape3D(y=1, x=1, bands=2), + output_shape=TensorShape3D(y=1, x=1, bands=1) ), display_name="Decision Tree", description="A simple decision tree model", From b884da95b0731b5b4810fc89a70ac6aee838eab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Thu, 8 May 2025 17:12:06 +0200 Subject: [PATCH 05/14] update openapi branch --- .github/.backend_git_ref | 2 +- examples/ml_pipeline.ipynb | 6 +++--- geoengine/ml.py | 6 +++--- setup.cfg | 2 +- tests/test_ml.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/.backend_git_ref b/.github/.backend_git_ref index ba2906d0..476ae44d 100644 --- a/.github/.backend_git_ref +++ b/.github/.backend_git_ref @@ -1 +1 @@ -main +e07f65bba004f23fe5bb957042bc71232b1b1c21 diff --git a/examples/ml_pipeline.ipynb b/examples/ml_pipeline.ipynb index c955137f..b88bf5c8 100644 --- a/examples/ml_pipeline.ipynb +++ b/examples/ml_pipeline.ipynb @@ -9,14 +9,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import geoengine as ge\n", "from geoengine.ml import MlModelConfig\n", "\n", - "from geoengine_openapi_client.models import MlModelMetadata, 
RasterDataType, TensorShape3D\n", + "from geoengine_openapi_client.models import MlModelMetadata, RasterDataType, MlTensorShape3D as TensorShape3D\n", "\n", "from sklearn.tree import DecisionTreeClassifier\n", "import numpy as np\n", @@ -180,7 +180,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/geoengine/ml.py b/geoengine/ml.py index 118de4bf..a3b7e16d 100644 --- a/geoengine/ml.py +++ b/geoengine/ml.py @@ -9,7 +9,7 @@ import geoengine_openapi_client.models from onnx import TypeProto, TensorProto, ModelProto from onnx.helper import tensor_dtype_to_string -from geoengine_openapi_client.models import MlModelMetadata, MlModel, RasterDataType, TensorShape3D +from geoengine_openapi_client.models import MlModelMetadata, MlModel, RasterDataType, MlTensorShape3D import geoengine_openapi_client from geoengine.auth import get_session from geoengine.datasets import UploadId @@ -98,8 +98,8 @@ def register_ml_model(onnx_model: ModelProto, def validate_model_config(onnx_model: ModelProto, *, input_type: RasterDataType, output_type: RasterDataType, - input_shape: TensorShape3D, - out_shape: TensorShape3D): + input_shape: MlTensorShape3D, + out_shape: MlTensorShape3D): '''Validates the model config. 
Raises an exception if the model config is invalid''' def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: 'str'): diff --git a/setup.cfg b/setup.cfg index 34557edc..c02d44f6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,7 +18,7 @@ package_dir = packages = find: python_requires = >=3.9 install_requires = - geoengine-openapi-client @ git+https://github.com/geo-engine/openapi-client@ml-model-input-output-shape#subdirectory=python + geoengine-openapi-client @ git+https://github.com/geo-engine/openapi-client@ml-model-input-outpt-shape-2#subdirectory=python geopandas >=0.9,<0.15 matplotlib >=3.5,<3.8 numpy >=1.21,<2.1 diff --git a/tests/test_ml.py b/tests/test_ml.py index df9fe7c2..45f3914a 100644 --- a/tests/test_ml.py +++ b/tests/test_ml.py @@ -4,7 +4,7 @@ from sklearn.ensemble import RandomForestClassifier from skl2onnx import to_onnx import numpy as np -from geoengine_openapi_client.models import MlModelMetadata, RasterDataType, TensorShape3D +from geoengine_openapi_client.models import MlModelMetadata, RasterDataType, MlTensorShape3D as TensorShape3D import geoengine as ge from tests.ge_test import GeoEngineTestInstance From c45bf1324ed20219ebbf4c9e135c0a2af2d9eb6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Wed, 14 May 2025 22:09:55 +0200 Subject: [PATCH 06/14] change ml model verification --- .github/.backend_git_ref | 2 +- examples/expression.ipynb | 2 +- geoengine/ml.py | 101 ++++++++++++++++++-------------------- 3 files changed, 51 insertions(+), 54 deletions(-) diff --git a/.github/.backend_git_ref b/.github/.backend_git_ref index 476ae44d..76003954 100644 --- a/.github/.backend_git_ref +++ b/.github/.backend_git_ref @@ -1 +1 @@ -e07f65bba004f23fe5bb957042bc71232b1b1c21 +a8d61cd3e5d0cddf82d96609ace7812e58899db2 diff --git a/examples/expression.ipynb b/examples/expression.ipynb index ccbf9140..572e3529 100644 --- a/examples/expression.ipynb +++ b/examples/expression.ipynb @@ -397,7 +397,7 @@ "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/geoengine/ml.py b/geoengine/ml.py index 7ea8739e..4ac87a4b 100644 --- a/geoengine/ml.py +++ b/geoengine/ml.py @@ -38,6 +38,7 @@ def register_ml_model(onnx_model: ModelProto, input_shape=model_config.metadata.input_shape, out_shape=model_config.metadata.output_shape ) + check_backend_constraints(model_config.metadata.input_shape, model_config.metadata.output_shape) session = get_session() @@ -62,6 +63,48 @@ def register_ml_model(onnx_model: ModelProto, return MlModelName.from_response(res_name) +def model_dim_to_tensorshape(model_dims): + '''Transform an ONNX dimension into a MlTensorShape3D''' + mts = MlTensorShape3D(x=1, y=1, bands=1) + if len(model_dims) == 1 and model_dims[0] > 0: + mts.bands = model_dims[0] + elif len(model_dims) == 2: + if model_dims[0] in (-1, 1): + mts.bands = model_dims[1] + else: + mts.y = model_dims[1] + mts.x = model_dims[2] + elif len(model_dims) == 3: + if model_dims[0] in (-1, 1): + mts.y = model_dims[1] + mts.x = model_dims[2] + else: + mts.y = model_dims[0] + mts.x = model_dims[1] + mts.bands = model_dims[2] + elif len(model_dims) == 4 and model_dims[0] in (-1, 1): + mts.y = model_dims[1] + mts.x = model_dims[2] + mts.bands = model_dims[3] + else: + raise InputException('Only 1D and 3D input tensors are supported. Got model dim {model_dims}') + return mts + + +def check_backend_constraints(input_shape: MlTensorShape3D, output_shape: MlTensorShape3D, ge_tile_size=(512, 512)): + ''' Checks that the shapes match the constraintsof the backend''' + + if not ( + input_shape.x in [1, ge_tile_size[0]] and input_shape.y in [1, ge_tile_size[1]] and input_shape.bands > 0 + ): + raise InputException('Backend currently supports single pixel and full tile shaped input! 
Got {input_shape}!') + + if not ( + output_shape.x in [1, ge_tile_size[0]] and output_shape.y in [1, ge_tile_size[1]] and output_shape.bands > 0 + ): + raise InputException('Backend currently supports single pixel and full tile shaped Output! Got {input_shape}!') + + # pylint: disable=too-many-branches,too-many-statements def validate_model_config(onnx_model: ModelProto, *, input_type: RasterDataType, @@ -89,64 +132,18 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: check_data_type(model_inputs[0].type, input_type, 'input') dim = model_inputs[0].type.tensor_type.shape.dim - - if len(dim) == 2: - if not dim[1].dim_value: - raise InputException('Dimension 1 of a 1D input tensor must have a length') - if dim[1].dim_value != input_shape.bands: - raise InputException(f'Model input has {dim[1].dim_value} bands, but {input_shape.bands} are expected') - elif len(dim) == 4: - if not dim[1].dim_value: - raise InputException('Dimension 1 of the a 3D input tensor must have a length') - if not dim[2].dim_value: - raise InputException('Dimension 2 of the a 3D input tensor must have a length') - if not dim[3].dim_value: - raise InputException('Dimension 3 of the a 3D input tensor must have a length') - if dim[1].dim_value != input_shape.y: - raise InputException(f'Model input has {dim[1].dim_value} y size, but {input_shape.y} are expected') - if dim[2].dim_value != input_shape.x: - raise InputException(f'Model input has {dim[2].dim_value} x size, but {input_shape.x} are expected') - if dim[3].dim_value != input_shape.bands: - raise InputException(f'Model input has {dim[3].dim_value} bands, but {input_shape.bands} are expected') - else: - raise InputException('Only 1D and 3D input tensors are supported') + in_ts3d = model_dim_to_tensorshape(dim) + if not in_ts3d == input_shape: + raise InputException("Input shape {in_ts3d} and metadata {input_shape} not equal!") if len(model_outputs) < 1: raise InputException('Models with no outputs are not 
supported') check_data_type(model_outputs[0].type, output_type, 'output') dim = model_outputs[0].type.tensor_type.shape.dim - if len(dim) == 1: - pass # this is a happens if there is only a single out? so shape would be [-1] - elif len(dim) == 2: - if not dim[1].dim_value: - raise InputException('Dimension 1 of a 1D input tensor must have a length') - if dim[1].dim_value != 1: - raise InputException(f'Model output has {dim[1].dim_value} bands, but {out_shape.bands} are expected') - elif len(dim) == 3: - if not dim[1].dim_value: - raise InputException('Dimension 1 of a 3D input tensor must have a length') - if not dim[2].dim_value: - raise InputException('Dimension 2 of a 3D input tensor must have a length') - if dim[1].dim_value != out_shape.y: - raise InputException(f'Model output has {dim[1].dim_value} y size, but {out_shape.y} are expected') - if dim[2].dim_value != out_shape.x: - raise InputException(f'Model output has {dim[2].dim_value} x size, but {out_shape.x} are expected') - elif len(dim) == 4: - if not dim[1].dim_value: - raise InputException('Dimension 1 of the a 3D input tensor must have a length') - if not dim[2].dim_value: - raise InputException('Dimension 2 of the a 3D input tensor must have a length') - if not dim[3].dim_value: - raise InputException('Dimension 3 of the a 3D input tensor must have a length') - if dim[1].dim_value != out_shape.y: - raise InputException(f'Model output has {dim[1].dim_value} y size, but {out_shape.y} are expected') - if dim[2].dim_value != out_shape.x: - raise InputException(f'Model output has {dim[2].dim_value} x size, but {out_shape.x} are expected') - if dim[3].dim_value != out_shape.bands: - raise InputException(f'Model output has {dim[3].dim_value} bands, but {out_shape.bands} are expected') - else: - raise InputException('Only 1D and 3D output tensors are supported') + out_ts3d = model_dim_to_tensorshape(dim) + if not out_ts3d == out_shape: + raise InputException("Output shape {out_ts3d} and metadata {out_shape} 
not equal!") RASTER_TYPE_TO_ONNX_TYPE = { From 89c54df8ec5cde98c50323630c8adbe51ad00329 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Thu, 15 May 2025 23:06:28 +0200 Subject: [PATCH 07/14] add more cases to model shape converer --- geoengine/ml.py | 46 +++++++++++++++++++++++++--------------------- tests/test_ml.py | 20 ++++++++++---------- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/geoengine/ml.py b/geoengine/ml.py index 4ac87a4b..9b77cb14 100644 --- a/geoengine/ml.py +++ b/geoengine/ml.py @@ -65,29 +65,32 @@ def register_ml_model(onnx_model: ModelProto, def model_dim_to_tensorshape(model_dims): '''Transform an ONNX dimension into a MlTensorShape3D''' + mts = MlTensorShape3D(x=1, y=1, bands=1) - if len(model_dims) == 1 and model_dims[0] > 0: - mts.bands = model_dims[0] + if len(model_dims) == 1 and model_dims[0].dim_value in (-1, 0): + pass # in this case, the model will produce as many outs as inputs + elif len(model_dims) == 1 and model_dims[0].dim_value > 0: + mts.bands = model_dims[0].dim_value elif len(model_dims) == 2: - if model_dims[0] in (-1, 1): - mts.bands = model_dims[1] + if model_dims[0].dim_value in (None, -1, 0, 1): + mts.bands = model_dims[1].dim_value else: - mts.y = model_dims[1] - mts.x = model_dims[2] + mts.y = model_dims[0].dim_value + mts.x = model_dims[1].dim_value elif len(model_dims) == 3: - if model_dims[0] in (-1, 1): - mts.y = model_dims[1] - mts.x = model_dims[2] + if model_dims[0].dim_value in (None, -1, 0, 1): + mts.y = model_dims[1].dim_value + mts.x = model_dims[2].dim_value else: - mts.y = model_dims[0] - mts.x = model_dims[1] - mts.bands = model_dims[2] - elif len(model_dims) == 4 and model_dims[0] in (-1, 1): - mts.y = model_dims[1] - mts.x = model_dims[2] - mts.bands = model_dims[3] + mts.y = model_dims[0].dim_value + mts.x = model_dims[1].dim_value + mts.bands = model_dims[2].dim_value + elif len(model_dims) == 4 and model_dims[0].dim_value in (None, -1, 0, 1): + mts.y = 
model_dims[1].dim_value + mts.x = model_dims[2].dim_value + mts.bands = model_dims[3].dim_value else: - raise InputException('Only 1D and 3D input tensors are supported. Got model dim {model_dims}') + raise InputException(f'Only 1D and 3D input tensors are supported. Got model dim {model_dims}') return mts @@ -97,12 +100,12 @@ def check_backend_constraints(input_shape: MlTensorShape3D, output_shape: MlTens if not ( input_shape.x in [1, ge_tile_size[0]] and input_shape.y in [1, ge_tile_size[1]] and input_shape.bands > 0 ): - raise InputException('Backend currently supports single pixel and full tile shaped input! Got {input_shape}!') + raise InputException(f'Backend currently supports single pixel and full tile shaped input! Got {input_shape}!') if not ( output_shape.x in [1, ge_tile_size[0]] and output_shape.y in [1, ge_tile_size[1]] and output_shape.bands > 0 ): - raise InputException('Backend currently supports single pixel and full tile shaped Output! Got {input_shape}!') + raise InputException(f'Backend currently supports single pixel and full tile shaped Output! 
Got {input_shape}!') # pylint: disable=too-many-branches,too-many-statements @@ -132,9 +135,10 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: check_data_type(model_inputs[0].type, input_type, 'input') dim = model_inputs[0].type.tensor_type.shape.dim + in_ts3d = model_dim_to_tensorshape(dim) if not in_ts3d == input_shape: - raise InputException("Input shape {in_ts3d} and metadata {input_shape} not equal!") + raise InputException(f"Input shape {in_ts3d} and metadata {input_shape} not equal!") if len(model_outputs) < 1: raise InputException('Models with no outputs are not supported') @@ -143,7 +147,7 @@ def check_data_type(data_type: TypeProto, expected_type: RasterDataType, prefix: dim = model_outputs[0].type.tensor_type.shape.dim out_ts3d = model_dim_to_tensorshape(dim) if not out_ts3d == out_shape: - raise InputException("Output shape {out_ts3d} and metadata {out_shape} not equal!") + raise InputException(f"Output shape {out_ts3d} and metadata {out_shape} not equal!") RASTER_TYPE_TO_ONNX_TYPE = { diff --git a/tests/test_ml.py b/tests/test_ml.py index 7bc6d30a..098e74b6 100644 --- a/tests/test_ml.py +++ b/tests/test_ml.py @@ -4,7 +4,7 @@ from sklearn.ensemble import RandomForestClassifier from skl2onnx import to_onnx import numpy as np -from geoengine_openapi_client.models import MlModelMetadata, RasterDataType, MlTensorShape3D as TensorShape3D +from geoengine_openapi_client.models import MlModelMetadata, RasterDataType, MlTensorShape3D import geoengine as ge from tests.ge_test import GeoEngineTestInstance @@ -41,8 +41,8 @@ def test_uploading_onnx_model(self): file_name="model.onnx", input_type=RasterDataType.F32, output_type=RasterDataType.I64, - input_shape=TensorShape3D(y=1, x=1, bands=2), - output_shape=TensorShape3D(y=1, x=1, bands=1) + input_shape=MlTensorShape3D(y=1, x=1, bands=2), + output_shape=MlTensorShape3D(y=1, x=1, bands=1) ), display_name="Decision Tree", description="A simple decision tree model", @@ -79,8 +79,8 
@@ def test_uploading_onnx_model(self): file_name="model.onnx", input_type=RasterDataType.F32, output_type=RasterDataType.I64, - input_shape=TensorShape3D(y=1, x=1, bands=4), - output_shape=TensorShape3D(y=1, x=1, bands=1) + input_shape=MlTensorShape3D(y=1, x=1, bands=4), + output_shape=MlTensorShape3D(y=1, x=1, bands=1) ), display_name="Decision Tree", description="A simple decision tree model", @@ -88,7 +88,7 @@ def test_uploading_onnx_model(self): ) self.assertEqual( str(exception.exception), - 'Model input has 2 bands, but 4 are expected' + 'Input shape bands=2 x=1 y=1 and metadata bands=4 x=1 y=1 not equal!' ) with self.assertRaises(ge.InputException) as exception: @@ -100,8 +100,8 @@ def test_uploading_onnx_model(self): file_name="model.onnx", input_type=RasterDataType.F64, output_type=RasterDataType.I64, - input_shape=TensorShape3D(y=1, x=1, bands=2), - output_shape=TensorShape3D(y=1, x=1, bands=1) + input_shape=MlTensorShape3D(y=1, x=1, bands=2), + output_shape=MlTensorShape3D(y=1, x=1, bands=1) ), display_name="Decision Tree", description="A simple decision tree model", @@ -121,8 +121,8 @@ def test_uploading_onnx_model(self): file_name="model.onnx", input_type=RasterDataType.F32, output_type=RasterDataType.I32, - input_shape=TensorShape3D(y=1, x=1, bands=2), - output_shape=TensorShape3D(y=1, x=1, bands=1) + input_shape=MlTensorShape3D(y=1, x=1, bands=2), + output_shape=MlTensorShape3D(y=1, x=1, bands=1) ), display_name="Decision Tree", description="A simple decision tree model", From 96547dcd4da16f7c32170d5d6f112ee91ad765c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Thu, 15 May 2025 23:15:46 +0200 Subject: [PATCH 08/14] update backend ref --- .github/.backend_git_ref | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/.backend_git_ref b/.github/.backend_git_ref index 76003954..f27dfe59 100644 --- a/.github/.backend_git_ref +++ b/.github/.backend_git_ref @@ -1 +1 @@ 
-a8d61cd3e5d0cddf82d96609ace7812e58899db2 +e85582875c0d476c5239d44853b0301e800b0fb2 From dde2c0d42b0906e0a7bd17a7e17f3f3433f7144d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Thu, 15 May 2025 23:45:26 +0200 Subject: [PATCH 09/14] fix onnx to 1.17 --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 9a5cc53b..927c254b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,6 +36,7 @@ install_requires = urllib3 >= 2.1, < 2.4 pydantic >= 2.10.6, < 2.11 skl2onnx >=1.17,<2 + onnx == 1.17 [options.extras_require] dev = From b71c8f202312979b3e8eb1565e6d54f2382704c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Fri, 16 May 2025 12:49:18 +0200 Subject: [PATCH 10/14] update backend ref --- .github/.backend_git_ref | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/.backend_git_ref b/.github/.backend_git_ref index f27dfe59..3450ef68 100644 --- a/.github/.backend_git_ref +++ b/.github/.backend_git_ref @@ -1 +1 @@ -e85582875c0d476c5239d44853b0301e800b0fb2 +cdb162df11bff5a3ae5854126d15538c77a2cabb From da6ee108a83cc1fbbbbdbab1f37bb4c4128257c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Fri, 16 May 2025 17:14:14 +0200 Subject: [PATCH 11/14] more tests --- tests/test_ml.py | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/tests/test_ml.py b/tests/test_ml.py index 098e74b6..2ed0b69d 100644 --- a/tests/test_ml.py +++ b/tests/test_ml.py @@ -1,20 +1,63 @@ '''Tests ML functionality''' +from typing import List import unittest +from onnx import TensorShapeProto as TSP from sklearn.ensemble import RandomForestClassifier from skl2onnx import to_onnx import numpy as np from geoengine_openapi_client.models import MlModelMetadata, RasterDataType, MlTensorShape3D import geoengine as ge +from geoengine.ml import model_dim_to_tensorshape from tests.ge_test import GeoEngineTestInstance -class
WorkflowStorageTests(unittest.TestCase): - '''Test methods for storing workflows as datasets''' +class MlModelTests(unittest.TestCase): + '''Test methods for MlModels''' def setUp(self) -> None: ge.reset(False) + def test_model_dim_to_tensorshape(self): + ''' Test model_dim_to_tensorshape ''' + + dim_1d: List[TSP.Dimension] = [TSP.Dimension(dim_value=7)] + mts_1d = MlTensorShape3D(bands=7, y=1, x=1) + self.assertEqual(model_dim_to_tensorshape(dim_1d), mts_1d) + + dim_1d_v: List[TSP.Dimension] = [TSP.Dimension(dim_value=None), TSP.Dimension(dim_value=7)] + mts_1d_v = MlTensorShape3D(bands=7, y=1, x=1) + self.assertEqual(model_dim_to_tensorshape(dim_1d_v), mts_1d_v) + + dim_2d_t: List[TSP.Dimension] = [TSP.Dimension(dim_value=512), TSP.Dimension(dim_value=512)] + mts_2d_t = MlTensorShape3D(bands=1, y=512, x=512) + self.assertEqual(model_dim_to_tensorshape(dim_2d_t), mts_2d_t) + + dim_2d_1: List[TSP.Dimension] = [TSP.Dimension(dim_value=1), TSP.Dimension(dim_value=7)] + mts_2d_1 = MlTensorShape3D(bands=7, y=1, x=1) + self.assertEqual(model_dim_to_tensorshape(dim_2d_1), mts_2d_1) + + dim_3d_t: List[TSP.Dimension] = [ + TSP.Dimension(dim_value=512), TSP.Dimension(dim_value=512), TSP.Dimension(dim_value=7) + ] + mts_3d_t = MlTensorShape3D(bands=7, y=512, x=512) + self.assertEqual(model_dim_to_tensorshape(dim_3d_t), mts_3d_t) + + dim_3d_v: List[TSP.Dimension] = [ + TSP.Dimension(dim_value=None), TSP.Dimension(dim_value=512), TSP.Dimension(dim_value=512) + ] + mts_3d_v = MlTensorShape3D(bands=1, y=512, x=512) + self.assertEqual(model_dim_to_tensorshape(dim_3d_v), mts_3d_v) + + dim_4d_v: List[TSP.Dimension] = [ + TSP.Dimension(dim_value=None), + TSP.Dimension(dim_value=512), + TSP.Dimension(dim_value=512), + TSP.Dimension(dim_value=4) + ] + mts_4d_v = MlTensorShape3D(bands=4, y=512, x=512) + self.assertEqual(model_dim_to_tensorshape(dim_4d_v), mts_4d_v) + def test_uploading_onnx_model(self): clf = RandomForestClassifier(random_state=42) From 
7c0c6c5ae47bb424ffa209798748128b38680d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Tue, 3 Jun 2025 09:43:27 +0200 Subject: [PATCH 12/14] update backend ref and openapi version --- .github/.backend_git_ref | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/.backend_git_ref b/.github/.backend_git_ref index 3450ef68..d242a7cf 100644 --- a/.github/.backend_git_ref +++ b/.github/.backend_git_ref @@ -1 +1 @@ -cdb162df11bff5a3ae5854126d15538c77a2cabb +7aadfa3 diff --git a/setup.cfg b/setup.cfg index 6c96276d..38448a5b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,7 +18,7 @@ package_dir = packages = find: python_requires = >=3.10 install_requires = - geoengine-openapi-client @ git+https://github.com/geo-engine/openapi-client@ml-model-input-outpt-shape-2#subdirectory=python + geoengine-openapi-client == 0.0.25 geopandas >=1.0,<2.0 matplotlib >=3.5,<3.11 numpy >=1.21,<2.3 From a0216cdaae2dfd115cdc3f24cd0c6e8dcae04db0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Tue, 3 Jun 2025 09:51:31 +0200 Subject: [PATCH 13/14] update backend ref --- .github/.backend_git_ref | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/.backend_git_ref b/.github/.backend_git_ref index d242a7cf..62fd2b5b 100644 --- a/.github/.backend_git_ref +++ b/.github/.backend_git_ref @@ -1 +1 @@ -7aadfa3 +7aadfa37aadfa383e6eee63442e366890dfb1160114caed From 48c6fee912bbe2ec4c15471fe3e8aeee8e5c3b14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Dr=C3=B6nner?= Date: Tue, 3 Jun 2025 10:32:58 +0200 Subject: [PATCH 14/14] update backend ref --- .github/.backend_git_ref | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/.backend_git_ref b/.github/.backend_git_ref index 62fd2b5b..a2296491 100644 --- a/.github/.backend_git_ref +++ b/.github/.backend_git_ref @@ -1 +1 @@ -7aadfa37aadfa383e6eee63442e366890dfb1160114caed +7aadfa383e6eee63442e366890dfb1160114caed