4 changes: 2 additions & 2 deletions .github/workflows/main.yml
@@ -29,8 +29,8 @@ jobs:
run: uv sync --all-packages --frozen
- name: Ruff
run: uv run ruff format --check . && uv run ruff check .
- name: Pyright
run: uv run pyright .
- name: Ty
run: uv run ty check

tests:
name: Run tests
15 changes: 10 additions & 5 deletions .pre-commit-config.yaml
@@ -1,11 +1,11 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v6.0.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- repo: https://github.com/tsvikas/sync-with-uv
rev: v0.4.0
rev: v0.5.0
hooks:
- id: sync-with-uv
- repo: https://github.com/charliermarsh/ruff-pre-commit
@@ -14,7 +14,12 @@ repos:
- id: ruff-check
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format
- repo: https://github.com/RobertCraigie/pyright-python
rev: v1.1.400
- repo: local
hooks:
- id: pyright
- id: ty-check
name: ty-check
language: python
entry: ty check
pass_filenames: false
args: [--python=.venv/]
additional_dependencies: [ty]
1 change: 1 addition & 0 deletions .vscode/extensions.json
@@ -1,5 +1,6 @@
{
"recommendations": [
"astral-sh.ty",
"ms-python.python",
"ms-python.vscode-pylance",
"charliermarsh.ruff",
1 change: 1 addition & 0 deletions .vscode/settings.json
@@ -12,6 +12,7 @@
}
},
"files.insertFinalNewline": true,
"python.languageServer": "None",
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.analysis.autoImportCompletions": true,
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
@@ -42,7 +42,7 @@ uv run --package tilebox-datasets pytest tilebox-datasets
uv run ruff format . && uv run ruff check --fix .

# type checking:
uv run pyright .
uv run ty check
```

### Adding dependencies to one of the packages
@@ -54,8 +54,8 @@ uv add --package tilebox-datasets "numpy>=2"
### Used code quality tools

- [ruff](https://github.com/astral-sh/ruff) for linting and formatting
- [pyright](https://github.com/microsoft/pyright) for type checking
- [pre-commit](https://pre-commit.com/) for running all of the above automatically on each git commit
- [ty](https://github.com/astral-sh/ty) for type checking
- [prek](https://prek.j178.dev/) for running all of the above automatically on each git commit

## Protobuf usage

38 changes: 7 additions & 31 deletions pyproject.toml
@@ -24,11 +24,10 @@ dev = [
"pyarrow>=17.0.0",
# some dev tooling
"ruff>=0.11.10",
# pyright 1.1.401 reports many wrong false positives, let's wait until that is fixed before upgrading
"pyright>=1.1.379,<1.1.401",
"pre-commit>=3.8.0",
"types-protobuf>=6.30",
"junitparser>=3.2.0",
"ty>=0.0.11",
"prek>=0.2.27",
]

[project.scripts]
@@ -112,33 +111,10 @@ known-first-party = ["tilebox", "_tilebox"]
[tool.ruff.lint.per-file-ignores]
"*/tests/*" = ["INP001", "SLF001"]

[tool.pyright]
[tool.ty.src]
exclude = [
"**/.ipynb_checkpoints",
"**/__pycache__",
".venv",
"tilebox-datasets/tests/example_dataset/*", # auto-generated code
"tilebox-workflows/tests/proto/*", # auto-generated code
# auto-generated code
"**/*_pb2.py",
"**/*_pb2.pyi",
"**/*pb2_grpc.py"
]

# ignore warnings in those files, but still type check them when used as a dependency in other files
ignore = [
# it's auto generated
"**/datasets/v1",
"**/workflows/v1",
"**/tilebox/v1",
"**/buf/validate",
]

# pyright needs to have all the dependencies installed to be able to type check
# we can make sure of this by telling it to use the uv venv
venvPath = "."
venv = ".venv"
extraPaths = [
"tilebox-datasets",
"tilebox-grpc",
"tilebox-storage",
"tilebox-workflows",
]

reportPrivateImportUsage = false
2 changes: 1 addition & 1 deletion tilebox-datasets/tests/data/datapoint.py
@@ -62,7 +62,7 @@ def example_datapoints(draw: DrawFn, generated_fields: bool = False, missing_fie
some_time=draw(datetime_messages() | maybe_none),
some_duration=draw(duration_messages() | maybe_none),
some_bytes=draw(binary(min_size=1, max_size=10) | maybe_none),
some_bool=draw(booleans() | maybe_none), # type: ignore[arg-type]
some_bool=draw(booleans() | maybe_none),
# well-known types
some_identifier=draw(uuid_messages() | maybe_none),
some_vec3=draw(vec3_messages() | maybe_none),
3 changes: 2 additions & 1 deletion tilebox-datasets/tilebox/datasets/message_pool.py
@@ -1,5 +1,6 @@
from google.protobuf import descriptor_pb2, duration_pb2, timestamp_pb2
from google.protobuf.descriptor_pool import Default
from google.protobuf.message import Message
from google.protobuf.message_factory import GetMessageClass, GetMessages

from tilebox.datasets.data.datasets import AnnotatedType
@@ -25,5 +26,5 @@ def register_message_types(descriptor_set: descriptor_pb2.FileDescriptorSet) ->
GetMessages(descriptor_set.file, pool=Default())


def get_message_type(type_url: str) -> type:
def get_message_type(type_url: str) -> type[Message]:
return GetMessageClass(Default().FindMessageTypeByName(type_url))
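A note on the `type[Message]` change: narrowing the return annotation from a bare `type` is what lets downstream protobuf calls type check. A minimal sketch of the payoff; the type URL is hypothetical:

```python
from tilebox.datasets.message_pool import get_message_type

# With `-> type[Message]`, the checker knows the returned class is a protobuf type.
message_cls = get_message_type("datasets.v1.SomeDatapoint")  # hypothetical type URL
message = message_cls()        # instantiating a Message subclass is well-typed
message.ParseFromString(b"")   # Message declares this, so no ignore is needed
```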
6 changes: 4 additions & 2 deletions tilebox-datasets/tilebox/datasets/progress.py
@@ -62,7 +62,8 @@ def _calc_progress_seconds(self, time: datetime) -> int:

def set_progress(self, time: datetime) -> None:
"""Set the progress of the progress bar to the given time"""
done = min(self._calc_progress_seconds(time), self._progress_bar.total)
total = self._calc_progress_seconds(self._interval.end)
done = min(self._calc_progress_seconds(time), total)
self._progress_bar.update(done - self._progress_bar.n)

def set_download_info(self, datapoints: int, byte_size: int, download_time: float) -> None:
@@ -79,7 +80,8 @@ def __exit__(
) -> None:
try:
if traceback is None:
self._progress_bar.update(self._progress_bar.total - self._progress_bar.n) # set to 100%
total = self._calc_progress_seconds(self._interval.end)
self._progress_bar.update(total - self._progress_bar.n) # set to 100%

self._progress_bar.close() # mark as completed or failed
except AttributeError:
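Both progress hunks now recompute `total` from `self._interval.end` instead of reading it back from the tqdm bar, presumably so the value is a well-typed `int`. The clamping behaviour itself is simple; a toy sketch, assuming a 100-second interval:

```python
def clamped_progress(elapsed_seconds: int, total_seconds: int = 100) -> int:
    # progress never overshoots the interval end, even if `time` lies past it
    return min(elapsed_seconds, total_seconds)

assert clamped_progress(42) == 42
assert clamped_progress(250) == 100  # clamped to the interval total
```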
@@ -1,4 +1,4 @@
from collections.abc import Sized
from collections.abc import Sequence
from datetime import timedelta
from typing import Any
from uuid import UUID
@@ -16,9 +16,10 @@
from tilebox.datasets.datasets.v1.well_known_types_pb2 import UUID as UUIDMessage # noqa: N811
from tilebox.datasets.datasets.v1.well_known_types_pb2 import Geometry, LatLon, LatLonAlt, Quaternion, Vec3

ProtoFieldValue = Message | float | str | bool | bytes | Sized | None
ScalarProtoFieldValue = Message | float | str | bool | bytes
ProtoFieldValue = ScalarProtoFieldValue | Sequence[ScalarProtoFieldValue] | None

_FILL_VALUES_BY_DTYPE = {
_FILL_VALUES_BY_DTYPE: dict[type[np.dtype[Any]], Any] = {
npdtypes.Int8DType: np.int8(0),
npdtypes.Int16DType: np.int16(0),
npdtypes.Int32DType: np.int32(0),
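The switch from `Sized` to `Sequence` here (and in the converter file below) is what makes the removed ignores possible: `Sized` only promises `__len__`, while `Sequence` also promises iteration and indexing, so an `isinstance` check narrows the value to something the checker lets you loop over. A standalone sketch of the difference, not tied to the Tilebox types:

```python
from collections.abc import Sequence, Sized

def first_element(value: object) -> object | None:
    if isinstance(value, Sized):
        len(value)  # fine: Sized guarantees __len__ ...
        # ... but `value[0]` or `for v in value` would still be type errors here
    if isinstance(value, Sequence):
        # Sequence guarantees __getitem__ and __iter__, so both now type check
        return value[0] if len(value) else None
    return None
```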
@@ -3,7 +3,7 @@
"""

import contextlib
from collections.abc import Sized
from collections.abc import Sequence
from typing import Any, TypeVar

import numpy as np
@@ -231,10 +231,10 @@ def resize(self, buffer_size: int) -> None:
elif buffer_size > len(self._data):
# resize the data buffer to the new capacity, by just padding it with zeros at the end
missing = buffer_size - len(self._data)
self._data = np.pad(
self._data = np.pad( # ty: ignore[no-matching-overload]
self._data,
((0, missing), (0, 0)),
constant_values=self._type.fill_value, # type: ignore[arg-type]
constant_values=self._type.fill_value,
)


@@ -258,13 +258,13 @@ def __init__(
self._array_dim: int | None = None

def __call__(self, index: int, value: ProtoFieldValue) -> None:
if not isinstance(value, Sized):
if not isinstance(value, Sequence):
raise TypeError(f"Expected array field but got {type(value)}")

if self._array_dim is None or len(value) > self._array_dim:
self._resize_array_dim(len(value))

for i, v in enumerate(value): # type: ignore[arg-type] # somehow the isinstance(value, Sized) isn't used here
for i, v in enumerate(value):
self._data[index, i, :] = self._type.from_proto(v)

def finalize(
@@ -309,10 +309,10 @@ def _resize(self) -> None:
else: # resize the data buffer to the new capacity, by just padding it with zeros at the end
missing_capacity = self._capacity - self._data.shape[0]
missing_array_dim = self._array_dim - self._data.shape[1]
self._data = np.pad(
self._data = np.pad( # ty: ignore[no-matching-overload]
self._data,
((0, missing_capacity), (0, missing_array_dim), (0, 0)),
constant_values=self._type.fill_value, # type: ignore[arg-type]
constant_values=self._type.fill_value,
)


@@ -374,13 +374,13 @@ def _create_field_converter(field: FieldDescriptor) -> _FieldConverter:
"""
# special handling for enums:
if field.type == FieldDescriptor.TYPE_ENUM:
if field.is_repeated: # type: ignore[attr-defined]
if field.is_repeated:
raise NotImplementedError("Repeated enum fields are not supported")

return _EnumFieldConverter(field.name, enum_mapping_from_field_descriptor(field))

field_type = infer_field_type(field)
if field.is_repeated: # type: ignore[attr-defined]
if field.is_repeated:
return _ArrayFieldConverter(field.name, field_type)

return _SimpleFieldConverter(field.name, field_type)
@@ -20,7 +20,7 @@

def to_messages( # noqa: C901, PLR0912
data: IngestionData,
message_type: type,
message_type: type[Message],
required_fields: list[str] | None = None,
ignore_fields: list[str] | None = None,
) -> list[Message]:
@@ -44,9 +44,9 @@ def to_messages( # noqa: C901, PLR0912
# let's validate our fields, to make sure that they are all known fields for the given protobuf message
# and that they are all lists of the same length
field_lengths = defaultdict(list)
fields: dict[str, pd.Series | np.ndarray] = {}
fields: dict[str, pd.Series | np.ndarray | list[ProtoFieldValue]] = {}

field_names = list(map(str, data))
field_names = [str(field) for field in data]
if isinstance(data, xr.Dataset):
# list(dataset) only returns the variables, not the coords, so for xarray we need to add the coords as well
# but not all coords, we only care about time for now
@@ -84,7 +84,7 @@ def to_messages( # noqa: C901, PLR0912
else:
values = convert_values_to_proto(values, field_type, filter_none=False)

fields[field_name] = values # type: ignore[assignment]
fields[field_name] = values

# now convert every datapoint to a protobuf message
if len(field_lengths) == 0: # early return, no actual data to convert
Expand All @@ -103,7 +103,7 @@ def marshal_messages(messages: list[Message]) -> list[bytes]:


def columnar_to_row_based(
data: dict[str, pd.Series | np.ndarray],
data: dict[str, pd.Series | np.ndarray | list[ProtoFieldValue]],
) -> Iterator[dict[str, Any]]:
if len(data) == 0:
return
@@ -126,12 +126,12 @@ def convert_values_to_proto(

def convert_repeated_values_to_proto(
values: np.ndarray | pd.Series | list[np.ndarray], field_type: ProtobufFieldType
) -> Any:
) -> list[ProtoFieldValue]:
if isinstance(values, np.ndarray): # it was an xarray, with potentially padded fill values at the end
values = trim_trailing_fill_values(values, field_type.fill_value)

# since repeated fields can have different lengths between datapoints, we can filter out None values here
return [convert_values_to_proto(repeated_values, field_type, filter_none=True) for repeated_values in values]
return [convert_values_to_proto(repeated_values, field_type, filter_none=True) for repeated_values in values] # ty: ignore[invalid-return-type]


def trim_trailing_fill_values(values: np.ndarray, fill_value: Any) -> list[np.ndarray]:
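`columnar_to_row_based` flips a mapping of equal-length columns into per-datapoint dicts. A minimal standalone sketch of that transposition (the real implementation also handles numpy arrays and pandas Series):

```python
from collections.abc import Iterator
from typing import Any

def columnar_to_rows(data: dict[str, list[Any]]) -> Iterator[dict[str, Any]]:
    if not data:
        return
    n = len(next(iter(data.values())))  # all columns share the same length
    for i in range(n):
        yield {name: column[i] for name, column in data.items()}

rows = list(columnar_to_rows({"time": [1, 2], "value": [10.0, 20.0]}))
# [{'time': 1, 'value': 10.0}, {'time': 2, 'value': 20.0}]
```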
8 changes: 6 additions & 2 deletions tilebox-datasets/tilebox/datasets/query/id_interval.py
@@ -55,18 +55,22 @@ def parse(cls, arg: IDIntervalLike, start_exclusive: bool = False, end_inclusive
case IDInterval(_, _, _, _):
return arg
case (UUID(), UUID()):
start, end = arg
start: UUID = arg[0]
end: UUID = arg[1]
return IDInterval(
start_id=start,
end_id=end,
start_exclusive=start_exclusive,
end_inclusive=end_inclusive,
)
case (str(), str()):
start, end = arg
start: str = arg[0]
end: str = arg[1]
return IDInterval(
start_id=UUID(start),
end_id=UUID(end),
start_exclusive=start_exclusive,
end_inclusive=end_inclusive,
)

raise ValueError(f"Failed to convert {arg} ({type(arg)}) to IDInterval")
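The annotated per-element assignments replace `start, end = arg` so each `case` arm pins the two names to a single type. Call sites are unchanged; a usage sketch with made-up IDs:

```python
from uuid import UUID
from tilebox.datasets.query.id_interval import IDInterval

# both tuple forms are handled by the corresponding `case` arm above
by_uuid = IDInterval.parse((UUID(int=1), UUID(int=2)))
by_str = IDInterval.parse((
    "00000000-0000-0000-0000-000000000001",
    "00000000-0000-0000-0000-000000000002",
))
```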
2 changes: 1 addition & 1 deletion tilebox-datasets/tilebox/datasets/service.py
@@ -271,7 +271,7 @@ def _client_info() -> ClientInfo:
def _environment_info() -> str:
python_version = sys.version.split(" ")[0]
try:
shell = str(get_ipython()) # type: ignore[name-defined]
shell = str(get_ipython()) # ty: ignore[unresolved-reference]
except NameError:
return f"Python {python_version}" # Probably standard Python interpreter

2 changes: 1 addition & 1 deletion tilebox-grpc/_tilebox/grpc/error.py
@@ -57,7 +57,7 @@ def with_pythonic_errors(stub: Stub, async_funcs: bool = False) -> Stub:
wrap_func = _wrap_rpc if not async_funcs else _async_wrap_rpc
for name, rpc in stub.__dict__.items():
if callable(rpc):
setattr(stub, name, wrap_func(rpc)) # type: ignore[assignment]
setattr(stub, name, wrap_func(rpc))
return stub


6 changes: 3 additions & 3 deletions tilebox-grpc/_tilebox/grpc/replay.py
@@ -40,7 +40,7 @@ def open_recording_channel(url: str, auth_token: str | None, recording: str | Pa


def open_replay_channel(recording: str | Path, assert_request_matches: bool = True) -> Channel:
return _ReplayChannel(recording, assert_request_matches) # type: ignore[return-value]
return _ReplayChannel(recording, assert_request_matches) # ty: ignore[invalid-return-type] # not a subclass, but same interface so works


class _ConcreteValue(Future):
@@ -87,7 +87,7 @@ def intercept_unary_unary(
client_call_details: ClientCallDetails,
request: RequestType,
) -> Future:
request_data = base64.b64encode(request.SerializeToString()) # type: ignore[attr-defined]
request_data = base64.b64encode(request.SerializeToString()) # ty: ignore[unresolved-attribute]
with self.recording.open("ab") as file:
method = client_call_details.method
if isinstance(method, str):
@@ -162,7 +162,7 @@ def unary_unary_call(

if recorded_status != StatusCode.OK.value[0]: # the recorded call was an error, so raise it again
code = _STATUS_CODES[recorded_status]
error = AioRpcError(code, None, None, recorded_response.decode()) # type: ignore[arg-type]
error = AioRpcError(code, None, None, recorded_response.decode()) # ty: ignore[invalid-argument-type]
raise error

return response_deserializer(base64.b64decode(recorded_response))
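`open_replay_channel` keeps a `ty: ignore[invalid-return-type]` because `_ReplayChannel` only duck-types `Channel`. For reference, structural typing via `typing.Protocol` is the usual way to express "same interface, no subclassing" without an ignore; a generic sketch, not grpc's actual types:

```python
from typing import Protocol

class ChannelLike(Protocol):
    def unary_unary(self, method: str) -> object: ...

class ReplayChannel:
    """Implements the same surface as a channel without inheriting from one."""

    def unary_unary(self, method: str) -> object:
        return f"replayed {method}"

def call(channel: ChannelLike) -> object:
    return channel.unary_unary("/tilebox/Example")

call(ReplayChannel())  # accepted structurally, no ignore comment required
```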
2 changes: 1 addition & 1 deletion tilebox-storage/tilebox/storage/aio.py
@@ -30,7 +30,7 @@
from tilebox.storage.providers import login

try:
from IPython.display import HTML, Image, display # type: ignore[assignment]
from IPython.display import HTML, Image, display
except ImportError:
# IPython is not available, so we can't display the quicklook image
# but let's define stubs for the type checker