Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions src/annetbox/base/client_sync.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import http
import logging
import os
import urllib.parse
from abc import abstractmethod
from collections.abc import Callable, Iterable
from functools import wraps
Expand All @@ -19,7 +21,11 @@

Class = TypeVar("Class")
ArgsSpec = ParamSpec("ArgsSpec")

default_page_size = int(os.getenv("NETBOX_PAGE_SIZE", 100))
# rfc9110:
# It is RECOMMENDED that all senders and recipients support, at a minimum,
# URIs with lengths of 8000 octets in protocol elements
max_url_size = int(os.getenv("NETBOX_MAX_URL_SIZE", 8000))
logger = logging.getLogger(__name__)

T = TypeVar("T")
Expand All @@ -42,6 +48,25 @@ def map(
yield func(item)


def split_by_len(pref_len: int, max_batch_len: int, data: list[Any]) -> list[list[Any]]:
    """Split *data* into batches whose URL-encoded length fits *max_batch_len*.

    Each item contributes ``len(urllib.parse.quote_plus(str(item))) + pref_len``
    octets, where ``pref_len`` covers the per-item query overhead (field name,
    ``=`` and ``&``).

    Args:
        pref_len: per-item overhead in octets added on top of the encoded value.
        max_batch_len: maximum total encoded length allowed for one batch.
        data: items to split; original order is preserved across batches.

    Returns:
        A list of non-empty batches that together contain every item of
        *data* exactly once, in order. A single item whose encoded length
        already exceeds ``max_batch_len`` is placed alone in its own batch
        rather than dropped.

    Note:
        The previous implementation lost the batch accumulated before the
        first split and appended the final batch object to the result twice.
    """
    batches: list[list[Any]] = []
    current: list[Any] = []
    current_len = 0
    for item in data:
        # Encoded length of the value plus the per-item prefix overhead.
        item_len = len(urllib.parse.quote_plus(str(item))) + pref_len
        # Flush the current batch when adding this item would overflow the
        # limit. An empty batch always accepts the item (even an oversized
        # one), so no item is ever silently discarded.
        if current and current_len + item_len > max_batch_len:
            batches.append(current)
            current = []
            current_len = 0
        current.append(item)
        current_len += item_len
    if current:
        batches.append(current)
    return batches


def _collect_by_pages(
func: Callable[Concatenate[Class, ArgsSpec], PagingResponse[Model]],
) -> Callable[Concatenate[Class, ArgsSpec], PagingResponse[Model]]:
Expand All @@ -54,7 +79,7 @@ def wrapper(
**kwargs: ArgsSpec.kwargs,
) -> PagingResponse[Model]:
kwargs.setdefault("offset", 0)
page_size = kwargs.pop("page_size", 100)
page_size = kwargs.pop("page_size", default_page_size)
limit = kwargs.pop("limit", None)
results = []
method = func.__get__(self, self.__class__)
Expand Down Expand Up @@ -117,12 +142,10 @@ def wrapper(
results=[],
)

batches = [
value[offset: offset + batch_size]
for offset in range(0, len(value), batch_size)
]
max_len = max_url_size - 200 # minus base url, TODO: calc using real base URL
batches = split_by_len(len(field)+2, max_len, value) # +1 for equal sign, +1 for &
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. we shouldn't try to put as many values as possible in one URL, as netbox is pretty slow and can time out
  2. sometimes it is faster to make 2 requests using a pool than a single request: netbox often does N+1 queries on the server side

Copy link
Contributor Author

@shurick2 shurick2 Mar 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I came to better understanding how it works and will close this request in favor of annetutil/annet#290.

  1. we shouldn't try to put as many values as possible in one URL, as netbox is pretty slow and can time out

It depends on the installation, but in general, I think we shouldn't force all users to wait, because a timeout may occur. We need to provide configurable parameters, so slow installations can adjust query parameters.

2. sometimes it is faster to make 2 requests using a pool than a single request: netbox often does N+1 queries on the server side

It is true for all cases except the dcim/devices/ query. This query uses a pager and cannot be run in parallel. My tests show that setting a larger page_size is faster — about 15% faster on 70 devices with 50 ports each.

I also found that _collect_by_pages() ignores page.next, which is a problem: because we discard the limit and offset it provides, we can skip some data.


def apply(batch):
def apply(batch: list[Any]):
nonlocal kwargs
kwargs = kwargs.copy()
kwargs[field] = batch
Expand Down
Loading