Skip to content

Commit a045095

Browse files
committed
add max of 100 files
max 10GB of non versioned files max 5GB of versioned files
1 parent b6cfa3e commit a045095

File tree

6 files changed

+145
-24
lines changed

6 files changed

+145
-24
lines changed

mergin/client_push.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,14 @@
2424

2525
from .local_changes import LocalChange, LocalChanges
2626

27-
from .common import UPLOAD_CHUNK_ATTEMPT_WAIT, UPLOAD_CHUNK_ATTEMPTS, UPLOAD_CHUNK_SIZE, ClientError, ErrorCode
27+
from .common import (
28+
MAX_UPLOAD_VERSIONED_SIZE,
29+
UPLOAD_CHUNK_ATTEMPT_WAIT,
30+
UPLOAD_CHUNK_ATTEMPTS,
31+
UPLOAD_CHUNK_SIZE,
32+
MAX_UPLOAD_MEDIA_SIZE,
33+
ClientError,
34+
)
2835
from .merginproject import MerginProject
2936
from .editor import filter_changes
3037
from .utils import get_data_checksum
@@ -296,28 +303,23 @@ def push_project_async(mc, directory) -> Optional[UploadJob]:
296303
mp.log.info(f"--- push {project_path} - nothing to do")
297304
return
298305

299-
mp.log.debug("push changes:\n" + pprint.pformat(changes))
306+
mp.log.debug("push changes:\n" + pprint.pformat(asdict(changes)))
300307
tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-")
301308

302309
# If there are any versioned files (aka .gpkg) that are not updated through a diff,
303310
# we need to make a temporary copy somewhere to be sure that we are uploading full content.
304311
# That's because if there are pending transactions, checkpointing or switching from WAL mode
305312
# won't work, and we would end up with some changes left in -wal file which do not get
306313
# uploaded. The temporary copy using geodiff uses sqlite backup API and should copy everything.
307-
for f in changes["updated"]:
308-
if mp.is_versioned_file(f["path"]) and "diff" not in f:
314+
for f in changes.updated:
315+
if mp.is_versioned_file(f.path) and not f.diff:
309316
mp.copy_versioned_file_for_upload(f, tmp_dir.name)
310317

311-
for f in changes["added"]:
312-
if mp.is_versioned_file(f["path"]):
318+
for f in changes.added:
319+
if mp.is_versioned_file(f.path):
313320
mp.copy_versioned_file_for_upload(f, tmp_dir.name)
314321

315-
local_changes = LocalChanges(
316-
added=[LocalChange(**change) for change in changes["added"]],
317-
updated=[LocalChange(**change) for change in changes["updated"]],
318-
removed=[LocalChange(**change) for change in changes["removed"]],
319-
)
320-
job = create_upload_job(mc, mp, local_changes, tmp_dir)
322+
job = create_upload_job(mc, mp, changes, tmp_dir)
321323
return job
322324

323325

@@ -471,12 +473,27 @@ def remove_diff_files(job: UploadJob) -> None:
471473
os.remove(diff_file)
472474

473475

474-
def get_push_changes_batch(mc, mp: MerginProject) -> Tuple[dict, int]:
476+
def get_push_changes_batch(mc, mp: MerginProject) -> Tuple[LocalChanges, int]:
475477
"""
476478
Get changes that need to be pushed to the server.
477479
"""
478480
changes = mp.get_push_changes()
479481
project_role = mp.project_role()
480482
changes = filter_changes(mc, project_role, changes)
481483

482-
return changes, sum(len(v) for v in changes.values())
484+
local_changes = LocalChanges(
485+
added=[LocalChange(**change) for change in changes["added"]],
486+
updated=[LocalChange(**change) for change in changes["updated"]],
487+
removed=[LocalChange(**change) for change in changes["removed"]],
488+
)
489+
if local_changes.get_media_upload_size() > MAX_UPLOAD_MEDIA_SIZE:
490+
raise ClientError(
491+
f"Total size of media files to upload exceeds the maximum allowed size of {MAX_UPLOAD_MEDIA_SIZE / (1024**3)} GiB."
492+
)
493+
494+
if local_changes.get_gpgk_upload_size() > MAX_UPLOAD_VERSIONED_SIZE:
495+
raise ClientError(
496+
f"Total size of GPKG files to upload exceeds the maximum allowed size of {MAX_UPLOAD_VERSIONED_SIZE / (1024**3)} GiB."
497+
)
498+
499+
return local_changes, sum(len(v) for v in changes.values())

mergin/common.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
# seconds to wait between sync callback calls
2525
SYNC_CALLBACK_WAIT = 0.01
2626

27+
# maximum total size of media (non-versioned) files that can be uploaded in one push (in bytes)
28+
MAX_UPLOAD_MEDIA_SIZE = 10 * (1024**3)
29+
30+
# maximum total size of GPKG files that can be uploaded in one push (in bytes)
31+
MAX_UPLOAD_VERSIONED_SIZE = 5 * (1024**3)
32+
2733
# default URL for submitting logs
2834
MERGIN_DEFAULT_LOGS_URL = "https://g4pfq226j0.execute-api.eu-west-1.amazonaws.com/mergin_client_log_submit"
2935

@@ -39,7 +45,9 @@ class ErrorCode(Enum):
3945

4046

4147
class ClientError(Exception):
42-
def __init__(self, detail: str, url=None, server_code=None, server_response=None, http_error=None, http_method=None):
48+
def __init__(
49+
self, detail: str, url=None, server_code=None, server_response=None, http_error=None, http_method=None
50+
):
4351
self.detail = detail
4452
self.url = url
4553
self.http_error = http_error

mergin/editor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from itertools import filterfalse
22
from typing import Callable, Dict, List
33

4-
from .utils import is_mergin_config, is_qgis_file, is_versioned_file
4+
from .utils import is_qgis_file
55

66
EDITOR_ROLE_NAME = "editor"
77

mergin/local_changes.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
from dataclasses import dataclass, field
22
from datetime import datetime
3-
from typing import Dict, Optional, List, Tuple
3+
from typing import Optional, List, Tuple
4+
5+
from .utils import is_versioned_file
6+
7+
MAX_UPLOAD_CHANGES = 100
48

59

610
@dataclass
@@ -55,6 +59,18 @@ class LocalChanges:
5559
updated: List[LocalChange] = field(default_factory=list)
5660
removed: List[LocalChange] = field(default_factory=list)
5761

62+
def __post_init__(self):
    """
    Cap the number of upload changes (`added` + `updated`) at MAX_UPLOAD_CHANGES.

    When the combined count exceeds the limit, entries from `added` are kept first
    and `updated` is trimmed to whatever room is left. `removed` is not modified here.
    Entries beyond the limit are dropped from this instance without raising.
    """
    if len(self.get_upload_changes()) <= MAX_UPLOAD_CHANGES:
        return

    # `added` takes priority; `updated` only receives the remaining slots
    kept_added = self.added[:MAX_UPLOAD_CHANGES]
    self.added = kept_added
    self.updated = self.updated[: MAX_UPLOAD_CHANGES - len(kept_added)]
73+
5874
def to_server_payload(self) -> dict:
5975
return {
6076
"added": [change.to_server_data() for change in self.added],
@@ -96,3 +112,24 @@ def update_chunks(self, server_chunks: List[Tuple[str, str]]) -> None:
96112

97113
for change in self.updated:
98114
change.chunks = self._map_unique_chunks(change.chunks, server_chunks)
115+
116+
def get_media_upload_size(self) -> int:
    """
    Return the summed size of every non-versioned (media) file among the
    upload changes, i.e. the changes returned by `get_upload_changes()`.
    """
    return sum(
        change.size for change in self.get_upload_changes() if not is_versioned_file(change.path)
    )
125+
126+
def get_gpgk_upload_size(self) -> int:
    """
    Return the summed size of versioned (GPKG) files among the upload changes.

    Changes that carry a diff are skipped, so only new or fully overwritten
    files contribute to the total.
    """
    return sum(
        change.size
        for change in self.get_upload_changes()
        if is_versioned_file(change.path) and not change.diff
    )

mergin/merginproject.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
conflicted_copy_file_name,
2222
edit_conflict_file_name,
2323
)
24-
24+
from .local_changes import LocalChange
2525

2626
this_dir = os.path.dirname(os.path.realpath(__file__))
2727

@@ -470,20 +470,20 @@ def get_push_changes(self):
470470
changes["updated"] = [f for f in changes["updated"] if f not in not_updated]
471471
return changes
472472

473-
def copy_versioned_file_for_upload(self, f, tmp_dir):
473+
def copy_versioned_file_for_upload(self, f: LocalChange, tmp_dir: str) -> str:
474474
"""
475475
Make a temporary copy of the versioned file using geodiff, to make sure that we have full
476476
content in a single file (nothing left in WAL journal)
477477
"""
478-
path = f["path"]
478+
path = f.path
479479
self.log.info("Making a temporary copy (full upload): " + path)
480480
tmp_file = os.path.join(tmp_dir, path)
481481
os.makedirs(os.path.dirname(tmp_file), exist_ok=True)
482482
self.geodiff.make_copy_sqlite(self.fpath(path), tmp_file)
483-
f["size"] = os.path.getsize(tmp_file)
484-
f["checksum"] = generate_checksum(tmp_file)
485-
f["chunks"] = [str(uuid.uuid4()) for i in range(math.ceil(f["size"] / UPLOAD_CHUNK_SIZE))]
486-
f["upload_file"] = tmp_file
483+
f.size = os.path.getsize(tmp_file)
484+
f.checksum = generate_checksum(tmp_file)
485+
f.chunks = [str(uuid.uuid4()) for i in range(math.ceil(f.size / UPLOAD_CHUNK_SIZE))]
486+
f.upload_file = tmp_file
487487
return tmp_file
488488

489489
def get_list_of_push_changes(self, push_changes):

mergin/test/test_local_changes.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,62 @@ def test_local_changes_get_upload_changes():
118118
assert len(upload_changes) == 2 # Only added and updated should be included
119119
assert upload_changes[0].path == "file1.txt" # First change is from added
120120
assert upload_changes[1].path == "file2.txt" # Second change is from updated
121+
122+
123+
def test_local_changes_get_media_upload_size():
    """get_media_upload_size() sums the sizes of non-versioned files only."""
    local_changes = LocalChanges(
        added=[
            LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now()),
            LocalChange(path="file2.jpg", checksum="xyz789", size=2048, mtime=datetime.now()),
        ],
        updated=[
            LocalChange(path="file3.mp4", checksum="lmn456", size=5120, mtime=datetime.now()),
            LocalChange(path="file4.gpkg", checksum="opq123", size=1024, mtime=datetime.now()),
        ],
    )

    # 1024 + 2048 + 5120: the .gpkg entry is versioned and therefore not counted
    assert local_changes.get_media_upload_size() == 8192
143+
144+
145+
def test_local_changes_get_gpgk_upload_size():
    """get_gpgk_upload_size() sums the sizes of GPKG files that have no diff."""
    local_changes = LocalChanges(
        added=[
            LocalChange(path="file1.gpkg", checksum="abc123", size=1024, mtime=datetime.now()),
            LocalChange(path="file2.gpkg", checksum="xyz789", size=2048, mtime=datetime.now(), diff={"path": "diff1"}),
        ],
        updated=[
            LocalChange(path="file3.gpkg", checksum="lmn456", size=5120, mtime=datetime.now()),
            LocalChange(path="file4.txt", checksum="opq123", size=1024, mtime=datetime.now()),
        ],
    )

    # 1024 + 5120: file2.gpkg carries a diff and file4.txt is not a GPKG file
    assert local_changes.get_gpgk_upload_size() == 6144
165+
166+
167+
def test_local_changes_post_init():
    """LocalChanges trims `updated` so that added + updated stays within the upload limit."""

    def make_changes(count, checksum, size):
        # Build `count` text-file changes named file0.txt, file1.txt, ...
        return [
            LocalChange(path=f"file{i}.txt", checksum=checksum, size=size, mtime=datetime.now())
            for i in range(count)
        ]

    # 80 + 21 = 101 upload changes, one more than the limit
    local_changes = LocalChanges(
        added=make_changes(80, "abc123", 1024),
        updated=make_changes(21, "xyz789", 2048),
    )

    assert len(local_changes.added) == 80  # All 80 added changes are included
    assert len(local_changes.updated) == 20  # Only 20 updated changes are included to respect the limit
    assert len(local_changes.added) + len(local_changes.updated) == 100  # Total is limited to MAX_UPLOAD_CHANGES

0 commit comments

Comments
 (0)