From 4ab69f60cce7def5a67ffa2972ac9ecec0267e15 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Wed, 1 Oct 2025 15:21:54 +0200
Subject: [PATCH 01/17] Initial migration

---
 server/mergin/sync/models.py                  | 25 +++++++++++
 ...f54ee8c4acd_add_project_version_changes.py | 42 +++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 server/migrations/community/4f54ee8c4acd_add_project_version_changes.py

diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py
index b0f2795f..9beb6ec6 100644
--- a/server/mergin/sync/models.py
+++ b/server/mergin/sync/models.py
@@ -44,6 +44,7 @@
 Storages = {"local": DiskStorage}
 project_deleted = signal("project_deleted")
 project_access_granted = signal("project_access_granted")
+project_version_created = signal("project_version_created")


 class PushChangeType(Enum):
@@ -897,6 +898,30 @@ def construct_checkpoint(self) -> None:
         db.session.commit()


+class ProjectVersionChanges(db.Model):
+    id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
+    # exponential order (rank) of the cached changes json
+    rank = db.Column(db.Integer, nullable=False, index=True)
+    # project version this changes json is linked to
+    version_id = db.Column(
+        db.Integer,
+        db.ForeignKey("project_version.id", ondelete="CASCADE"),
+        index=True,
+        nullable=False,
+    )
+    # cached changes for versions from start to end (inclusive)
+    changes = db.Column(JSONB, nullable=False)
+
+    __table_args__ = (
+        db.UniqueConstraint("version_id", "rank", name="unique_changes"),
+        db.Index(
+            "ix_project_version_change_version_id_rank",
+            version_id,
+            rank,
+        ),
+    )
+
+
 class ProjectVersion(db.Model):
     id = db.Column(db.Integer, primary_key=True, autoincrement=True)
     name = db.Column(db.Integer, index=True)
diff --git a/server/migrations/community/4f54ee8c4acd_add_project_version_changes.py b/server/migrations/community/4f54ee8c4acd_add_project_version_changes.py
new file mode 100644
index 00000000..f40b3ef3
--- /dev/null
+++ b/server/migrations/community/4f54ee8c4acd_add_project_version_changes.py
@@ -0,0 +1,42 @@
+"""Add project version changes
+
+Revision ID: 4f54ee8c4acd
+Revises: bd1ec73db389
+Create Date: 2025-10-01 11:49:13.560320
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = '4f54ee8c4acd'
+down_revision = 'bd1ec73db389'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('project_version_changes',
+    sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False),
+    sa.Column('rank', sa.Integer(), nullable=False),
+    sa.Column('version_id', sa.Integer(), nullable=False),
+    sa.Column('changes', postgresql.JSONB(astext_type=sa.Text()), nullable=False),
+    sa.ForeignKeyConstraint(['version_id'], ['project_version.id'], name=op.f('fk_project_version_changes_version_id_project_version'), ondelete='CASCADE'),
+    sa.PrimaryKeyConstraint('id', name=op.f('pk_project_version_changes')),
+    sa.UniqueConstraint('version_id', 'rank', name='unique_changes')
+    )
+    op.create_index('ix_project_version_change_version_id_rank', 'project_version_changes', ['version_id', 'rank'], unique=False)
+    op.create_index(op.f('ix_project_version_changes_rank'), 'project_version_changes', ['rank'], unique=False)
+    op.create_index(op.f('ix_project_version_changes_version_id'), 'project_version_changes', ['version_id'], unique=False)
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f('ix_project_version_changes_version_id'), table_name='project_version_changes')
+    op.drop_index(op.f('ix_project_version_changes_rank'), table_name='project_version_changes')
+    op.drop_index('ix_project_version_change_version_id_rank', table_name='project_version_changes')
+    op.drop_table('project_version_changes')
+    # ### end Alembic commands ###

From 71f9f0f65be49f6a8cfda601aea7a615d0d670b2 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Wed, 1 Oct 2025 16:50:49 +0200
Subject: [PATCH 02/17] Rename changes table to singular form

---
 server/mergin/sync/models.py                  |  6 ++++-
 ...adc90fca0c_add_project_version_changes.py} | 26 +++++++++----------
 2 files changed, 18 insertions(+), 14 deletions(-)
 rename server/migrations/community/{4f54ee8c4acd_add_project_version_changes.py => 63adc90fca0c_add_project_version_changes.py} (57%)

diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py
index 9beb6ec6..d0475ec6 100644
--- a/server/mergin/sync/models.py
+++ b/server/mergin/sync/models.py
@@ -898,7 +898,7 @@ def construct_checkpoint(self) -> None:
     db.session.commit()


-class ProjectVersionChanges(db.Model):
+class ProjectVersionChange(db.Model):
     id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
     # exponential order (rank) of the cached changes json
     rank = db.Column(db.Integer, nullable=False, index=True)
@@ -920,6 +920,10 @@ class ProjectVersionChanges(db.Model):
             rank,
         ),
     )
+    project = db.relationship(
+        "ProjectVersion",
+        uselist=False,
+    )


 class ProjectVersion(db.Model):
diff --git a/server/migrations/community/4f54ee8c4acd_add_project_version_changes.py b/server/migrations/community/63adc90fca0c_add_project_version_changes.py
similarity index 57%
rename from server/migrations/community/4f54ee8c4acd_add_project_version_changes.py
rename to server/migrations/community/63adc90fca0c_add_project_version_changes.py
index f40b3ef3..f7836df6 100644
--- a/server/migrations/community/4f54ee8c4acd_add_project_version_changes.py
+++ b/server/migrations/community/63adc90fca0c_add_project_version_changes.py
@@ -1,8 +1,8 @@
 """Add project version changes

-Revision ID: 4f54ee8c4acd
+Revision ID: 63adc90fca0c
 Revises: bd1ec73db389
-Create Date: 2025-10-01 11:49:13.560320
+Create Date: 2025-10-01 16:50:08.343639

 """
 from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql

 # revision identifiers, used by Alembic.
-revision = '4f54ee8c4acd' +revision = '63adc90fca0c' down_revision = 'bd1ec73db389' branch_labels = None depends_on = None @@ -18,25 +18,25 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('project_version_changes', + op.create_table('project_version_change', sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False), sa.Column('rank', sa.Integer(), nullable=False), sa.Column('version_id', sa.Integer(), nullable=False), sa.Column('changes', postgresql.JSONB(astext_type=sa.Text()), nullable=False), - sa.ForeignKeyConstraint(['version_id'], ['project_version.id'], name=op.f('fk_project_version_changes_version_id_project_version'), ondelete='CASCADE'), - sa.PrimaryKeyConstraint('id', name=op.f('pk_project_version_changes')), + sa.ForeignKeyConstraint(['version_id'], ['project_version.id'], name=op.f('fk_project_version_change_version_id_project_version'), ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id', name=op.f('pk_project_version_change')), sa.UniqueConstraint('version_id', 'rank', name='unique_changes') ) - op.create_index('ix_project_version_change_version_id_rank', 'project_version_changes', ['version_id', 'rank'], unique=False) - op.create_index(op.f('ix_project_version_changes_rank'), 'project_version_changes', ['rank'], unique=False) - op.create_index(op.f('ix_project_version_changes_version_id'), 'project_version_changes', ['version_id'], unique=False) + op.create_index(op.f('ix_project_version_change_rank'), 'project_version_change', ['rank'], unique=False) + op.create_index(op.f('ix_project_version_change_version_id'), 'project_version_change', ['version_id'], unique=False) + op.create_index('ix_project_version_change_version_id_rank', 'project_version_change', ['version_id', 'rank'], unique=False) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - op.drop_index(op.f('ix_project_version_changes_version_id'), table_name='project_version_changes') - op.drop_index(op.f('ix_project_version_changes_rank'), table_name='project_version_changes') - op.drop_index('ix_project_version_change_version_id_rank', table_name='project_version_changes') - op.drop_table('project_version_changes') + op.drop_index('ix_project_version_change_version_id_rank', table_name='project_version_change') + op.drop_index(op.f('ix_project_version_change_version_id'), table_name='project_version_change') + op.drop_index(op.f('ix_project_version_change_rank'), table_name='project_version_change') + op.drop_table('project_version_change') # ### end Alembic commands ### From 83f858f0693d6a9e36ea21fc1e16d1856f0bdea0 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Fri, 3 Oct 2025 14:29:17 +0200 Subject: [PATCH 03/17] Initial version for merging diffs --- server/mergin/sync/files.py | 38 +++++- server/mergin/sync/models.py | 120 +++++++++++++++++- server/mergin/sync/utils.py | 27 ++++ ...3adc90fca0c_add_project_version_changes.py | 63 ++++++--- 4 files changed, 229 insertions(+), 19 deletions(-) diff --git a/server/mergin/sync/files.py b/server/mergin/sync/files.py index 5015e626..f7b6e2a7 100644 --- a/server/mergin/sync/files.py +++ b/server/mergin/sync/files.py @@ -2,8 +2,9 @@ # # SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial import datetime +from enum import Enum import os -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Optional, List from marshmallow import fields, EXCLUDE, pre_load, post_load, post_dump from pathvalidate import sanitize_filename @@ -11,6 +12,17 @@ from ..app import DateTimeWithZ, ma +class PushChangeType(Enum): + CREATE = "create" + UPDATE = "update" + DELETE = "delete" + UPDATE_DIFF = "update_diff" + + @classmethod + def values(cls): + return [member.value for member in cls.__members__.values()] + + def mergin_secure_filename(filename: str) -> str: """Generate secure filename for given file""" filename = os.path.normpath(filename) @@ -126,3 +138,27 @@ def patch_field(self, data, **kwargs): if not data.get("diff"): data.pop("diff") return data + + +@dataclass +class ProjectVersionChangeData: + path: str + size: int + checksum: str + change: PushChangeType + version: str + diffs: Optional[List[str]] = None + + +class ProjectVersionChangeDataSchema(ma.Schema): + """Schema for changes data in ProjectVersionChange changes column""" + + path = fields.String(required=True) + size = fields.Integer(required=True) + checksum = fields.String(required=True) + diffs = fields.List(fields.String(), required=False) + change = fields.Enum(PushChangeType, by_value=True, required=True) + + @post_load + def make_object(self, data, **kwargs): + return ProjectVersionChangeData(**data) diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py index d0475ec6..7f1e93bb 100644 --- a/server/mergin/sync/models.py +++ b/server/mergin/sync/models.py @@ -24,6 +24,8 @@ from .files import ( File, + ProjectVersionChangeData, + ProjectVersionChangeDataSchema, UploadChanges, ChangesSchema, ProjectFile, @@ -36,6 +38,7 @@ LOG_BASE, CachedLevel, generate_checksum, + get_all_cached_levels, get_merged_versions, is_versioned_file, is_qgis, @@ -910,7 +913,7 @@ class ProjectVersionChange(db.Model): nullable=False, ) # cached changes for versions from start to end (inclusive) - changes = db.Column(JSONB, nullable=False) + data = db.Column(JSONB, nullable=False) __table_args__ = ( 
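        # exactly one cached changes json per (version, rank) pair,
        # enforced by the unique constraint below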
        db.UniqueConstraint("version_id", "rank", name="unique_changes"),
@@ -925,6 +928,121 @@ class ProjectVersionChange(db.Model):
         uselist=False,
     )

+    @staticmethod
+    def merge_data(
+        changes: List[ProjectVersionChange],
+    ) -> List[ProjectVersionChangeData]:
+        """Merge a list of per-version changes into a single list of changes keyed by file path"""
+        result: Dict[str, ProjectVersionChangeData] = {}
+        for change in changes:
+            for item in change.data:
+                current_data: ProjectVersionChangeData = (
+                    ProjectVersionChangeDataSchema().load(item)
+                )
+                path = current_data.path
+                existing_data = result.get(path)
+                if existing_data:
+                    # merge the two changes recorded for the same file
+                    if existing_data.change == PushChangeType.CREATE:
+                        if current_data.change == PushChangeType.DELETE:
+                            # create + delete = nothing
+                            del result[path]
+                        elif current_data.change in (
+                            PushChangeType.UPDATE,
+                            PushChangeType.UPDATE_DIFF,
+                        ):
+                            # create + update = create with updated info
+                            current_data.change = existing_data.change
+                            current_data.diffs = None
+                            result[path] = current_data
+                    elif existing_data.change == PushChangeType.UPDATE:
+                        if current_data.change == PushChangeType.UPDATE_DIFF:
+                            # update + update_diff = update with latest info
+                            current_data.change = existing_data.change
+                            current_data.diffs = None
+                            result[path] = current_data
+                    elif existing_data.change == PushChangeType.UPDATE_DIFF:
+                        if current_data.change == PushChangeType.UPDATE_DIFF:
+                            # update_diff + update_diff = update_diff with merged diffs
+                            current_data.diffs.extend(existing_data.diffs or [])
+                            result[path] = current_data
+                else:
+                    result[path] = current_data
+        return list(result.values())
+
+    def get_data(self, start_version=0) -> None:
+        """Create a changes json checkpoint (aka. merged changes).
+        Find all smaller changes which are needed to create the final changes json.
+        If a lower-rank checkpoint is missing, fall back to individual changes instead.
+        """
+        if self.data:
+            return
+        version_name = self.version.name
+        project_id = self.version.project_id
+        if start_version > version_name:
+            logging.error(
+                f"Start version {start_version} is higher than end version {version_name} - broken history"
+            )
+            return
+
+        # TODO: rename get_merged_versions to get_merged_checkpoints and move it to the ProjectVersion class
+        expected_checkpoints = get_merged_versions(start_version, version_name)
+        expected_changes: List[ProjectVersionChange] = (
+            ProjectVersionChange.query.join(
+                ProjectVersion, ProjectVersionChange.version_id == ProjectVersion.id
+            )
+            .filter(
+                ProjectVersion.project_id == project_id,
+                ProjectVersion.name >= start_version,
+                ProjectVersion.name <= version_name,
+                tuple_(ProjectVersionChange.rank, ProjectVersion.name).in_(
+                    [(item.rank, item.end) for item in expected_checkpoints]
+                ),
+            )
+            .order_by(ProjectVersion.name)
+            .all()
+        )
+        expected_diffs = FileHistory.query.join(
+            ProjectVersion, FileHistory.version_id == ProjectVersion.id
+        ).filter(
+            ProjectVersion.project_id == project_id,
+            FileHistory.project_version_name >= start_version,
+            FileHistory.project_version_name <= version_name,
+            FileHistory.change == PushChangeType.UPDATE_DIFF.value,
+        )
+
+        changes = []
+        for checkpoint in expected_checkpoints:
+            cached_change = next(
+                (
+                    c
+                    for c in expected_changes
+                    if c.rank == checkpoint.rank and c.version.name == checkpoint.end
+                ),
+                None,
+            )
+            if not cached_change and checkpoint.rank > 0:
+                # fall back to the individual changes within this checkpoint range
+                individual_changes = expected_changes[: checkpoint.end + 1]
+                if not individual_changes:
+                    logging.error(
+                        f"Unable to find rank 0 changes for {checkpoint.rank} for project {project_id}"
+                    )
+                    return
+                merged_data = self.merge_data(individual_changes)
+                checkpoint_change = ProjectVersionChange(
+                    version_id=self.version_id,
+                    rank=checkpoint.rank,
+                    data=[asdict(c) for c in merged_data],
+                )
+                db.session.add(checkpoint_change)
+                db.session.flush()
+                changes.append(checkpoint_change)
+            else:
+                changes.append(cached_change)
+        changes = self.merge_data(changes)
+        db.session.commit()
+

 class ProjectVersion(db.Model):
     id = db.Column(db.Integer, primary_key=True, autoincrement=True)
diff --git a/server/mergin/sync/utils.py b/server/mergin/sync/utils.py
index a0f61fd6..a8d592e6 100644
--- a/server/mergin/sync/utils.py
+++ b/server/mergin/sync/utils.py
@@ -661,6 +661,25 @@ def get_cached_levels(version: int) -> List[CachedLevel]:
     return levels


+def get_all_cached_levels(start: int, end: int) -> List[CachedLevel]:
+    """
+    Get all cached levels between start version and end version.
+    This basically provides the list of cached levels which are fully contained in the range.
+    """
+    levels = []
+    rank_max = math.floor(math.log(end, LOG_BASE))
+
+    for rank in range(1, rank_max + 1):
+        index_start = (start - 1) // LOG_BASE**rank + 1
+        index_end = end // LOG_BASE**rank
+        for index in range(index_start, index_end + 1):
+            level = CachedLevel(rank=rank, index=index)
+            if level.start >= start and level.end <= end:
+                levels.append(level)
+
+    return levels
+
+
 def get_merged_versions(start: int, end: int) -> List[CachedLevel]:
     """
     Get all (merged) versions between start version and end version while respecting cached levels.
@@ -682,3 +701,11 @@ def get_merged_versions(start: int, end: int) -> List[CachedLevel]: break return levels + + +def merge_dict_lists(base=[], new=[], key="path"): + """Merge two lists of dictionaries based on 'path' key, updating existing entries and adding new ones.""" + merged = {item[key]: item for item in base} + for item in new: + merged[item[key]] = item + return list(merged.values()) diff --git a/server/migrations/community/63adc90fca0c_add_project_version_changes.py b/server/migrations/community/63adc90fca0c_add_project_version_changes.py index f7836df6..83c9f10a 100644 --- a/server/migrations/community/63adc90fca0c_add_project_version_changes.py +++ b/server/migrations/community/63adc90fca0c_add_project_version_changes.py @@ -5,38 +5,67 @@ Create Date: 2025-10-01 16:50:08.343639 """ + from alembic import op import sqlalchemy as sa from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = '63adc90fca0c' -down_revision = 'bd1ec73db389' +revision = "63adc90fca0c" +down_revision = "bd1ec73db389" branch_labels = None depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('project_version_change', - sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False), - sa.Column('rank', sa.Integer(), nullable=False), - sa.Column('version_id', sa.Integer(), nullable=False), - sa.Column('changes', postgresql.JSONB(astext_type=sa.Text()), nullable=False), - sa.ForeignKeyConstraint(['version_id'], ['project_version.id'], name=op.f('fk_project_version_change_version_id_project_version'), ondelete='CASCADE'), - sa.PrimaryKeyConstraint('id', name=op.f('pk_project_version_change')), - sa.UniqueConstraint('version_id', 'rank', name='unique_changes') + op.create_table( + "project_version_change", + sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False), + sa.Column("rank", sa.Integer(), nullable=False), + sa.Column("version_id", sa.Integer(), nullable=False), + sa.Column("data", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.ForeignKeyConstraint( + ["version_id"], + ["project_version.id"], + name=op.f("fk_project_version_change_version_id_project_version"), + ondelete="CASCADE", + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_project_version_change")), + sa.UniqueConstraint("version_id", "rank", name="unique_changes"), + ) + op.create_index( + op.f("ix_project_version_change_rank"), + "project_version_change", + ["rank"], + unique=False, + ) + op.create_index( + op.f("ix_project_version_change_version_id"), + "project_version_change", + ["version_id"], + unique=False, + ) + op.create_index( + "ix_project_version_change_version_id_rank", + "project_version_change", + ["version_id", "rank"], + unique=False, ) - op.create_index(op.f('ix_project_version_change_rank'), 'project_version_change', ['rank'], unique=False) - op.create_index(op.f('ix_project_version_change_version_id'), 'project_version_change', ['version_id'], unique=False) - op.create_index('ix_project_version_change_version_id_rank', 'project_version_change', ['version_id', 'rank'], unique=False) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! 
###
    op.drop_index(
        "ix_project_version_change_version_id_rank", table_name="project_version_change"
    )
    op.drop_index(
        op.f("ix_project_version_change_version_id"),
        table_name="project_version_change",
    )
    op.drop_index(
        op.f("ix_project_version_change_rank"), table_name="project_version_change"
    )
    op.drop_table("project_version_change")
    # ### end Alembic commands ###

From 3db1550c2c4967a3cfc420e26bb457c89c8b6b19 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Fri, 3 Oct 2025 14:40:45 +0200
Subject: [PATCH 04/17] Adapt merge versions

---
 server/mergin/sync/files.py  | 16 ++++++++++------
 server/mergin/sync/models.py |  4 +---
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/server/mergin/sync/files.py b/server/mergin/sync/files.py
index 6a4c2143..f5a9db12 100644
--- a/server/mergin/sync/files.py
+++ b/server/mergin/sync/files.py
@@ -4,11 +4,18 @@
 import datetime
 from enum import Enum
 import os
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Optional, List
 import uuid
 from flask import current_app
-from marshmallow import ValidationError, fields, EXCLUDE, post_dump, validates_schema
+from marshmallow import (
+    ValidationError,
+    fields,
+    EXCLUDE,
+    post_dump,
+    validates_schema,
+    post_load,
+)
 from pathvalidate import sanitize_filename

 from .utils import (
@@ -237,10 +244,7 @@ def patch_field(self, data, **kwargs):


 @dataclass
-class ProjectVersionChangeData:
-    path: str
-    size: int
-    checksum: str
+class ProjectVersionChangeData(File):
     change: PushChangeType
     version: str
     diffs: Optional[List[str]] = None
diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py
index f0592b86..7f37e458 100644
--- a/server/mergin/sync/models.py
+++ b/server/mergin/sync/models.py
@@ -24,7 +24,6 @@
 from flask import current_app

 from .files import (
-    File,
     ProjectVersionChangeData,
     ProjectVersionChangeDataSchema,
     ProjectDiffFile,
@@ -988,8 +987,7 @@ def get_data(self, start_version=0) -> None:
             )
             return

-        # TODO: rename get_merged_versions to get_merged_checkpoints and move it to the ProjectVersion class
-        expected_checkpoints = get_merged_versions(start_version, version_name)
+        expected_checkpoints = Checkpoint.get_checkpoints(start_version, version_name)
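The merge logic introduced above (and reworked in the next patch) composes change types per file path. A minimal standalone sketch of those composition rules, with plain strings standing in for the PushChangeType enum and a `compose` helper that is purely illustrative, not part of the codebase:

from typing import Optional

def compose(first: str, second: str) -> Optional[str]:
    """Compose two consecutive change types recorded for the same file path."""
    if first == "create" and second == "delete":
        return None  # file appeared and disappeared within the range: drop it
    if first == "create":
        return "create"  # later updates collapse into the original create
    if first == "update" and second == "update_diff":
        return "update"  # a full update already captures any following diff
    if first == "update_diff" and second == "update_diff":
        return "update_diff"  # geodiff diffs are chained together
    return second  # otherwise the latest change wins (e.g. delete + create)

assert compose("create", "delete") is None
assert compose("create", "update") == "create"
assert compose("update_diff", "update_diff") == "update_diff"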
From 03f5098370866d99ce00e39a5dcd9321c17d1203 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Tue, 7 Oct 2025 18:06:07 +0200
Subject: [PATCH 05/17] Delta endpoints + logic improvements:
 - introduce get_delta method with caching
 - introduce delta controller

---
 server/mergin/auth/models.py                  |   1 -
 server/mergin/sync/files.py                   |  29 ++-
 server/mergin/sync/models.py                  | 210 ++++++++++++++----
 server/mergin/sync/public_api_v2.yaml         |  70 ++++++
 .../mergin/sync/public_api_v2_controller.py   |  32 ++-
 server/mergin/tests/test_public_api_v2.py     | 128 ++++++++++-
 server/mergin/tests/utils.py                  |   6 +-
 ...3adc90fca0c_add_project_version_changes.py |   2 +-
 8 files changed, 418 insertions(+), 60 deletions(-)

diff --git a/server/mergin/auth/models.py b/server/mergin/auth/models.py
index 5dcf275e..3ab05f6b 100644
--- a/server/mergin/auth/models.py
+++ b/server/mergin/auth/models.py
@@ -11,7 +11,6 @@
 from sqlalchemy import or_, func, text

 from ..app import db
-from ..sync.models import ProjectUser
 from ..sync.utils import get_user_agent, get_ip, get_device_id, is_reserved_word

 MAX_USERNAME_LENGTH = 50
diff --git a/server/mergin/sync/files.py b/server/mergin/sync/files.py
index f5a9db12..7cc49e9a 100644
--- a/server/mergin/sync/files.py
+++ b/server/mergin/sync/files.py
@@ -244,21 +244,40 @@ def patch_field(self, data, **kwargs):


 @dataclass
-class ProjectVersionChangeData(File):
+class ChangeDiffFile:
+    path: str
+    size: Optional[int] = None
+
+
+class ChangeDiffFileSchema(ma.Schema):
+    path = fields.String(required=True)
+    size = fields.Integer(required=False)
+
+
+@dataclass
+class ProjectVersionChangeDelta(File):
     change: PushChangeType
     version: str
-    diffs: Optional[List[str]] = None
+    diffs: Optional[List[ChangeDiffFile]] = None


-class ProjectVersionChangeDataSchema(ma.Schema):
+class ProjectVersionChangeDeltaSchema(ma.Schema):
     """Schema for changes data in ProjectVersionChange changes column"""

     path = fields.String(required=True)
     size = fields.Integer(required=True)
     checksum = fields.String(required=True)
-    diffs = fields.List(fields.String(), required=False)
+    version = fields.String(required=True)
+    diffs = fields.List(fields.Nested(ChangeDiffFileSchema()))
     change = fields.Enum(PushChangeType, by_value=True, required=True)

     @post_load
     def make_object(self, data, **kwargs):
-        return ProjectVersionChangeData(**data)
+        return ProjectVersionChangeDelta(**data)
+
+    @post_dump
+    def patch_field(self, data, **kwargs):
+        # drop 'diffs' key entirely if empty or None as clients would expect
+        if not data.get("diffs"):
+            data.pop("diffs", None)
+        return data
diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py
index 7f37e458..6b4109bd 100644
--- a/server/mergin/sync/models.py
+++ b/server/mergin/sync/models.py
@@ -24,8 +24,9 @@
 from flask import current_app

 from .files import (
-    ProjectVersionChangeData,
-    ProjectVersionChangeDataSchema,
+    ChangeDiffFile,
+    ProjectVersionChangeDelta,
+    ProjectVersionChangeDeltaSchema,
     ProjectDiffFile,
     ProjectFileChange,
     ChangesSchema,
@@ -915,7 +916,7 @@ class ProjectVersionChange(db.Model):
         nullable=False,
     )
     # cached changes for versions from start to end (inclusive)
-    data = db.Column(JSONB, nullable=False)
+    delta = db.Column(JSONB, nullable=False)

     __table_args__ = (
         db.UniqueConstraint("version_id", "rank", name="unique_changes"),
@@ -925,30 +926,33 @@ class ProjectVersionChange(db.Model):
             rank,
         ),
     )
-    project = db.relationship(
+    version = db.relationship(
         "ProjectVersion",
         uselist=False,
     )

     @staticmethod
-    def merge_data(
+    def merge_changes(
         changes: List[ProjectVersionChange],
-    ) -> List[ProjectVersionChangeData]:
-        """Merge a list of per-version changes into a single list of changes keyed by file path"""
+    ) -> List[ProjectVersionChangeDelta]:
+        """
+        Merge multiple changes jsons into one list of changes.
+        Changes are merged based on file path and change type.
+        """
+        result: Dict[str, ProjectVersionChangeDelta] = {}
+        for change in changes:
+            for item in change.delta:
+                current_data: ProjectVersionChangeDelta = (
+                    ProjectVersionChangeDeltaSchema().load(item)
+                )
+                path = current_data.path
+                existing_data = result.get(path)
+                if existing_data:
+                    # merge the two changes recorded for the same file
+                    if existing_data.change == PushChangeType.CREATE:
+                        if current_data.change == PushChangeType.DELETE:
+                            # create + delete = nothing
+                            del result[path]
+                        elif current_data.change in (
+                            PushChangeType.UPDATE,
+                            PushChangeType.UPDATE_DIFF,
+                        ):
+                            # create + update = create with updated info
+                            current_data.change = existing_data.change
+                            current_data.diffs = None
+                            result[path] = current_data
+                        else:
+                            result[path] = current_data
+                    elif existing_data.change == PushChangeType.UPDATE:
+                        if current_data.change == PushChangeType.UPDATE_DIFF:
+                            # update + update_diff = update with latest info
+                            current_data.change = existing_data.change
+                            current_data.diffs = None
+                        result[path] = current_data
+                    elif existing_data.change == PushChangeType.UPDATE_DIFF:
+                        if current_data.change == PushChangeType.UPDATE_DIFF:
+                            # update_diff + update_diff = update_diff with merged diffs
+                            current_data.diffs.extend(existing_data.diffs or [])
+                        result[path] = current_data
+                    else:
+                        # delete + anything = the latest change wins
+                        result[path] = current_data
+                else:
+                    result[path] = current_data
+        return list(result.values())

+    def get_delta(self, from_version=1) -> Optional[List[ProjectVersionChangeDelta]]:
+        """
+        Get changes between two versions, merging them if needed.
""" - if self.changes: - return version_name = self.version.name project_id = self.version.project_id - if start_version > version_name: + if from_version > version_name: logging.error( - f"Start version {start_version} is higher than end version {version_name} - broken history" + f"Start version {from_version} is higher than end version {version_name} - broken history" ) return - expected_checkpoints = Checkpoint.get_checkpoints(start_version, version_name) - expected_changes: List[ProjectVersionChange] = ( - ProjectVersionChange.query.join( - ProjectVersion, ProjectVersionChange.version_id == ProjectVersion.id + if from_version == version_name: + # Return only changes for this version + changes = ( + ProjectVersionChange.query.join( + ProjectVersion, ProjectVersionChange.version_id == ProjectVersion.id + ) + .filter( + ProjectVersion.project_id == project_id, + ProjectVersion.name == version_name, + ) + .order_by(ProjectVersion.name) + .all() ) + return self.merge_changes(changes) + + expected_checkpoints = Checkpoint.get_checkpoints(from_version, version_name) + expected_changes: List[ProjectVersionChange] = ( + ProjectVersionChange.query.join(ProjectVersion) .filter( ProjectVersion.project_id == project_id, - ProjectVersion.name >= start_version, + ProjectVersion.name > from_version, ProjectVersion.name <= version_name, tuple_(ProjectVersionChange.rank, ProjectVersion.name).in_( [(item.rank, item.end) for item in expected_checkpoints] @@ -1003,18 +1021,12 @@ def get_data(self, start_version=0) -> None: .order_by(ProjectVersion.name) .all() ) - expected_diffs = FileHistory.query.join( - ProjectVersion, FileHistory.version_id == ProjectVersion.id - ).filter( - ProjectVersion.project_id == project_id, - FileHistory.project_version_name >= start_version, - FileHistory.project_version_name <= version_name, - FileHistory.change == PushChangeType.UPDATE_DIFF.value, - ) + individual_changes: List[ProjectVersionChange] = [] changes = [] for checkpoint in expected_checkpoints: - cached_change = next( + # find checkpoint change in already cached changes or any zero rank change needed + expected_change = next( ( c for c in expected_changes @@ -1022,28 +1034,103 @@ def get_data(self, start_version=0) -> None: ), None, ) - if not cached_change and checkpoint.rank > 0: - # Filter all changes that are in previous checkpoint range - individual_changes = expected_changes[: checkpoint.end + 1] + if expected_change: + changes.append(expected_change) + continue + + if checkpoint.rank > 0: + # Filter all changes that are 0 rank and are in the range of the checkpoint, cache them to prevent multiple queries + individual_changes = ( + ProjectVersionChange.query.join(ProjectVersion) + .filter( + ProjectVersion.project_id == project_id, + ProjectVersion.name > from_version, + ProjectVersion.name <= version_name, + ProjectVersionChange.rank == 0, + ) + .all() + if not individual_changes + else individual_changes + ) if not individual_changes: logging.error( f"Unable to find rank 0 changes for {checkpoint.rank} for project {project_id}" ) return - merged_data = self.merge_data(individual_changes) - changes.append(self.merge_data(individual_changes)) + merged_history = self.merge_changes( + [ + change + for change in individual_changes + if checkpoint.start < change.version.name <= checkpoint.end + ] + ) + for item in merged_history: + path = item.path + if not is_versioned_file(path): + continue + + file_history = ( + FileHistory.query.join(ProjectFilePath) + .filter( + ProjectFilePath.project_id == project_id, + 
ProjectFilePath.path == path, + FileHistory.project_version_name <= checkpoint.end, + ) + .order_by(FileHistory.project_version_name.desc()) + .limit(1) + .first() + ) + existing_diff_checkpoint = ( + FileDiff.query.filter( + FileDiff.file_path_id == file_history.file_path_id, + FileDiff.rank == checkpoint.rank, + tuple_(FileDiff.rank, FileDiff.version).in_( + [(item.rank, item.end) for item in expected_checkpoints] + ), + ) + .order_by(FileDiff.version.desc()) + .limit(1) + .first() + ) + if not existing_diff_checkpoint: + base_file = FileHistory.get_basefile( + file_history.file_path_id, checkpoint.end + ) + if not base_file: + continue + diff_path = mergin_secure_filename( + path + "-diff-" + str(uuid.uuid4()) + ) + checkpoint_diff = FileDiff( + basefile=base_file, + path=diff_path, + rank=checkpoint.rank, + version=checkpoint.end, + ) + # patching diff to new rank file name + item.diffs = [ChangeDiffFile(path=diff_path, size=0)] + db.session.add(checkpoint_diff) + db.session.flush() + latest_change = next( + (c for c in individual_changes if c.version.name == checkpoint.end), + None, + ) checkpoint_change = ProjectVersionChange( - version_id=self.version_id, + version_id=latest_change.version_id, rank=checkpoint.rank, - changes=[asdict(c) for c in merged_data], + delta=[ + {**asdict(c), "change": c.change.value} for c in merged_history + ], ) + changes.append(checkpoint_change) + db.session.add(checkpoint_change) db.session.flush() - else: - changes.append(cached_change) - changes = self.merge_data(changes) db.session.commit() + history_data = self.merge_changes(changes) + return history_data + class ProjectVersion(db.Model): id = db.Column(db.Integer, primary_key=True, autoincrement=True) @@ -1111,7 +1198,6 @@ def __init__( .filter(ProjectFilePath.path.in_(changed_files_paths)) .all() } - for item in changes: # get existing DB file reference or create a new one (for added files) db_file = existing_files_map.get( @@ -1140,6 +1226,32 @@ def __init__( else: latest_files_map[fh.path] = fh.id + # cache changes data json for version checkpoints + # rank 0 is for all changes from start to current version + changes_data = [ + ProjectVersionChangeDelta( + path=c.path, + change=c.change, + size=c.size, + checksum=c.checksum, + version=self.to_v_name(name), + diffs=( + [ChangeDiffFile(path=c.diff.path, size=c.diff.size)] + if c.diff + else None + ), + ) + for c in changes + ] + pvc = ProjectVersionChange( + version=self, + rank=0, + delta=ProjectVersionChangeDeltaSchema(many=True).dump(changes_data), + ) + + db.session.add(pvc) + db.session.flush() + # update cached values in project and push to transaction buffer so that self.files is up-to-date self.project.latest_project_files.file_history_ids = latest_files_map.values() db.session.flush() diff --git a/server/mergin/sync/public_api_v2.yaml b/server/mergin/sync/public_api_v2.yaml index d4c87016..bec63d4b 100644 --- a/server/mergin/sync/public_api_v2.yaml +++ b/server/mergin/sync/public_api_v2.yaml @@ -381,6 +381,40 @@ paths: $ref: "#/components/schemas/ProjectLocked" x-openapi-router-controller: mergin.sync.public_api_v2_controller + /projects/{id}/delta: + get: + tags: + - project + summary: Get project changes (delta) between two versions + operationId: get_project_delta + parameters: + - $ref: "#/components/parameters/ProjectId" + - name: since + in: query + required: true + schema: + type: string + example: v1 + description: Start version (exclusive) + - name: to + in: query + required: true + schema: + type: string + example: v2 + 
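        # NOTE: 'since' is exclusive while 'to' is inclusive, so since=v1&to=v3
        # returns the merged changes introduced by versions v2 and v3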
description: End version (inclusive) + responses: + "200": + description: Project changes between two versions + content: + application/json: + schema: + $ref: "#/components/schemas/ProjectDelta" + "400": + $ref: "#/components/responses/BadRequest" + "404": + $ref: "#/components/responses/NotFound" + x-openapi-router-controller: mergin.sync.public_api_v2_controller components: responses: NoContent: @@ -800,3 +834,39 @@ components: - editor - writer - owner + ProjectDelta: + type: object + required: + - path + - size + - checksum + - version + - change + properties: + path: + type: string + example: survey.gpkg + size: + type: integer + example: 1024 + checksum: + type: string + example: 9adb76bf81a34880209040ffe5ee262a090b62ab + version: + type: string + example: v2 + change: + type: string + enum: [create, update, delete, update_diff] + example: update + diffs: + type: array + items: + type: object + properties: + path: + type: string + example: survey.gpkg-diff-1 + size: + type: integer + example: 512 diff --git a/server/mergin/sync/public_api_v2_controller.py b/server/mergin/sync/public_api_v2_controller.py index 6bac0ff6..17d89b2b 100644 --- a/server/mergin/sync/public_api_v2_controller.py +++ b/server/mergin/sync/public_api_v2_controller.py @@ -22,7 +22,7 @@ from ..app import db from ..auth import auth_required from ..auth.models import User -from .models import FileDiff, Project, ProjectRole, ProjectMember +from .models import FileDiff, Project, ProjectRole, ProjectMember, ProjectVersionChange from .permissions import ProjectPermissions, require_project_by_uuid from .utils import prepare_download_response from ..app import db @@ -38,7 +38,7 @@ StorageLimitHit, UploadError, ) -from .files import ChangesSchema +from .files import ChangesSchema, ProjectVersionChangeDeltaSchema from .forms import project_name_validation from .models import ( Project, @@ -402,3 +402,31 @@ def upload_chunk(id: str): UploadChunkSchema().dump({"id": chunk_id, "valid_until": valid_until}), 200, ) + + +@auth_required +def get_project_delta(id: str): + """Get project changes (delta) between two versions""" + since = request.args.get("since") + to = request.args.get("to") + if not since or not to: + abort(400, "Missing 'since' or 'to' query parameter") + + project = require_project_by_uuid(id, ProjectPermissions.Read) + since_version = ProjectVersion.from_v_name(since) + to_version = ProjectVersion.from_v_name(to) + + if since_version > to_version: + abort(400, "'since' version must be less than 'to' version") + + to_change = ( + ProjectVersionChange.query.join(ProjectVersion) + .filter( + ProjectVersion.project_id == project.id, + ProjectVersion.name == to_version, + ) + .first_or_404() + ) + changes = to_change.get_delta(since_version) + + return ProjectVersionChangeDeltaSchema(many=True).dump(changes), 200 diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py index 762f5a59..433ebab0 100644 --- a/server/mergin/tests/test_public_api_v2.py +++ b/server/mergin/tests/test_public_api_v2.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial import os import shutil +from typing import List from unittest.mock import patch import uuid from pygeodiff import GeoDiffLibError @@ -11,7 +12,15 @@ from ..app import db from tests import test_project, test_workspace_id from ..config import Configuration -from ..sync.models import FileDiff, FileHistory, Project, ProjectFilePath, ProjectRole +from ..sync.models import ( + FileDiff, + 
FileHistory, + Project, + ProjectFilePath, + ProjectRole, + ProjectVersionChange, +) +from ..sync.files import PushChangeType from sqlalchemy.exc import IntegrityError import pytest from datetime import datetime, timedelta, timezone @@ -306,6 +315,82 @@ def test_create_diff_checkpoint(diff_project): assert not os.path.exists(diff.abs_path) +def test_project_version_change_delta(diff_project): + """Test that ProjectVersionChangeData and its schema work as expected""" + version = diff_project.get_latest_version() + assert version.name == 10 + pvcs: List[ProjectVersionChange] = ( + ProjectVersionChange.query.join(ProjectVersion) + .filter(ProjectVersion.project_id == diff_project.id) + .all() + ) + assert len(pvcs) == 10 + initial_pvc = pvcs[0] + assert initial_pvc.get_delta(3) is None + initial_version = initial_pvc.version + assert initial_pvc.rank == 0 + assert initial_pvc.version.id == initial_version.id + assert len(initial_pvc.get_delta()) == len(initial_version.files) + # no ranks created as we get here just first version with get_delta + assert ProjectVersionChange.query.filter_by(rank=1).count() == 0 + second_pvc = pvcs[1] + second_data = second_pvc.get_delta() + assert len(second_data) == 1 + assert second_data[0].change == PushChangeType.DELETE + # no ranks created as we get here just first version with get_delta + assert ProjectVersionChange.query.filter_by(rank=1).count() == 0 + + # delete + create version + create_pvc = pvcs[2] + create_data = create_pvc.get_delta() + assert len(create_data) == 1 + assert create_data[0].change == PushChangeType.CREATE + + # get_delta with rank creation + checkpoint_pvc = pvcs[3] + checkpoint_data = checkpoint_pvc.get_delta() + assert len(checkpoint_data) == 1 + assert checkpoint_data[0].change == PushChangeType.UPDATE_DIFF + + checkpoint_changes = ProjectVersionChange.query.filter_by(rank=1) + filediff_checkpoints = FileDiff.query.filter_by(rank=1) + checkpoint_change = checkpoint_changes.first() + filediff_checkpoint = filediff_checkpoints.first() + + assert checkpoint_changes.count() == 1 + assert checkpoint_change.version_id == checkpoint_pvc.version_id + assert filediff_checkpoints.count() == 1 + assert filediff_checkpoint.version == 4 + # check if filediff basefile is correctly set + assert ( + filediff_checkpoint.basefile_id + == FileHistory.query.filter_by(project_version_name=3).first().id + ) + file_history = FileHistory.query.filter_by(project_version_name=4).first() + assert checkpoint_data[0].version == f"v4" + assert checkpoint_data[0].path == file_history.path + assert checkpoint_data[0].size == file_history.size + assert checkpoint_data[0].checksum == file_history.checksum + assert len(checkpoint_data[0].diffs) == 1 + assert checkpoint_data[0].diffs[0]["path"] == filediff_checkpoint.path + assert checkpoint_data[0].diffs[0]["size"] == 0 + + # get data with multiple ranks = 1 level checkpoints 1-8 + 2 level checkpoint 9-10 + latest_pvc = pvcs[-1] + latest_data = latest_pvc.get_delta() + assert len(latest_data) == 2 + assert latest_data[0].change == PushChangeType.DELETE + assert latest_data[1].change == PushChangeType.CREATE + pv = push_change( + diff_project, "removed", "test.gpkg", diff_project.storage.project_dir + ) + latest_pvc = ProjectVersionChange.query.filter_by(version_id=pv.id).first() + latest_data = latest_pvc.get_delta(pv.name - 3) + assert len(latest_data) == 1 + # test.gpkg is transparent as it was created and deleted in this range + assert not next((c for c in latest_data if c.path == "test.gpkg"), None) + + 
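+
+# The ranks materialised by get_delta depend on LOG_BASE: a rank-1 checkpoint
+# spans LOG_BASE consecutive versions, rank 2 spans LOG_BASE**2 and so on;
+# only ranges fully contained between the requested versions get a cached row.
+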
push_data = [ # success ( @@ -647,3 +732,44 @@ def test_full_push(client): os.path.join(project.storage.project_dir, "v2", test_file["path"]) ) assert not Upload.query.filter_by(project_id=project.id).first() + + +def test_project_delta(client, diff_project): + """Test project delta endpoint""" + response = client.get(f"v2/projects/{diff_project.id}/delta") + assert response.status_code == 400 + + response = client.get(f"v2/projects/{diff_project.id}/delta?since=v1&to=v10") + assert response.status_code == 200 + assert len(response.json) == 10 + assert response.json[0]["version"] == "v1" + assert response.json[-1]["version"] == "v10" + + response = client.get(f"v2/projects/{project.id}/delta?since=v3&to=v7") + assert response.status_code == 200 + assert len(response.json) == 5 + assert response.json[0]["version"] == "v4" + assert response.json[-1]["version"] == "v8" + + response = client.get(f"v2/projects/{project.id}/delta?since=v10") + assert response.status_code == 200 + assert len(response.json) == 0 + + response = client.get(f"v2/projects/{project.id}/delta?to=v1") + assert response.status_code == 200 + assert len(response.json) == 0 + + response = client.get(f"v2/projects/{project.id}/delta?since=v8&to=v12") + assert response.status_code == 422 + assert response.json["code"] == "InvalidVersionRange" + + response = client.get(f"v2/projects/{project.id}/delta?since=3&to=7") + assert response.status_code == 422 + assert response.json["code"] == "InvalidVersionFormat" + + response = client.get(f"v2/projects/{project.id}/delta?since=v3&to=7") + assert response.status_code == 422 + assert response.json["code"] == "InvalidVersionFormat" + + response = client.get(f"v2/projects/9999/delta") + assert response.status_code == 404 diff --git a/server/mergin/tests/utils.py b/server/mergin/tests/utils.py index fc7e4240..89766d24 100644 --- a/server/mergin/tests/utils.py +++ b/server/mergin/tests/utils.py @@ -303,9 +303,12 @@ def push_change(project, action, path, src_dir): current_files = project.files new_version = ProjectVersion.to_v_name(project.next_version()) changes = {"added": [], "updated": [], "removed": []} - metadata = {**file_info(src_dir, path), "location": os.path.join(new_version, path)} if action == "added": + metadata = { + **file_info(src_dir, path), + "location": os.path.join(new_version, path), + } new_file = os.path.join(project.storage.project_dir, metadata["location"]) os.makedirs(os.path.dirname(new_file), exist_ok=True) shutil.copy(os.path.join(src_dir, metadata["path"]), new_file) @@ -349,6 +352,7 @@ def push_change(project, action, path, src_dir): changes["updated"].append(metadata) elif action == "removed": f_removed = next(f for f in current_files if f.path == path) + os.remove(os.path.join(project.storage.project_dir, f_removed.location)) changes["removed"].append(asdict(f_removed)) else: return diff --git a/server/migrations/community/63adc90fca0c_add_project_version_changes.py b/server/migrations/community/63adc90fca0c_add_project_version_changes.py index 83c9f10a..5d7ffe8d 100644 --- a/server/migrations/community/63adc90fca0c_add_project_version_changes.py +++ b/server/migrations/community/63adc90fca0c_add_project_version_changes.py @@ -24,7 +24,7 @@ def upgrade(): sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False), sa.Column("rank", sa.Integer(), nullable=False), sa.Column("version_id", sa.Integer(), nullable=False), - sa.Column("data", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("delta", 
postgresql.JSONB(astext_type=sa.Text()), nullable=False), sa.ForeignKeyConstraint( ["version_id"], ["project_version.id"], From 4fb5c83ec75158fc2053eb6c0ccf719a9218bb87 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Wed, 8 Oct 2025 18:00:48 +0200 Subject: [PATCH 06/17] extract method for create checkpoint - added tests for pull and checkpoints --- server/mergin/sync/models.py | 225 +++++++++--------- server/mergin/sync/public_api_v2.yaml | 5 +- .../mergin/sync/public_api_v2_controller.py | 35 ++- server/mergin/tests/test_public_api_v2.py | 131 +++++----- 4 files changed, 204 insertions(+), 192 deletions(-) diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py index 6b4109bd..5eb1601c 100644 --- a/server/mergin/sync/models.py +++ b/server/mergin/sync/models.py @@ -980,40 +980,118 @@ def merge_changes( result[current_data.path] = current_data return list(result.values()) - def get_delta(self, from_version=1) -> Optional[List[ProjectVersionChangeDelta]]: + @classmethod + def _create_checkpoint( + cls, + project_id: str, + checkpoint: Checkpoint, + individual_changes: List[ProjectVersionChange], + ) -> Optional[ProjectVersionChange]: + """ + Creates and caches a new ProjectVersionChange checkpoint and any required FileDiff checkpoints. + """ + individual_changes_range = [ + change + for change in individual_changes + if checkpoint.start < change.version.name <= checkpoint.end + ] + + if not individual_changes_range: + logging.warning( + f"No individual changes found for project {project_id} in range v{checkpoint.start}-v{checkpoint.end} to create checkpoint." + ) + return None + + merged_deltas = cls.merge_changes(individual_changes_range) + versioned_deltas = [ + item for item in merged_deltas if is_versioned_file(item.path) + ] + + # Pre-fetch data for all versioned files to create FileDiff checkpoints + versioned_file_paths = [delta.path for delta in versioned_deltas] + if versioned_file_paths: + file_paths = ProjectFilePath.query.filter( + ProjectFilePath.project_id == project_id, + ProjectFilePath.path.in_(versioned_file_paths), + ).all() + file_path_map = {fp.path: fp.id for fp in file_paths} + + for item in versioned_deltas: + file_path_id = file_path_map.get(item.path) + if not file_path_id: + continue + + # Check if a FileDiff checkpoint already exists + existing_diff_checkpoint = FileDiff.query.filter_by( + file_path_id=file_path_id, + rank=checkpoint.rank, + version=checkpoint.end, + ).first() + + if not existing_diff_checkpoint: + base_file = FileHistory.get_basefile(file_path_id, checkpoint.end) + if not base_file: + continue + + diff_path = mergin_secure_filename( + f"{item.path}-diff-{uuid.uuid4()}" + ) + checkpoint_diff = FileDiff( + basefile=base_file, + path=diff_path, + rank=checkpoint.rank, + version=checkpoint.end, + ) + # Patch the delta with the path to the new diff checkpoint + item.diffs = [ChangeDiffFile(path=diff_path, size=0)] + db.session.add(checkpoint_diff) + + checkpoint_change = ProjectVersionChange( + version_id=individual_changes_range[-1].version_id, + rank=checkpoint.rank, + delta=[{**asdict(c), "change": c.change.value} for c in merged_deltas], + ) + db.session.add(checkpoint_change) + return checkpoint_change + + @classmethod + def get_delta( + cls, project_id: str, since: int, to: int + ) -> Optional[List[ProjectVersionChangeDelta]]: """ Get changes between two versions, merging them if needed. 
+ - create FileDiff checkpoints if needed + - create ProjectVersionChange checkpoints if needed with delta json """ - version_name = self.version.name - project_id = self.version.project_id - if from_version > version_name: + if since > to: logging.error( - f"Start version {from_version} is higher than end version {version_name} - broken history" + f"Start version {since} is higher than end version {to} - broken history" ) return - - if from_version == version_name: + if since == to: # Return only changes for this version - changes = ( - ProjectVersionChange.query.join( - ProjectVersion, ProjectVersionChange.version_id == ProjectVersion.id - ) + change = ( + ProjectVersionChange.query.join(ProjectVersion) .filter( ProjectVersion.project_id == project_id, - ProjectVersion.name == version_name, + ProjectVersion.name == to, + ProjectVersionChange.rank == 0, ) - .order_by(ProjectVersion.name) - .all() + .first() + ) + return ( + [ProjectVersionChangeDeltaSchema().load(item) for item in change.delta] + if change + else [] ) - return self.merge_changes(changes) - expected_checkpoints = Checkpoint.get_checkpoints(from_version, version_name) + expected_checkpoints = Checkpoint.get_checkpoints(since + 1, to) expected_changes: List[ProjectVersionChange] = ( ProjectVersionChange.query.join(ProjectVersion) .filter( ProjectVersion.project_id == project_id, - ProjectVersion.name > from_version, - ProjectVersion.name <= version_name, + ProjectVersion.name > since, + ProjectVersion.name <= to, tuple_(ProjectVersionChange.rank, ProjectVersion.name).in_( [(item.rank, item.end) for item in expected_checkpoints] ), @@ -1021,115 +1099,46 @@ def get_delta(self, from_version=1) -> Optional[List[ProjectVersionChangeDelta]] .order_by(ProjectVersion.name) .all() ) + existing_changes_map = {(c.rank, c.version.name): c for c in expected_changes} + + # Cache all individual (rank 0) changes in the required range. 
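+        # The rank-0 rows are fetched lazily on the first missing checkpoint
+        # and reused for every later checkpoint in this loop, so the database
+        # is queried at most once no matter how many checkpoints must be built.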
individual_changes: List[ProjectVersionChange] = [] changes = [] for checkpoint in expected_checkpoints: - # find checkpoint change in already cached changes or any zero rank change needed - expected_change = next( - ( - c - for c in expected_changes - if c.rank == checkpoint.rank and c.version.name == checkpoint.end - ), - None, + expected_change = existing_changes_map.get( + (checkpoint.rank, checkpoint.end) ) + if expected_change: changes.append(expected_change) continue if checkpoint.rank > 0: - # Filter all changes that are 0 rank and are in the range of the checkpoint, cache them to prevent multiple queries individual_changes = ( - ProjectVersionChange.query.join(ProjectVersion) - .filter( - ProjectVersion.project_id == project_id, - ProjectVersion.name > from_version, - ProjectVersion.name <= version_name, - ProjectVersionChange.rank == 0, - ) - .all() - if not individual_changes - else individual_changes - ) - if not individual_changes: - logging.error( - f"Unable to find rank 0 changes for {checkpoint.rank} for project {project_id}" - ) - return - merged_history = self.merge_changes( - [ - change - for change in individual_changes - if checkpoint.start < change.version.name <= checkpoint.end - ] - ) - for item in merged_history: - path = item.path - if not is_versioned_file(path): - continue - - file_history = ( - FileHistory.query.join(ProjectFilePath) + ( + ProjectVersionChange.query.join(ProjectVersion) .filter( - ProjectFilePath.project_id == project_id, - ProjectFilePath.path == path, - FileHistory.project_version_name <= checkpoint.end, - ) - .order_by(FileHistory.project_version_name.desc()) - .limit(1) - .first() - ) - existing_diff_checkpoint = ( - FileDiff.query.filter( - FileDiff.file_path_id == file_history.file_path_id, - FileDiff.rank == checkpoint.rank, - tuple_(FileDiff.rank, FileDiff.version).in_( - [(item.rank, item.end) for item in expected_checkpoints] - ), + ProjectVersion.project_id == project_id, + ProjectVersion.name > since, + ProjectVersion.name <= to, + ProjectVersionChange.rank == 0, ) - .order_by(FileDiff.version.desc()) - .limit(1) - .first() + .all() ) - if not existing_diff_checkpoint: - base_file = FileHistory.get_basefile( - file_history.file_path_id, checkpoint.end - ) - if not base_file: - continue - diff_path = mergin_secure_filename( - path + "-diff-" + str(uuid.uuid4()) - ) - checkpoint_diff = FileDiff( - basefile=base_file, - path=diff_path, - rank=checkpoint.rank, - version=checkpoint.end, - ) - # patching diff to new rank file name - item.diffs = [ChangeDiffFile(path=diff_path, size=0)] - db.session.add(checkpoint_diff) - db.session.flush() - latest_change = next( - (c for c in individual_changes if c.version.name == checkpoint.end), - None, + if not individual_changes + else individual_changes ) - checkpoint_change = ProjectVersionChange( - version_id=latest_change.version_id, - rank=checkpoint.rank, - delta=[ - {**asdict(c), "change": c.change.value} for c in merged_history - ], + new_checkpoint = cls._create_checkpoint( + project_id, checkpoint, individual_changes ) - changes.append(checkpoint_change) + if new_checkpoint: + changes.append(new_checkpoint) - db.session.add(checkpoint_change) - db.session.flush() db.session.commit() - history_data = self.merge_changes(changes) - return history_data + deltas = cls.merge_changes(changes) + return deltas class ProjectVersion(db.Model): diff --git a/server/mergin/sync/public_api_v2.yaml b/server/mergin/sync/public_api_v2.yaml index bec63d4b..c3f6d15a 100644 --- 
a/server/mergin/sync/public_api_v2.yaml +++ b/server/mergin/sync/public_api_v2.yaml @@ -409,7 +409,9 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/ProjectDelta" + type: array + items: + $ref: "#/components/schemas/ProjectDelta" "400": $ref: "#/components/responses/BadRequest" "404": @@ -861,6 +863,7 @@ components: example: update diffs: type: array + nullable: true items: type: object properties: diff --git a/server/mergin/sync/public_api_v2_controller.py b/server/mergin/sync/public_api_v2_controller.py index 17d89b2b..3a2ec924 100644 --- a/server/mergin/sync/public_api_v2_controller.py +++ b/server/mergin/sync/public_api_v2_controller.py @@ -407,26 +407,23 @@ def upload_chunk(id: str): @auth_required def get_project_delta(id: str): """Get project changes (delta) between two versions""" - since = request.args.get("since") - to = request.args.get("to") - if not since or not to: - abort(400, "Missing 'since' or 'to' query parameter") - + since = ProjectVersion.from_v_name(request.args.get("since")) + to = ProjectVersion.from_v_name(request.args.get("to")) project = require_project_by_uuid(id, ProjectPermissions.Read) - since_version = ProjectVersion.from_v_name(since) - to_version = ProjectVersion.from_v_name(to) - - if since_version > to_version: + if since < 0 or to < 0: + abort(400, "Invalid 'since' or 'to' version") + if since > to: abort(400, "'since' version must be less than 'to' version") - to_change = ( - ProjectVersionChange.query.join(ProjectVersion) - .filter( - ProjectVersion.project_id == project.id, - ProjectVersion.name == to_version, - ) - .first_or_404() - ) - changes = to_change.get_delta(since_version) + ProjectVersion.query.filter( + ProjectVersion.project_id == project.id, + ProjectVersion.name == since, + ).first_or_404() + ProjectVersion.query.filter( + ProjectVersion.project_id == project.id, + ProjectVersion.name == to, + ).first_or_404() + + delta = ProjectVersionChange.get_delta(project.id, since, to) - return ProjectVersionChangeDeltaSchema(many=True).dump(changes), 200 + return ProjectVersionChangeDeltaSchema(many=True).dump(delta), 200 diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py index 433ebab0..ed6d8912 100644 --- a/server/mergin/tests/test_public_api_v2.py +++ b/server/mergin/tests/test_public_api_v2.py @@ -318,7 +318,9 @@ def test_create_diff_checkpoint(diff_project): def test_project_version_change_delta(diff_project): """Test that ProjectVersionChangeData and its schema work as expected""" version = diff_project.get_latest_version() + project_id = diff_project.id assert version.name == 10 + assert ProjectVersionChange.get_delta(project_id, 2, 1) is None pvcs: List[ProjectVersionChange] = ( ProjectVersionChange.query.join(ProjectVersion) .filter(ProjectVersion.project_id == diff_project.id) @@ -326,39 +328,41 @@ def test_project_version_change_delta(diff_project): ) assert len(pvcs) == 10 initial_pvc = pvcs[0] - assert initial_pvc.get_delta(3) is None initial_version = initial_pvc.version assert initial_pvc.rank == 0 assert initial_pvc.version.id == initial_version.id - assert len(initial_pvc.get_delta()) == len(initial_version.files) + # if version is the same, return just its delta in v1 + assert len(ProjectVersionChange.get_delta(project_id, 1, 1)) == len( + initial_version.files + ) # no ranks created as we get here just first version with get_delta assert ProjectVersionChange.query.filter_by(rank=1).count() == 0 - second_pvc = pvcs[1] - second_data = 
second_pvc.get_delta() - assert len(second_data) == 1 - assert second_data[0].change == PushChangeType.DELETE + + delta = ProjectVersionChange.get_delta(project_id, 1, 2) + assert len(delta) == 1 + assert delta[0].change == PushChangeType.DELETE # no ranks created as we get here just first version with get_delta assert ProjectVersionChange.query.filter_by(rank=1).count() == 0 # delete + create version - create_pvc = pvcs[2] - create_data = create_pvc.get_delta() - assert len(create_data) == 1 - assert create_data[0].change == PushChangeType.CREATE - - # get_delta with rank creation - checkpoint_pvc = pvcs[3] - checkpoint_data = checkpoint_pvc.get_delta() - assert len(checkpoint_data) == 1 - assert checkpoint_data[0].change == PushChangeType.UPDATE_DIFF + delta = ProjectVersionChange.get_delta(project_id, 1, 3) + assert len(delta) == 1 + assert delta[0].change == PushChangeType.CREATE + + # get_delta with update diff + delta = ProjectVersionChange.get_delta(project_id, 1, 4) + assert len(delta) == 1 + assert delta[0].change == PushChangeType.UPDATE_DIFF + assert ProjectVersionChange.query.filter_by(rank=1).count() == 0 + # create rank 1 checkpoint for v4 + delta = ProjectVersionChange.get_delta(project_id, 0, 4) checkpoint_changes = ProjectVersionChange.query.filter_by(rank=1) filediff_checkpoints = FileDiff.query.filter_by(rank=1) checkpoint_change = checkpoint_changes.first() filediff_checkpoint = filediff_checkpoints.first() - assert checkpoint_changes.count() == 1 - assert checkpoint_change.version_id == checkpoint_pvc.version_id + assert checkpoint_change.version_id == pvcs[3].version_id assert filediff_checkpoints.count() == 1 assert filediff_checkpoint.version == 4 # check if filediff basefile is correctly set @@ -367,28 +371,29 @@ def test_project_version_change_delta(diff_project): == FileHistory.query.filter_by(project_version_name=3).first().id ) file_history = FileHistory.query.filter_by(project_version_name=4).first() - assert checkpoint_data[0].version == f"v4" - assert checkpoint_data[0].path == file_history.path - assert checkpoint_data[0].size == file_history.size - assert checkpoint_data[0].checksum == file_history.checksum - assert len(checkpoint_data[0].diffs) == 1 - assert checkpoint_data[0].diffs[0]["path"] == filediff_checkpoint.path - assert checkpoint_data[0].diffs[0]["size"] == 0 - - # get data with multiple ranks = 1 level checkpoints 1-8 + 2 level checkpoint 9-10 - latest_pvc = pvcs[-1] - latest_data = latest_pvc.get_delta() - assert len(latest_data) == 2 - assert latest_data[0].change == PushChangeType.DELETE - assert latest_data[1].change == PushChangeType.CREATE + assert delta[0].change == PushChangeType.UPDATE_DIFF + assert delta[0].version == "v4" + assert delta[0].path == file_history.path + assert delta[0].size == file_history.size + assert delta[0].checksum == file_history.checksum + assert len(delta[0].diffs) == 1 + assert delta[0].diffs[0]["path"] == filediff_checkpoint.path + assert delta[0].diffs[0]["size"] == 0 + + # get data with multiple ranks = 1 level checkpoints 1-4, 5-8 + checkpoint 9 and 10 + delta = ProjectVersionChange.get_delta(project_id, 0, 10) + assert len(delta) == 2 + assert delta[0].change == PushChangeType.DELETE + assert delta[1].change == PushChangeType.CREATE + assert ProjectVersionChange.query.filter_by(rank=1).count() == 2 + pv = push_change( diff_project, "removed", "test.gpkg", diff_project.storage.project_dir ) - latest_pvc = ProjectVersionChange.query.filter_by(version_id=pv.id).first() - latest_data = 
latest_pvc.get_delta(pv.name - 3)
-    assert len(latest_data) == 1
+    delta = ProjectVersionChange.get_delta(project_id, pv.name - 3, pv.name)
+    assert len(delta) == 1
     # test.gpkg is transparent as it was created and deleted in this range
-    assert not next((c for c in latest_data if c.path == "test.gpkg"), None)
+    assert not next((c for c in delta if c.path == "test.gpkg"), None)


 push_data = [
@@ -739,37 +744,35 @@ def test_project_delta(client, diff_project):
     response = client.get(f"v2/projects/{diff_project.id}/delta")
     assert response.status_code == 400

+    response = client.get(f"v2/projects/{diff_project.id}/delta?since=v-1&to=v1")
+    assert response.status_code == 400
+
+    response = client.get(f"v2/projects/{diff_project.id}/delta?since=v1000&to=v2000")
+    assert response.status_code == 404
+
     response = client.get(f"v2/projects/{diff_project.id}/delta?since=v1&to=v10")
     assert response.status_code == 200
-    assert len(response.json) == 10
-    assert response.json[0]["version"] == "v1"
-    assert response.json[-1]["version"] == "v10"
+    assert response.json[0]["change"] == PushChangeType.DELETE.value
+    assert response.json[1]["change"] == PushChangeType.CREATE.value

-    response = client.get(f"v2/projects/{project.id}/delta?since=v3&to=v7")
-    assert response.status_code == 200
-    assert len(response.json) == 5
-    assert response.json[0]["version"] == "v4"
-    assert response.json[-1]["version"] == "v8"

-    response = client.get(f"v2/projects/{project.id}/delta?since=v10")
+# integration test for pull mechanism
+def test_project_pull(client, diff_project):
+    response = client.get(f"v2/projects/{diff_project.id}/delta?since=v4&to=v8")
     assert response.status_code == 200
-    assert len(response.json) == 0
-
-    response = client.get(f"v2/projects/{project.id}/delta?to=v1")
+    delta = response.json
+    assert len(delta) == 1
+
+    # simulate pull of delta[0]
+    assert delta[0]["change"] == PushChangeType.UPDATE_DIFF.value
+    assert delta[0]["version"] == "v7"
+    assert len(delta[0]["diffs"]) == 1
+    diff = delta[0]["diffs"][0]
+    assert diff["path"].startswith("base.gpkg-")
+    assert diff["size"] == 0  # empty diff as in test data
+    response = client.get(f"v2/projects/{diff_project.id}/raw/diff/{diff['path']}")
     assert response.status_code == 200
-    assert len(response.json) == 0
-
-    response = client.get(f"v2/projects/{project.id}/delta?since=v8&to=v12")
-    assert response.status_code == 422
-    assert response.json["code"] == "InvalidVersionRange"
-
-    response = client.get(f"v2/projects/{project.id}/delta?since=3&to=7")
-    assert response.status_code == 422
-    assert response.json["code"] == "InvalidVersionFormat"
-
-    response = client.get(f"v2/projects/{project.id}/delta?since=v3&to=7")
-    assert response.status_code == 422
-    assert response.json["code"] == "InvalidVersionFormat"
-
-    response = client.get(f"v2/projects/9999/delta")
-    assert response.status_code == 404
+    created_diff = FileDiff.query.filter_by(path=diff["path"]).first()
+    assert created_diff and os.path.exists(created_diff.abs_path)
+    assert created_diff.size > 0
+    assert created_diff.checksum

From 63100a68144b9970616668e0c2b3c8d965e5141b Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Thu, 9 Oct 2025 18:13:48 +0200
Subject: [PATCH 07/17] Final fixes and schema changes

- accept plain version numbers in the endpoint
- consider storing a single diff instead of a diffs list in the db
---
 server/mergin/sync/files.py                 |  81 ++++++--
 server/mergin/sync/models.py                | 180 +++++++++---------
 server/mergin/sync/public_api_v2.yaml       |  15 +-
 .../mergin/sync/public_api_v2_controller.py |  10 +-
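With the endpoint switching to plain integer versions, a request/response round trip would look roughly like the sketch below, written against the flask test client used throughout the test suite; the payload values are illustrative assumptions, not recorded test data.

resp = client.get(f"v2/projects/{diff_project.id}/delta?since=4&to=8")
assert resp.status_code == 200
# the body is a JSON array of merged per-file changes, e.g.
# [{"path": "base.gpkg", "version": 7, "change": "update_diff",
#   "size": 98304, "checksum": "9adb76bf", "diffs": [{"path": "base.gpkg-diff-abc123"}]}]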
server/mergin/tests/test_public_api_v2.py | 116 +++++++---- ...3adc90fca0c_add_project_version_changes.py | 38 ++++ 6 files changed, 291 insertions(+), 149 deletions(-) diff --git a/server/mergin/sync/files.py b/server/mergin/sync/files.py index 7cc49e9a..9838fba6 100644 --- a/server/mergin/sync/files.py +++ b/server/mergin/sync/files.py @@ -4,7 +4,7 @@ import datetime from enum import Enum import os -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Optional, List import uuid from flask import current_app @@ -244,36 +244,91 @@ def patch_field(self, data, **kwargs): @dataclass -class ChangeDiffFile: +class DeltaDiffFile: path: str - size: Optional[int] = None -class ChangeDiffFileSchema(ma.Schema): +class DeltaDiffFileSchema(ma.Schema): path = fields.String(required=True) - size = fields.Integer(required=False) @dataclass -class ProjectVersionChangeDelta(File): +class DeltaBase(File): change: PushChangeType - version: str - diffs: Optional[List[ChangeDiffFile]] = None + version: int -class ProjectVersionChangeDeltaSchema(ma.Schema): - """Schema for changes data in ProjectVersionChange changes column""" +@dataclass +class DeltaMerged(DeltaBase): + diffs: List[DeltaDiffFile] = field(default_factory=list) + + def to_data_delta(self): + """Convert DeltaMerged to DeltaData with single diff""" + result = DeltaData( + path=self.path, + size=self.size, + checksum=self.checksum, + change=self.change, + version=self.version, + ) + if self.diffs: + result.diff = self.diffs[0].path + return result + + +@dataclass +class DeltaData(File): + """Delta data stored in database""" + + change: PushChangeType + version: int + diff: Optional[str] = None + + def to_merged_delta(self) -> DeltaMerged: + """Convert DeltaData to DeltaMerged with multiple diffs""" + result = DeltaMerged( + path=self.path, + size=self.size, + checksum=self.checksum, + change=self.change, + version=self.version, + ) + if self.diff: + result.diffs = [DeltaDiffFile(path=self.diff)] + return result + + +class DeltaBaseSchema(ma.Schema): + """Base schema for detla json and response from delta endpoint""" path = fields.String(required=True) size = fields.Integer(required=True) checksum = fields.String(required=True) - version = fields.String(required=True) - diffs = fields.List(fields.Nested(ChangeDiffFileSchema())) + version = fields.Integer(required=True) change = fields.Enum(PushChangeType, by_value=True, required=True) + +class DeltaDataSchema(DeltaBaseSchema): + """Schema for delta data in database""" + + diff = fields.String(required=False) + @post_load def make_object(self, data, **kwargs): - return ProjectVersionChangeDelta(**data) + return DeltaData(**data) + + @post_dump + def patch_field(self, data, **kwargs): + # drop 'diff' key entirely if empty or None as database would expect + if not data.get("diff"): + data.pop("diff", None) + return data + + +class DeltaRespSchema(DeltaBaseSchema): + """Schema for delta data response""" + + diffs = fields.List(fields.Nested(DeltaDiffFileSchema())) @post_dump def patch_field(self, data, **kwargs): diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py index 5eb1601c..0d3634e5 100644 --- a/server/mergin/sync/models.py +++ b/server/mergin/sync/models.py @@ -24,9 +24,10 @@ from flask import current_app from .files import ( - ChangeDiffFile, - ProjectVersionChangeDelta, - ProjectVersionChangeDeltaSchema, + DeltaMerged, + DeltaDiffFile, + DeltaData, + DeltaDataSchema, ProjectDiffFile, ProjectFileChange, ChangesSchema, @@ -932,52 
+933,49 @@ class ProjectVersionChange(db.Model): ) @staticmethod - def merge_changes( - changes: List[ProjectVersionChange], - ) -> List[ProjectVersionChangeDelta]: + def merge_delta_items( + items: List[DeltaData], + ) -> List[DeltaMerged]: """ - Merge multiple changes jsons into one list of changes. + Merge multiple changes json array objects into one list of changes. Changes are merged based on file path and change type. """ - result: Dict[str, ProjectVersionChangeDelta] = {} - for change in changes: - for item in change.delta: - current_data: ProjectVersionChangeDelta = ( - ProjectVersionChangeDeltaSchema().load(item) - ) - existing_data = result.get(current_data.path) - path = current_data.path - if existing_data: - # merge changes data jsons - if existing_data.change == PushChangeType.CREATE: - if current_data.change == PushChangeType.DELETE: - # create + delete = nothing - del result[path] - elif current_data.change in ( - PushChangeType.UPDATE.value, - PushChangeType.UPDATE_DIFF.value, - ): - # create + update = create with updated info - current_data.change = existing_data.change - current_data.diffs = None - else: - result[path] = current_data - elif existing_data.change == PushChangeType.UPDATE: - if current_data.change == PushChangeType.UPDATE_DIFF: - # update + update_diff = update_diff with latest info - current_data.change = existing_data.change - current_data.diffs = None - result[path] = current_data - elif existing_data.change == PushChangeType.UPDATE_DIFF.value: - if current_data.change == PushChangeType.UPDATE_DIFF.value: - # update_diff + update_diff = update_diff with latest info - current_data.diffs.extend(existing_data.diffs or []) - result[path] = current_data + result: Dict[str, DeltaMerged] = {} + for item in items: + current = item.to_merged_delta() + existing = result.get(current.path) + path = current.path + if existing: + # merge changes data jsons + if existing.change == PushChangeType.CREATE: + if current.change == PushChangeType.DELETE: + # create + delete = nothing + del result[path] + elif current.change in ( + PushChangeType.UPDATE, + PushChangeType.UPDATE_DIFF, + ): + # create + update = create with updated info + current.change = existing.change + current.diffs = [] else: - # delete + anything = anything - result[path] = current_data + result[path] = current + elif existing.change == PushChangeType.UPDATE: + if current.change == PushChangeType.UPDATE_DIFF: + # update + update_diff = update_diff with latest info + current.change = existing.change + current.diffs = [] + result[path] = current + elif existing.change == PushChangeType.UPDATE_DIFF: + if current.change == PushChangeType.UPDATE_DIFF: + # update_diff + update_diff = update_diff with latest info + current.diffs.extend(existing.diffs or []) + result[path] = current else: - result[current_data.path] = current_data + # delete + anything = anything + result[path] = current + else: + result[current.path] = current return list(result.values()) @classmethod @@ -985,30 +983,37 @@ def _create_checkpoint( cls, project_id: str, checkpoint: Checkpoint, - individual_changes: List[ProjectVersionChange], + changes: List[ProjectVersionChange] = [], ) -> Optional[ProjectVersionChange]: """ Creates and caches a new ProjectVersionChange checkpoint and any required FileDiff checkpoints. 
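        One cached entry of the stored delta for a rank-1 checkpoint over
        v1-v4 would look roughly like this (illustrative values only; the
        single "diff" key points at the checkpoint diff created below):

            {"path": "survey.gpkg", "size": 1024, "checksum": "9adb76bf",
             "version": 4, "change": "update_diff", "diff": "survey.gpkg-diff-abc123"}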
""" - individual_changes_range = [ + changes_range = [ change - for change in individual_changes - if checkpoint.start < change.version.name <= checkpoint.end + for change in changes + if checkpoint.start <= change.version.name <= checkpoint.end ] - if not individual_changes_range: + if not changes_range: logging.warning( f"No individual changes found for project {project_id} in range v{checkpoint.start}-v{checkpoint.end} to create checkpoint." ) return None - merged_deltas = cls.merge_changes(individual_changes_range) - versioned_deltas = [ - item for item in merged_deltas if is_versioned_file(item.path) + # dump delta objects from database and flatten list for merging + deltas = [] + for change in changes_range: + delta_items = DeltaDataSchema(many=True).load(change.delta) + deltas.extend(delta_items) + merged_delta_items: List[DeltaData] = [ + d.to_data_delta() for d in cls.merge_delta_items(deltas) ] # Pre-fetch data for all versioned files to create FileDiff checkpoints - versioned_file_paths = [delta.path for delta in versioned_deltas] + versioned_delta_items = [ + item for item in merged_delta_items if is_versioned_file(item.path) + ] + versioned_file_paths = [delta.path for delta in versioned_delta_items] if versioned_file_paths: file_paths = ProjectFilePath.query.filter( ProjectFilePath.project_id == project_id, @@ -1016,7 +1021,7 @@ def _create_checkpoint( ).all() file_path_map = {fp.path: fp.id for fp in file_paths} - for item in versioned_deltas: + for item in versioned_delta_items: file_path_id = file_path_map.get(item.path) if not file_path_id: continue @@ -1043,21 +1048,35 @@ def _create_checkpoint( version=checkpoint.end, ) # Patch the delta with the path to the new diff checkpoint - item.diffs = [ChangeDiffFile(path=diff_path, size=0)] + if item.change == PushChangeType.UPDATE_DIFF: + item.diff = diff_path db.session.add(checkpoint_diff) checkpoint_change = ProjectVersionChange( - version_id=individual_changes_range[-1].version_id, + version_id=changes_range[-1].version_id, rank=checkpoint.rank, - delta=[{**asdict(c), "change": c.change.value} for c in merged_deltas], + delta=DeltaDataSchema(many=True).dump(merged_delta_items), ) db.session.add(checkpoint_change) + db.session.commit() return checkpoint_change + @classmethod + def query_changes(cls, project_id, since, to, rank=None): + """Query changes with specified parameters""" + query = cls.query.join(ProjectVersion).filter( + ProjectVersion.project_id == project_id, + ProjectVersion.name >= since, + ProjectVersion.name <= to, + ) + if rank is not None: + query = query.filter(ProjectVersionChange.rank == rank) + return query.order_by(ProjectVersion.name).all() + @classmethod def get_delta( cls, project_id: str, since: int, to: int - ) -> Optional[List[ProjectVersionChangeDelta]]: + ) -> Optional[List[DeltaMerged]]: """ Get changes between two versions, merging them if needed. - create FileDiff checkpoints if needed @@ -1079,11 +1098,7 @@ def get_delta( ) .first() ) - return ( - [ProjectVersionChangeDeltaSchema().load(item) for item in change.delta] - if change - else [] - ) + return [DeltaMerged(**item) for item in change.delta] if change else None expected_checkpoints = Checkpoint.get_checkpoints(since + 1, to) expected_changes: List[ProjectVersionChange] = ( @@ -1104,28 +1119,21 @@ def get_delta( # Cache all individual (rank 0) changes in the required range. 
individual_changes: List[ProjectVersionChange] = [] - changes = [] + result: List[DeltaData] = [] for checkpoint in expected_checkpoints: expected_change = existing_changes_map.get( (checkpoint.rank, checkpoint.end) ) + # we have change in database, just return delta data from it if expected_change: - changes.append(expected_change) + deltas = DeltaDataSchema(many=True).load(expected_change.delta) + result.extend(deltas) continue if checkpoint.rank > 0: individual_changes = ( - ( - ProjectVersionChange.query.join(ProjectVersion) - .filter( - ProjectVersion.project_id == project_id, - ProjectVersion.name > since, - ProjectVersion.name <= to, - ProjectVersionChange.rank == 0, - ) - .all() - ) + cls.query_changes(project_id, checkpoint.start, checkpoint.end, 0) if not individual_changes else individual_changes ) @@ -1133,12 +1141,10 @@ def get_delta( project_id, checkpoint, individual_changes ) if new_checkpoint: - changes.append(new_checkpoint) + deltas = DeltaDataSchema(many=True).load(new_checkpoint.delta) + result.extend(deltas) - db.session.commit() - - deltas = cls.merge_changes(changes) - return deltas + return cls.merge_delta_items(result) class ProjectVersion(db.Model): @@ -1237,25 +1243,21 @@ def __init__( # cache changes data json for version checkpoints # rank 0 is for all changes from start to current version - changes_data = [ - ProjectVersionChangeDelta( + delta_data = [ + DeltaData( path=c.path, change=c.change, size=c.size, checksum=c.checksum, - version=self.to_v_name(name), - diffs=( - [ChangeDiffFile(path=c.diff.path, size=c.diff.size)] - if c.diff - else None - ), + version=name, + diff=c.diff.path if c.diff else None, ) for c in changes ] pvc = ProjectVersionChange( version=self, rank=0, - delta=ProjectVersionChangeDeltaSchema(many=True).dump(changes_data), + delta=DeltaDataSchema(many=True).dump(delta_data), ) db.session.add(pvc) diff --git a/server/mergin/sync/public_api_v2.yaml b/server/mergin/sync/public_api_v2.yaml index c3f6d15a..4cd045c5 100644 --- a/server/mergin/sync/public_api_v2.yaml +++ b/server/mergin/sync/public_api_v2.yaml @@ -393,15 +393,15 @@ paths: in: query required: true schema: - type: string - example: v1 + type: integer + example: 1 description: Start version (exclusive) - name: to in: query required: true schema: - type: string - example: v2 + type: integer + example: 2 description: End version (inclusive) responses: "200": @@ -855,8 +855,8 @@ components: type: string example: 9adb76bf81a34880209040ffe5ee262a090b62ab version: - type: string - example: v2 + type: integer + example: 1 change: type: string enum: [create, update, delete, update_diff] @@ -870,6 +870,3 @@ components: path: type: string example: survey.gpkg-diff-1 - size: - type: integer - example: 512 diff --git a/server/mergin/sync/public_api_v2_controller.py b/server/mergin/sync/public_api_v2_controller.py index 3a2ec924..a63332c4 100644 --- a/server/mergin/sync/public_api_v2_controller.py +++ b/server/mergin/sync/public_api_v2_controller.py @@ -38,7 +38,7 @@ StorageLimitHit, UploadError, ) -from .files import ChangesSchema, ProjectVersionChangeDeltaSchema +from .files import ChangesSchema, DeltaRespSchema from .forms import project_name_validation from .models import ( Project, @@ -407,8 +407,8 @@ def upload_chunk(id: str): @auth_required def get_project_delta(id: str): """Get project changes (delta) between two versions""" - since = ProjectVersion.from_v_name(request.args.get("since")) - to = ProjectVersion.from_v_name(request.args.get("to")) + since = int(request.args.get("since")) 
+ to = int(request.args.get("to")) project = require_project_by_uuid(id, ProjectPermissions.Read) if since < 0 or to < 0: abort(400, "Invalid 'since' or 'to' version") @@ -424,6 +424,6 @@ def get_project_delta(id: str): ProjectVersion.name == to, ).first_or_404() - delta = ProjectVersionChange.get_delta(project.id, since, to) + deltas = ProjectVersionChange.get_delta(project.id, since, to) - return ProjectVersionChangeDeltaSchema(many=True).dump(delta), 200 + return DeltaRespSchema(many=True).dump(deltas), 200 diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py index ed6d8912..83d31b72 100644 --- a/server/mergin/tests/test_public_api_v2.py +++ b/server/mergin/tests/test_public_api_v2.py @@ -8,7 +8,15 @@ import uuid from pygeodiff import GeoDiffLibError -from .utils import add_user, diffs_are_equal, execute_query, push_change +from .utils import ( + add_user, + create_project, + create_workspace, + diffs_are_equal, + execute_query, + login_as_admin, + push_change, +) from ..app import db from tests import test_project, test_workspace_id from ..config import Configuration @@ -21,6 +29,7 @@ ProjectVersionChange, ) from ..sync.files import PushChangeType +from ..sync.utils import is_versioned_file from sqlalchemy.exc import IntegrityError import pytest from datetime import datetime, timedelta, timezone @@ -317,9 +326,9 @@ def test_create_diff_checkpoint(diff_project): def test_project_version_change_delta(diff_project): """Test that ProjectVersionChangeData and its schema work as expected""" - version = diff_project.get_latest_version() + latest_version = diff_project.get_latest_version() project_id = diff_project.id - assert version.name == 10 + assert latest_version.name == 10 assert ProjectVersionChange.get_delta(project_id, 2, 1) is None pvcs: List[ProjectVersionChange] = ( ProjectVersionChange.query.join(ProjectVersion) @@ -352,40 +361,47 @@ def test_project_version_change_delta(diff_project): # get_delta with update diff delta = ProjectVersionChange.get_delta(project_id, 1, 4) assert len(delta) == 1 - assert delta[0].change == PushChangeType.UPDATE_DIFF + assert delta[0].change == PushChangeType.CREATE assert ProjectVersionChange.query.filter_by(rank=1).count() == 0 # create rank 1 checkpoint for v4 delta = ProjectVersionChange.get_delta(project_id, 0, 4) + fh = FileHistory.query.filter_by(project_version_name=3).first() checkpoint_changes = ProjectVersionChange.query.filter_by(rank=1) filediff_checkpoints = FileDiff.query.filter_by(rank=1) checkpoint_change = checkpoint_changes.first() - filediff_checkpoint = filediff_checkpoints.first() + # find checkpoint for base gpkg + base_gpkg_checkpoint = FileDiff.query.filter_by(basefile_id=fh.id, rank=1).first() assert checkpoint_changes.count() == 1 assert checkpoint_change.version_id == pvcs[3].version_id - assert filediff_checkpoints.count() == 1 - assert filediff_checkpoint.version == 4 - # check if filediff basefile is correctly set - assert ( - filediff_checkpoint.basefile_id - == FileHistory.query.filter_by(project_version_name=3).first().id + assert filediff_checkpoints.count() == len( + [file for file in initial_version.files if is_versioned_file(file.path)] ) + assert base_gpkg_checkpoint.version == 4 + # check if filediff basefile is correctly set + assert base_gpkg_checkpoint.basefile_id == fh.id file_history = FileHistory.query.filter_by(project_version_name=4).first() - assert delta[0].change == PushChangeType.UPDATE_DIFF - assert delta[0].version == "v4" - assert delta[0].path == 
file_history.path - assert delta[0].size == file_history.size - assert delta[0].checksum == file_history.checksum - assert len(delta[0].diffs) == 1 - assert delta[0].diffs[0]["path"] == filediff_checkpoint.path - assert delta[0].diffs[0]["size"] == 0 + assert len(delta) == len(initial_version.files) + delta_base_gpkg = [d for d in delta if d.path == "base.gpkg"] + assert len(delta_base_gpkg) == 1 + # from history is clear, that we are just creating geopackage in this range + assert delta_base_gpkg[0].change == PushChangeType.CREATE + assert delta_base_gpkg[0].version == 3 + assert delta_base_gpkg[0].path == file_history.path + assert delta_base_gpkg[0].size == file_history.size + assert delta_base_gpkg[0].checksum == file_history.checksum + assert len(delta_base_gpkg[0].diffs) == 1 + assert delta_base_gpkg[0].diffs[0].path == base_gpkg_checkpoint.path # get data with multiple ranks = 1 level checkpoints 1-4, 5-8 + checkpoint 9 and 10 delta = ProjectVersionChange.get_delta(project_id, 0, 10) - assert len(delta) == 2 - assert delta[0].change == PushChangeType.DELETE - assert delta[1].change == PushChangeType.CREATE + assert len(delta) == len(latest_version.files) + delta_test_gpkg = [d for d in delta if d.path == "test.gpkg"] + assert delta_test_gpkg + assert delta_test_gpkg[0].change == PushChangeType.CREATE assert ProjectVersionChange.query.filter_by(rank=1).count() == 2 + # base gpgk is transparent + assert not next((c for c in delta if c.path == "base.gpkg"), None) pv = push_change( diff_project, "removed", "test.gpkg", diff_project.storage.project_dir @@ -394,6 +410,16 @@ def test_project_version_change_delta(diff_project): assert len(delta) == 1 # test.gpkg is transparent as it was created and deleted in this range assert not next((c for c in delta if c.path == "test.gpkg"), None) + delta_base_gpkg = next((c for c in delta if c.path == "base.gpkg"), None) + assert delta_base_gpkg.change == PushChangeType.DELETE + + # check update diff + delta = ProjectVersionChange.get_delta(project_id, 5, 7) + assert len(delta) == 1 + assert delta[0].change == PushChangeType.UPDATE_DIFF + assert len(delta[0].diffs) == 2 + # find related diff file in file diffs to check relation + assert FileDiff.query.filter_by(path=delta[0].diffs[0].path) push_data = [ @@ -741,35 +767,59 @@ def test_full_push(client): def test_project_delta(client, diff_project): """Test project delta endpoint""" + login_as_admin(client) + user = add_user() + workspace = create_workspace() + initial_project = create_project("empty_project", workspace=workspace, user=user) + working_dir = os.path.join(TMP_DIR, "empty_work_dir") + os.makedirs(os.path.join(TMP_DIR, "empty_work_dir"), exist_ok=True) + # add basefile + shutil.copy( + os.path.join(test_project_dir, "base.gpkg"), + os.path.join(working_dir, "base.gpkg"), + ) + push_change(initial_project, "added", "base.gpkg", working_dir) + response = client.get(f"v2/projects/{initial_project.id}/delta?since=0&to=1") + assert response.status_code == 200 response = client.get(f"v2/projects/{diff_project.id}/delta") assert response.status_code == 400 - response = client.get(f"v2/projects/{diff_project.id}/delta?since=v-1&to=v1") + response = client.get(f"v2/projects/{diff_project.id}/delta?since=-1&to=1") assert response.status_code == 400 - response = client.get(f"v2/projects/{diff_project.id}/delta?since=v1000&to=v2000") + response = client.get(f"v2/projects/{diff_project.id}/delta?since=1000&to=2000") assert response.status_code == 404 - response = 
client.get(f"v2/projects/{diff_project.id}/delta?since=v1&to=v10") + response = client.get(f"v2/projects/{diff_project.id}/delta?since=1&to=10") assert response.status_code == 200 - assert response.json[0]["change"] == PushChangeType.DELETE.value - assert response.json[1]["change"] == PushChangeType.CREATE.value + assert len(response.json) == 1 + assert response.json[0]["change"] == PushChangeType.CREATE.value + assert response.json[0]["version"] == 9 + + # simplate update + response = client.get(f"v2/projects/{diff_project.id}/delta?since=4&to=8") + assert response.status_code == 200 + delta = response.json + assert len(delta) == 1 + + # simulate pull of delta[0] + assert delta[0]["change"] == PushChangeType.UPDATE.value + assert delta[0]["version"] == 7 + assert not delta[0].get("diffs") # integration test for pull mechanism def test_project_pull(client, diff_project): - response = client.get(f"v2/projects/{diff_project.id}/delta?since=v4&to=v8") + """Test project pull mechanisom in v2""" + + response = client.get(f"v2/projects/{diff_project.id}/delta?since=5&to=7") assert response.status_code == 200 delta = response.json assert len(delta) == 1 - - # simulate pull of delta[0] assert delta[0]["change"] == PushChangeType.UPDATE_DIFF.value - assert delta[0]["version"] == "v7" - assert len(delta[0]["diffs"]) == 1 + assert delta[0]["version"] == 7 diff = delta[0]["diffs"][0] assert diff["path"].startswith("base.gpkg-") - assert diff["size"] == 0 # empty diff as in test data response = client.get(f"v2/projects/{diff_project.id}/raw/diff/{diff['path']}") assert response.status_code == 200 created_diff = FileDiff.query.filter_by(path=diff["path"]).first() diff --git a/server/migrations/community/63adc90fca0c_add_project_version_changes.py b/server/migrations/community/63adc90fca0c_add_project_version_changes.py index 5d7ffe8d..bb279743 100644 --- a/server/migrations/community/63adc90fca0c_add_project_version_changes.py +++ b/server/migrations/community/63adc90fca0c_add_project_version_changes.py @@ -54,6 +54,44 @@ def upgrade(): ) # ### end Alembic commands ### + # data migration + op.execute( + """ + INSERT INTO project_version_change (version_id, rank, delta) + SELECT + h.version_id, + 0 AS rank, + jsonb_agg( + jsonb_strip_nulls( + jsonb_build_object( + 'path', fp.path, + 'size', h.size, + 'change', h.change, + 'version', 'v' || h.project_version_name, + 'checksum', h.checksum, + 'diff', fdj.diff_path + ) + ) + ) AS delta + FROM + file_history h + JOIN + project_file_path fp ON h.file_path_id = fp.id + LEFT JOIN LATERAL ( + SELECT + fd.path AS diff_path + FROM + file_diff fd + WHERE + fd.file_path_id = fp.id + AND fd.version = h.project_version_name + AND fd.rank = 0 + ) fdj ON TRUE + GROUP BY + h.version_id; + """ + ) + def downgrade(): # ### commands auto generated by Alembic - please adjust! 
From 29a9eeff19263164acdda211ef7be240b41740cd Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Thu, 9 Oct 2025 18:26:12 +0200
Subject: [PATCH 08/17] Fix missing import
---
 server/mergin/auth/models.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/mergin/auth/models.py b/server/mergin/auth/models.py
index 3ab05f6b..5dcf275e 100644
--- a/server/mergin/auth/models.py
+++ b/server/mergin/auth/models.py
@@ -11,6 +11,7 @@
 from sqlalchemy import or_, func, text

 from ..app import db
+from ..sync.models import ProjectUser
 from ..sync.utils import get_user_agent, get_ip, get_device_id, is_reserved_word

 MAX_USERNAME_LENGTH = 50

From c53b9284cbb4c608b43c8941d9d176c4a7f2c2a4 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Thu, 9 Oct 2025 19:42:05 +0200
Subject: [PATCH 09/17] Fix tests and add checkpoints just for UPDATE_DIFF
---
 server/mergin/sync/models.py              | 10 +++---
 server/mergin/tests/test_public_api_v2.py | 44 ++++++++++++++---------
 2 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py
index 0d3634e5..7d4bc6af 100644
--- a/server/mergin/sync/models.py
+++ b/server/mergin/sync/models.py
@@ -1011,7 +1011,10 @@ def _create_checkpoint(
         # Pre-fetch data for all versioned files to create FileDiff checkpoints
         versioned_delta_items = [
-            item for item in merged_delta_items if is_versioned_file(item.path)
+            item
+            for item in merged_delta_items
+            if is_versioned_file(item.path)
+            and item.change == PushChangeType.UPDATE_DIFF
         ]
         versioned_file_paths = [delta.path for delta in versioned_delta_items]
         if versioned_file_paths:
@@ -1048,8 +1051,7 @@ def _create_checkpoint(
                     version=checkpoint.end,
                 )
                 # Patch the delta with the path to the new diff checkpoint
-                if item.change == PushChangeType.UPDATE_DIFF:
-                    item.diff = diff_path
+                item.diff = diff_path
                 db.session.add(checkpoint_diff)

         checkpoint_change = ProjectVersionChange(
@@ -1133,7 +1135,7 @@ def get_delta(
             if checkpoint.rank > 0:
                 individual_changes = (
-                    cls.query_changes(project_id, checkpoint.start, checkpoint.end, 0)
+                    cls.query_changes(project_id, since, to, 0)
                     if not individual_changes
                     else individual_changes
                 )
diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py
index 83d31b72..c80117e0 100644
--- a/server/mergin/tests/test_public_api_v2.py
+++ b/server/mergin/tests/test_public_api_v2.py
@@ -366,20 +366,13 @@ def test_project_version_change_delta(diff_project):

     # create rank 1 checkpoint for v4
     delta = ProjectVersionChange.get_delta(project_id, 0, 4)
-    fh = FileHistory.query.filter_by(project_version_name=3).first()
     checkpoint_changes = ProjectVersionChange.query.filter_by(rank=1)
     filediff_checkpoints = FileDiff.query.filter_by(rank=1)
     checkpoint_change = checkpoint_changes.first()
-    # find checkpoint for base gpkg
-    base_gpkg_checkpoint = FileDiff.query.filter_by(basefile_id=fh.id, rank=1).first()
     assert checkpoint_changes.count() == 1
     assert checkpoint_change.version_id == pvcs[3].version_id
-    assert filediff_checkpoints.count() == len(
-        [file for file in initial_version.files if is_versioned_file(file.path)]
-    )
-    assert base_gpkg_checkpoint.version == 4
+    assert filediff_checkpoints.count() == 0
     # check if filediff basefile is correctly set
-    assert base_gpkg_checkpoint.basefile_id == fh.id
     file_history = FileHistory.query.filter_by(project_version_name=4).first()
     assert len(delta) == len(initial_version.files)
     delta_base_gpkg = [d for d in delta if d.path == "base.gpkg"]
@@ -390,8 +383,7 @@ def
test_project_version_change_delta(diff_project):
     assert delta_base_gpkg[0].change == PushChangeType.CREATE
     assert delta_base_gpkg[0].version == 3
     assert delta_base_gpkg[0].path == file_history.path
     assert delta_base_gpkg[0].size == file_history.size
     assert delta_base_gpkg[0].checksum == file_history.checksum
-    assert len(delta_base_gpkg[0].diffs) == 1
-    assert delta_base_gpkg[0].diffs[0].path == base_gpkg_checkpoint.path
+    assert len(delta_base_gpkg[0].diffs) == 0

     # get data with multiple ranks = 1 level checkpoints 1-4, 5-8 + checkpoint 9 and 10
     delta = ProjectVersionChange.get_delta(project_id, 0, 10)
@@ -403,13 +395,9 @@ def test_project_version_change_delta(diff_project):
     assert delta_test_gpkg
     assert delta_test_gpkg[0].change == PushChangeType.CREATE
     assert ProjectVersionChange.query.filter_by(rank=1).count() == 2
     # base gpgk is transparent
     assert not next((c for c in delta if c.path == "base.gpkg"), None)

-    pv = push_change(
-        diff_project, "removed", "test.gpkg", diff_project.storage.project_dir
+    delta = ProjectVersionChange.get_delta(
+        project_id, latest_version.name - 3, latest_version.name
     )
-    delta = ProjectVersionChange.get_delta(project_id, pv.name - 3, pv.name)
-    assert len(delta) == 1
-    # test.gpkg is transparent as it was created and deleted in this range
-    assert not next((c for c in delta if c.path == "test.gpkg"), None)
     delta_base_gpkg = next((c for c in delta if c.path == "base.gpkg"), None)
     assert delta_base_gpkg.change == PushChangeType.DELETE

@@ -421,6 +409,30 @@ def test_project_version_change_delta(diff_project):
     # find related diff file in file diffs to check relation
     assert FileDiff.query.filter_by(path=delta[0].diffs[0].path)

+    # create just update_diff versions with checkpoint
+    base_gpkg = os.path.join(diff_project.storage.project_dir, "test.gpkg")
+    shutil.copy(
+        os.path.join(diff_project.storage.project_dir, "v9", "test.gpkg"), base_gpkg
+    )
+    for i in range(7):
+        sql = f"UPDATE simple SET rating={i}"
+        execute_query(base_gpkg, sql)
+    pv = push_change(
+        diff_project, "updated", "test.gpkg", diff_project.storage.project_dir
+    )
+    delta = ProjectVersionChange.get_delta(project_id, 8, latest_version.name + 6)
+    assert len(delta) == 2
+    # file history in 9.th version is basefile
+    fh = FileHistory.query.filter_by(
+        project_version_name=latest_version.name - 1
+    ).first()
+    base_gpkg_checkpoint = FileDiff.query.filter_by(basefile_id=fh.id, rank=1).first()
+    assert base_gpkg_checkpoint.basefile_id == fh.id
+
+    delta = ProjectVersionChange.get_delta(project_id, 12, latest_version.name + 6)
+    assert len(delta) == 1
+    assert delta[0].diffs[0].path == base_gpkg_checkpoint.path

 push_data = [
     # success

From dce0e1147427749b1bfaed9a4e1ade0321c9f2b8 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Thu, 9 Oct 2025 19:43:46 +0200
Subject: [PATCH 10/17] Add safe check for downloading
---
 server/mergin/tests/test_public_api_v2.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py
index c80117e0..e5667f8a 100644
--- a/server/mergin/tests/test_public_api_v2.py
+++ b/server/mergin/tests/test_public_api_v2.py
@@ -324,7 +324,7 @@ def test_create_diff_checkpoint(diff_project):
     assert not os.path.exists(diff.abs_path)

-def test_project_version_change_delta(diff_project):
+def test_project_version_change_delta(client, diff_project):
     """Test that ProjectVersionChangeData and its schema work as expected"""
     latest_version = diff_project.get_latest_version()
     project_id = diff_project.id
@@ -432,6 +432,11 @@ def test_project_version_change_delta(client, diff_project):
     delta = ProjectVersionChange.get_delta(project_id, 12, latest_version.name + 6)
     assert len(delta) == 1
     assert delta[0].diffs[0].path ==
base_gpkg_checkpoint.path
+    # check if checkpoint will be there
+    response = client.get(
+        f"v2/projects/{diff_project.id}/raw/diff/{delta[0].diffs[0].path}"
+    )
+    assert response.status_code == 200


 push_data = [

From 832603dc4a0c0bc34a54ca478f623fc751fa37b5 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Thu, 9 Oct 2025 20:37:16 +0200
Subject: [PATCH 11/17] Improve tests
---
 server/mergin/tests/test_public_api_v2.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py
index e5667f8a..85fcbe9f 100644
--- a/server/mergin/tests/test_public_api_v2.py
+++ b/server/mergin/tests/test_public_api_v2.py
@@ -414,7 +414,7 @@ def test_project_version_change_delta(client, diff_project):
     shutil.copy(
         os.path.join(diff_project.storage.project_dir, "v9", "test.gpkg"), base_gpkg
     )
-    for i in range(7):
+    for i in range(6):
         sql = f"UPDATE simple SET rating={i}"
         execute_query(base_gpkg, sql)
     pv = push_change(
         diff_project, "updated", "test.gpkg", diff_project.storage.project_dir
     )
@@ -427,11 +427,20 @@ def test_project_version_change_delta(client, diff_project):
         project_version_name=latest_version.name - 1
     ).first()
     base_gpkg_checkpoint = FileDiff.query.filter_by(basefile_id=fh.id, rank=1).first()
-    assert base_gpkg_checkpoint.basefile_id == fh.id
+    assert base_gpkg_checkpoint
+    assert base_gpkg_checkpoint.version == latest_version.name + 6

+    fh = FileHistory.query.filter_by(
+        project_version_name=latest_version.name + 6
+    ).first()
     delta = ProjectVersionChange.get_delta(project_id, 12, latest_version.name + 6)
     assert len(delta) == 1
+    assert len(delta[0].diffs) == 1
     assert delta[0].diffs[0].path == base_gpkg_checkpoint.path
+    assert delta[0].change == PushChangeType.UPDATE_DIFF
+    assert delta[0].checksum == fh.checksum
+    assert delta[0].size == fh.size
+
     # check if checkpoint will be there
     response = client.get(
         f"v2/projects/{diff_project.id}/raw/diff/{delta[0].diffs[0].path}"
     )

From a9140e804d8369eb64b65e7038a6d67ddb01c836 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Fri, 10 Oct 2025 18:46:24 +0200
Subject: [PATCH 12/17] Address comments @varmar05 1.
part - rename table to project version delta with column changes - rename classes - add get_delta_changes to project instance (nice) - fix migration to int --- server/mergin/sync/files.py | 40 +-- server/mergin/sync/models.py | 257 +++++++++--------- server/mergin/sync/public_api_v2.yaml | 8 +- .../mergin/sync/public_api_v2_controller.py | 18 +- server/mergin/tests/test_public_api_v2.py | 47 ++-- ...9acf967e58ad_add_project_version_delta.py} | 45 ++- 6 files changed, 203 insertions(+), 212 deletions(-) rename server/migrations/community/{63adc90fca0c_add_project_version_changes.py => 9acf967e58ad_add_project_version_delta.py} (65%) diff --git a/server/mergin/sync/files.py b/server/mergin/sync/files.py index 9838fba6..f8103926 100644 --- a/server/mergin/sync/files.py +++ b/server/mergin/sync/files.py @@ -245,26 +245,34 @@ def patch_field(self, data, **kwargs): @dataclass class DeltaDiffFile: + """Diff file path in diffs list""" + path: str -class DeltaDiffFileSchema(ma.Schema): +class DeltaChangeDiffFileSchema(ma.Schema): + """Schema for diff file path in diffs list""" + path = fields.String(required=True) @dataclass -class DeltaBase(File): +class DeltaChangeBase(File): + """Base class for changes stored in json list or returned from delta endpoint""" + change: PushChangeType version: int @dataclass -class DeltaMerged(DeltaBase): +class DeltaChangeMerged(DeltaChangeBase): + """Delta item with merged diffs to list of multiple diff files""" + diffs: List[DeltaDiffFile] = field(default_factory=list) def to_data_delta(self): """Convert DeltaMerged to DeltaData with single diff""" - result = DeltaData( + result = DeltaChange( path=self.path, size=self.size, checksum=self.checksum, @@ -277,16 +285,14 @@ def to_data_delta(self): @dataclass -class DeltaData(File): - """Delta data stored in database""" +class DeltaChange(DeltaChangeBase): + """Delta items stored in database as list of this item with single diff file""" - change: PushChangeType - version: int diff: Optional[str] = None - def to_merged_delta(self) -> DeltaMerged: + def to_merged_delta(self) -> DeltaChangeMerged: """Convert DeltaData to DeltaMerged with multiple diffs""" - result = DeltaMerged( + result = DeltaChangeMerged( path=self.path, size=self.size, checksum=self.checksum, @@ -298,8 +304,8 @@ def to_merged_delta(self) -> DeltaMerged: return result -class DeltaBaseSchema(ma.Schema): - """Base schema for detla json and response from delta endpoint""" +class DeltaChangeBaseSchema(ma.Schema): + """Base schema for delta json and response from delta endpoint""" path = fields.String(required=True) size = fields.Integer(required=True) @@ -308,14 +314,14 @@ class DeltaBaseSchema(ma.Schema): change = fields.Enum(PushChangeType, by_value=True, required=True) -class DeltaDataSchema(DeltaBaseSchema): - """Schema for delta data in database""" +class DeltaChangeSchema(DeltaChangeBaseSchema): + """Schema for change data in changes column""" diff = fields.String(required=False) @post_load def make_object(self, data, **kwargs): - return DeltaData(**data) + return DeltaChange(**data) @post_dump def patch_field(self, data, **kwargs): @@ -325,10 +331,10 @@ def patch_field(self, data, **kwargs): return data -class DeltaRespSchema(DeltaBaseSchema): +class DeltaChangeRespSchema(DeltaChangeBaseSchema): """Schema for delta data response""" - diffs = fields.List(fields.Nested(DeltaDiffFileSchema())) + diffs = fields.List(fields.Nested(DeltaChangeDiffFileSchema())) @post_dump def patch_field(self, data, **kwargs): diff --git a/server/mergin/sync/models.py 
b/server/mergin/sync/models.py
index 7d4bc6af..6fd6a6a4 100644
--- a/server/mergin/sync/models.py
+++ b/server/mergin/sync/models.py
@@ -24,10 +24,10 @@
 from flask import current_app

 from .files import (
-    DeltaMerged,
+    DeltaChangeMerged,
     DeltaDiffFile,
-    DeltaData,
-    DeltaDataSchema,
+    DeltaChange,
+    DeltaChangeSchema,
     ProjectDiffFile,
     ProjectFileChange,
     ChangesSchema,
@@ -353,6 +353,86 @@ def bulk_roles_update(self, access: Dict) -> Set[int]:

         return set(id_diffs)

+    def get_delta_changes(
+        self, since: int, to: int
+    ) -> Optional[List[DeltaChangeMerged]]:
+        """
+        Get changes between two versions, merging them if needed.
+        - create FileDiff checkpoints if needed
+        - create ProjectVersionDelta checkpoints if needed with delta json
+        """
+        if since > to:
+            logging.error(
+                f"Start version {since} is higher than end version {to} - broken history"
+            )
+            return
+        if since == to:
+            return None
+        project_id = self.id
+        expected_checkpoints = Checkpoint.get_checkpoints(since + 1, to)
+        expected_deltas: List[ProjectVersionDelta] = (
+            ProjectVersionDelta.query.join(ProjectVersion)
+            .filter(
+                ProjectVersion.project_id == project_id,
+                ProjectVersion.name > since,
+                ProjectVersion.name <= to,
+                tuple_(ProjectVersionDelta.rank, ProjectVersion.name).in_(
+                    [(item.rank, item.end) for item in expected_checkpoints]
+                ),
+            )
+            .order_by(ProjectVersion.name)
+            .all()
+        )
+        existing_delta_map = {(c.rank, c.version.name): c for c in expected_deltas}
+
+        # Cache all individual (rank 0) delta rows in the required range.
+        individual_deltas: List[ProjectVersionDelta] = []
+
+        result: List[DeltaChange] = []
+        for checkpoint in expected_checkpoints:
+            existing_delta = existing_delta_map.get((checkpoint.rank, checkpoint.end))
+
+            # the delta is already cached in the database, just return its changes
+            if existing_delta:
+                result.extend(DeltaChangeSchema(many=True).load(existing_delta.changes))
+                continue
+
+            # If a higher rank delta checkpoint does not exist yet, use the rank=0 deltas to create it
+            if checkpoint.rank > 0:
+                individual_deltas = (
+                    ProjectVersionDelta.query.join(ProjectVersion)
+                    .filter(
+                        ProjectVersion.project_id == project_id,
+                        ProjectVersion.name >= since,
+                        ProjectVersion.name <= to,
+                        ProjectVersionDelta.rank == 0,
+                    )
+                    .order_by(ProjectVersion.name)
+                    .all()
+                    if not individual_deltas
+                    else individual_deltas
+                )
+
+                if not individual_deltas:
+                    logging.error(
+                        f"No individual deltas found for project {project_id} in range {since} / {to} to create checkpoint."
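The pairwise folding that merge_delta_changes applies below (per file path, oldest to newest) collapses to a small table; a sketch for orientation only, not the model code, with PushChangeType imported as the tests import it (pairs not listed keep the earlier entry unchanged):

from mergin.sync.files import PushChangeType as C

# (previous, current) -> merged change; None means the file is transparent,
# i.e. it was created and deleted within the merged range
FOLD = {
    (C.CREATE, C.DELETE): None,
    (C.CREATE, C.UPDATE): C.CREATE,        # keep CREATE with the newest metadata
    (C.CREATE, C.UPDATE_DIFF): C.CREATE,   # diffs dropped, whole file is shipped
    (C.UPDATE, C.UPDATE_DIFF): C.UPDATE,   # a full-file upload supersedes diffs
    (C.UPDATE_DIFF, C.UPDATE_DIFF): C.UPDATE_DIFF,  # diff lists are concatenated
    (C.DELETE, C.CREATE): C.CREATE,
}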
+                    )
+                    return
+
+                new_checkpoint = ProjectVersionDelta.create_checkpoint(
+                    project_id, checkpoint, individual_deltas
+                )
+                if new_checkpoint:
+                    result.extend(
+                        DeltaChangeSchema(many=True).load(new_checkpoint.changes)
+                    )
+                else:
+                    logging.error(
+                        f"Unable to create checkpoint for project {project_id} in range {checkpoint.start}-{checkpoint.end}"
+                    )
+
+        return ProjectVersionDelta.merge_delta_changes(result)
+

 class ProjectRole(Enum):
     """Project roles ordered by rank (do not change)"""
@@ -905,7 +985,7 @@ def construct_checkpoint(self) -> bool:
         return True

-class ProjectVersionChange(db.Model):
+class ProjectVersionDelta(db.Model):
     id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
     # exponential order of changes json
     rank = db.Column(db.Integer, nullable=False, index=True)
@@ -917,12 +997,12 @@ class ProjectVersionChange(db.Model):
         nullable=False,
     )
     # cached changes for versions from start to end (inclusive)
-    delta = db.Column(JSONB, nullable=False)
+    changes = db.Column(JSONB, nullable=False)

     __table_args__ = (
-        db.UniqueConstraint("version_id", "rank", name="unique_changes"),
+        db.UniqueConstraint("version_id", "rank", name="unique_deltas"),
         db.Index(
-            "ix_project_version_change_version_id_rank",
+            "ix_project_version_delta_version_id_rank",
             version_id,
             rank,
         ),
     )

     @staticmethod
-    def merge_delta_items(
-        items: List[DeltaData],
-    ) -> List[DeltaMerged]:
+    def merge_delta_changes(
+        items: List[DeltaChange],
+    ) -> List[DeltaChangeMerged]:
         """
         Merge multiple changes json array objects into one list of changes.
         Changes are merged based on file path and change type.
         """
-        result: Dict[str, DeltaMerged] = {}
+        result: Dict[str, DeltaChangeMerged] = {}
         for item in items:
             current = item.to_merged_delta()
-            existing = result.get(current.path)
+            previous = result.get(current.path)
             path = current.path
-            if existing:
+            if previous:
                 # merge changes data jsons
-                if existing.change == PushChangeType.CREATE:
+                if previous.change == PushChangeType.CREATE:
                     if current.change == PushChangeType.DELETE:
-                        # create + delete = nothing
+                        # create + delete = file is transparent for current changes -> delete it
                         del result[path]
                     elif current.change in (
                         PushChangeType.UPDATE,
                         PushChangeType.UPDATE_DIFF,
                     ):
-                        # create + update = create with updated info
-                        current.change = existing.change
+                        # create + update = create with the most recent metadata
+                        current.change = previous.change
                         current.diffs = []
-                    else:
-                        result[path] = current
-                elif existing.change == PushChangeType.UPDATE:
+                elif previous.change == PushChangeType.UPDATE:
                     if current.change == PushChangeType.UPDATE_DIFF:
-                        # update + update_diff = update_diff with latest info
-                        current.change = existing.change
+                        # update + update_diff = update with latest info
+                        current.change = previous.change
                         current.diffs = []
                         result[path] = current
-                elif existing.change == PushChangeType.UPDATE_DIFF:
+                elif previous.change == PushChangeType.UPDATE_DIFF:
                     if current.change == PushChangeType.UPDATE_DIFF:
                         # update_diff + update_diff = update_diff with latest info
-                        current.diffs.extend(existing.diffs or [])
+                        current.diffs.extend(previous.diffs or [])
                         result[path] = current
-                else:
-                    # delete + anything = anything
-                    result[path] = current
+                elif previous.change == PushChangeType.DELETE:
+                    if current.change == PushChangeType.CREATE:
+                        # delete + create = create
+                        result[path] = current
             else:
                 result[current.path] = current
         return list(result.values())

     @classmethod
-    def
_create_checkpoint( + def create_checkpoint( cls, project_id: str, checkpoint: Checkpoint, - changes: List[ProjectVersionChange] = [], - ) -> Optional[ProjectVersionChange]: + changes: List[ProjectVersionDelta] = [], + ) -> Optional[ProjectVersionDelta]: """ - Creates and caches a new ProjectVersionChange checkpoint and any required FileDiff checkpoints. + Creates and caches new checkpoint and any required FileDiff checkpoints. """ - changes_range = [ + delta_range = [ change for change in changes if checkpoint.start <= change.version.name <= checkpoint.end ] - if not changes_range: + if not delta_range: logging.warning( f"No individual changes found for project {project_id} in range v{checkpoint.start}-v{checkpoint.end} to create checkpoint." ) return None # dump delta objects from database and flatten list for merging - deltas = [] - for change in changes_range: - delta_items = DeltaDataSchema(many=True).load(change.delta) - deltas.extend(delta_items) - merged_delta_items: List[DeltaData] = [ - d.to_data_delta() for d in cls.merge_delta_items(deltas) + changes = [] + for delta in delta_range: + changes.extend(DeltaChangeSchema(many=True).load(delta.changes)) + merged_delta_items: List[DeltaChange] = [ + d.to_data_delta() for d in cls.merge_delta_changes(changes) ] # Pre-fetch data for all versioned files to create FileDiff checkpoints @@ -1054,99 +1132,14 @@ def _create_checkpoint( item.diff = diff_path db.session.add(checkpoint_diff) - checkpoint_change = ProjectVersionChange( - version_id=changes_range[-1].version_id, + checkpoint_delta = ProjectVersionDelta( + version_id=delta_range[-1].version_id, rank=checkpoint.rank, - delta=DeltaDataSchema(many=True).dump(merged_delta_items), + changes=DeltaChangeSchema(many=True).dump(merged_delta_items), ) - db.session.add(checkpoint_change) + db.session.add(checkpoint_delta) db.session.commit() - return checkpoint_change - - @classmethod - def query_changes(cls, project_id, since, to, rank=None): - """Query changes with specified parameters""" - query = cls.query.join(ProjectVersion).filter( - ProjectVersion.project_id == project_id, - ProjectVersion.name >= since, - ProjectVersion.name <= to, - ) - if rank is not None: - query = query.filter(ProjectVersionChange.rank == rank) - return query.order_by(ProjectVersion.name).all() - - @classmethod - def get_delta( - cls, project_id: str, since: int, to: int - ) -> Optional[List[DeltaMerged]]: - """ - Get changes between two versions, merging them if needed. 
- - create FileDiff checkpoints if needed - - create ProjectVersionChange checkpoints if needed with delta json - """ - if since > to: - logging.error( - f"Start version {since} is higher than end version {to} - broken history" - ) - return - if since == to: - # Return only changes for this version - change = ( - ProjectVersionChange.query.join(ProjectVersion) - .filter( - ProjectVersion.project_id == project_id, - ProjectVersion.name == to, - ProjectVersionChange.rank == 0, - ) - .first() - ) - return [DeltaMerged(**item) for item in change.delta] if change else None - - expected_checkpoints = Checkpoint.get_checkpoints(since + 1, to) - expected_changes: List[ProjectVersionChange] = ( - ProjectVersionChange.query.join(ProjectVersion) - .filter( - ProjectVersion.project_id == project_id, - ProjectVersion.name > since, - ProjectVersion.name <= to, - tuple_(ProjectVersionChange.rank, ProjectVersion.name).in_( - [(item.rank, item.end) for item in expected_checkpoints] - ), - ) - .order_by(ProjectVersion.name) - .all() - ) - existing_changes_map = {(c.rank, c.version.name): c for c in expected_changes} - - # Cache all individual (rank 0) changes in the required range. - individual_changes: List[ProjectVersionChange] = [] - - result: List[DeltaData] = [] - for checkpoint in expected_checkpoints: - expected_change = existing_changes_map.get( - (checkpoint.rank, checkpoint.end) - ) - - # we have change in database, just return delta data from it - if expected_change: - deltas = DeltaDataSchema(many=True).load(expected_change.delta) - result.extend(deltas) - continue - - if checkpoint.rank > 0: - individual_changes = ( - cls.query_changes(project_id, since, to, 0) - if not individual_changes - else individual_changes - ) - new_checkpoint = cls._create_checkpoint( - project_id, checkpoint, individual_changes - ) - if new_checkpoint: - deltas = DeltaDataSchema(many=True).load(new_checkpoint.delta) - result.extend(deltas) - - return cls.merge_delta_items(result) + return checkpoint_delta class ProjectVersion(db.Model): @@ -1246,7 +1239,7 @@ def __init__( # cache changes data json for version checkpoints # rank 0 is for all changes from start to current version delta_data = [ - DeltaData( + DeltaChange( path=c.path, change=c.change, size=c.size, @@ -1256,10 +1249,10 @@ def __init__( ) for c in changes ] - pvc = ProjectVersionChange( + pvc = ProjectVersionDelta( version=self, rank=0, - delta=DeltaDataSchema(many=True).dump(delta_data), + changes=DeltaChangeSchema(many=True).dump(delta_data), ) db.session.add(pvc) diff --git a/server/mergin/sync/public_api_v2.yaml b/server/mergin/sync/public_api_v2.yaml index 4cd045c5..e30d49c1 100644 --- a/server/mergin/sync/public_api_v2.yaml +++ b/server/mergin/sync/public_api_v2.yaml @@ -836,6 +836,10 @@ components: - editor - writer - owner + ProjectChangeType: + type: string + enum: [create, update, delete, update_diff] + example: update ProjectDelta: type: object required: @@ -858,9 +862,7 @@ components: type: integer example: 1 change: - type: string - enum: [create, update, delete, update_diff] - example: update + $ref: "#/components/schemas/ProjectChangeType" diffs: type: array nullable: true diff --git a/server/mergin/sync/public_api_v2_controller.py b/server/mergin/sync/public_api_v2_controller.py index a63332c4..5debe031 100644 --- a/server/mergin/sync/public_api_v2_controller.py +++ b/server/mergin/sync/public_api_v2_controller.py @@ -22,7 +22,7 @@ from ..app import db from ..auth import auth_required from ..auth.models import User -from .models import 
FileDiff, Project, ProjectRole, ProjectMember, ProjectVersionChange +from .models import FileDiff, Project, ProjectRole, ProjectMember, ProjectVersionDelta from .permissions import ProjectPermissions, require_project_by_uuid from .utils import prepare_download_response from ..app import db @@ -38,7 +38,7 @@ StorageLimitHit, UploadError, ) -from .files import ChangesSchema, DeltaRespSchema +from .files import ChangesSchema, DeltaChangeRespSchema from .forms import project_name_validation from .models import ( Project, @@ -404,12 +404,12 @@ def upload_chunk(id: str): ) -@auth_required def get_project_delta(id: str): """Get project changes (delta) between two versions""" since = int(request.args.get("since")) to = int(request.args.get("to")) - project = require_project_by_uuid(id, ProjectPermissions.Read) + project: Project = require_project_by_uuid(id, ProjectPermissions.Read) + latest_version = project.latest_version if since < 0 or to < 0: abort(400, "Invalid 'since' or 'to' version") if since > to: @@ -419,11 +419,9 @@ def get_project_delta(id: str): ProjectVersion.project_id == project.id, ProjectVersion.name == since, ).first_or_404() - ProjectVersion.query.filter( - ProjectVersion.project_id == project.id, - ProjectVersion.name == to, - ).first_or_404() + if to > latest_version: + abort(404) - deltas = ProjectVersionChange.get_delta(project.id, since, to) + delta_changes = project.get_delta_changes(since, to) - return DeltaRespSchema(many=True).dump(deltas), 200 + return DeltaChangeRespSchema(many=True).dump(delta_changes), 200 diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py index 85fcbe9f..c0eb6191 100644 --- a/server/mergin/tests/test_public_api_v2.py +++ b/server/mergin/tests/test_public_api_v2.py @@ -26,7 +26,7 @@ Project, ProjectFilePath, ProjectRole, - ProjectVersionChange, + ProjectVersionDelta, ) from ..sync.files import PushChangeType from ..sync.utils import is_versioned_file @@ -324,14 +324,15 @@ def test_create_diff_checkpoint(diff_project): assert not os.path.exists(diff.abs_path) -def test_project_version_change_delta(client, diff_project): - """Test that ProjectVersionChangeData and its schema work as expected""" +def test_project_version_delta_changes(client, diff_project: Project): + """Test that get_delta_changes and its schema work as expected""" latest_version = diff_project.get_latest_version() project_id = diff_project.id assert latest_version.name == 10 - assert ProjectVersionChange.get_delta(project_id, 2, 1) is None - pvcs: List[ProjectVersionChange] = ( - ProjectVersionChange.query.join(ProjectVersion) + assert diff_project.get_delta_changes(2, 1) is None + assert diff_project.get_delta_changes(2, 2) is None + pvcs: List[ProjectVersionDelta] = ( + ProjectVersionDelta.query.join(ProjectVersion) .filter(ProjectVersion.project_id == diff_project.id) .all() ) @@ -340,33 +341,27 @@ def test_project_version_change_delta(client, diff_project): initial_version = initial_pvc.version assert initial_pvc.rank == 0 assert initial_pvc.version.id == initial_version.id - # if version is the same, return just its delta in v1 - assert len(ProjectVersionChange.get_delta(project_id, 1, 1)) == len( - initial_version.files - ) - # no ranks created as we get here just first version with get_delta - assert ProjectVersionChange.query.filter_by(rank=1).count() == 0 - delta = ProjectVersionChange.get_delta(project_id, 1, 2) + delta = diff_project.get_delta_changes(1, 2) assert len(delta) == 1 assert delta[0].change == 
PushChangeType.DELETE # no ranks created as we get here just first version with get_delta - assert ProjectVersionChange.query.filter_by(rank=1).count() == 0 + assert ProjectVersionDelta.query.filter_by(rank=1).count() == 0 # delete + create version - delta = ProjectVersionChange.get_delta(project_id, 1, 3) + delta = diff_project.get_delta_changes(1, 3) assert len(delta) == 1 assert delta[0].change == PushChangeType.CREATE # get_delta with update diff - delta = ProjectVersionChange.get_delta(project_id, 1, 4) + delta = diff_project.get_delta_changes(1, 4) assert len(delta) == 1 assert delta[0].change == PushChangeType.CREATE - assert ProjectVersionChange.query.filter_by(rank=1).count() == 0 + assert ProjectVersionDelta.query.filter_by(rank=1).count() == 0 # create rank 1 checkpoint for v4 - delta = ProjectVersionChange.get_delta(project_id, 0, 4) - checkpoint_changes = ProjectVersionChange.query.filter_by(rank=1) + delta = diff_project.get_delta_changes(0, 4) + checkpoint_changes = ProjectVersionDelta.query.filter_by(rank=1) filediff_checkpoints = FileDiff.query.filter_by(rank=1) checkpoint_change = checkpoint_changes.first() assert checkpoint_changes.count() == 1 @@ -386,23 +381,21 @@ def test_project_version_change_delta(client, diff_project): assert len(delta_base_gpkg[0].diffs) == 0 # get data with multiple ranks = 1 level checkpoints 1-4, 5-8 + checkpoint 9 and 10 - delta = ProjectVersionChange.get_delta(project_id, 0, 10) + delta = diff_project.get_delta_changes(0, 10) assert len(delta) == len(latest_version.files) delta_test_gpkg = [d for d in delta if d.path == "test.gpkg"] assert delta_test_gpkg assert delta_test_gpkg[0].change == PushChangeType.CREATE - assert ProjectVersionChange.query.filter_by(rank=1).count() == 2 + assert ProjectVersionDelta.query.filter_by(rank=1).count() == 2 # base gpgk is transparent assert not next((c for c in delta if c.path == "base.gpkg"), None) - delta = ProjectVersionChange.get_delta( - project_id, latest_version.name - 3, latest_version.name - ) + delta = diff_project.get_delta_changes(latest_version.name - 3, latest_version.name) delta_base_gpkg = next((c for c in delta if c.path == "base.gpkg"), None) assert delta_base_gpkg.change == PushChangeType.DELETE # check update diff - delta = ProjectVersionChange.get_delta(project_id, 5, 7) + delta = diff_project.get_delta_changes(5, 7) assert len(delta) == 1 assert delta[0].change == PushChangeType.UPDATE_DIFF assert len(delta[0].diffs) == 2 @@ -420,7 +413,7 @@ def test_project_version_change_delta(client, diff_project): pv = push_change( diff_project, "updated", "test.gpkg", diff_project.storage.project_dir ) - delta = ProjectVersionChange.get_delta(project_id, 8, latest_version.name + 6) + delta = diff_project.get_delta_changes(8, latest_version.name + 6) assert len(delta) == 2 # file history in 9.th version is basefile fh = FileHistory.query.filter_by( @@ -433,7 +426,7 @@ def test_project_version_change_delta(client, diff_project): fh = FileHistory.query.filter_by( project_version_name=latest_version.name + 6 ).first() - delta = ProjectVersionChange.get_delta(project_id, 12, latest_version.name + 6) + delta = diff_project.get_delta_changes(12, latest_version.name + 6) assert len(delta) == 1 assert len(delta[0].diffs) == 1 assert delta[0].diffs[0].path == base_gpkg_checkpoint.path diff --git a/server/migrations/community/63adc90fca0c_add_project_version_changes.py b/server/migrations/community/9acf967e58ad_add_project_version_delta.py similarity index 65% rename from 
server/migrations/community/63adc90fca0c_add_project_version_changes.py rename to server/migrations/community/9acf967e58ad_add_project_version_delta.py index bb279743..793a5978 100644 --- a/server/migrations/community/63adc90fca0c_add_project_version_changes.py +++ b/server/migrations/community/9acf967e58ad_add_project_version_delta.py @@ -1,8 +1,8 @@ -"""Add project version changes +"""Add project version delta -Revision ID: 63adc90fca0c +Revision ID: 9acf967e58ad Revises: bd1ec73db389 -Create Date: 2025-10-01 16:50:08.343639 +Create Date: 2025-10-10 17:33:31.740232 """ @@ -11,7 +11,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "63adc90fca0c" +revision = "9acf967e58ad" down_revision = "bd1ec73db389" branch_labels = None depends_on = None @@ -20,35 +20,35 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.create_table( - "project_version_change", + "project_version_delta", sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False), sa.Column("rank", sa.Integer(), nullable=False), sa.Column("version_id", sa.Integer(), nullable=False), - sa.Column("delta", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("changes", postgresql.JSONB(astext_type=sa.Text()), nullable=False), sa.ForeignKeyConstraint( ["version_id"], ["project_version.id"], - name=op.f("fk_project_version_change_version_id_project_version"), + name=op.f("fk_project_version_delta_version_id_project_version"), ondelete="CASCADE", ), - sa.PrimaryKeyConstraint("id", name=op.f("pk_project_version_change")), - sa.UniqueConstraint("version_id", "rank", name="unique_changes"), + sa.PrimaryKeyConstraint("id", name=op.f("pk_project_version_delta")), + sa.UniqueConstraint("version_id", "rank", name="unique_deltas"), ) op.create_index( - op.f("ix_project_version_change_rank"), - "project_version_change", + op.f("ix_project_version_delta_rank"), + "project_version_delta", ["rank"], unique=False, ) op.create_index( - op.f("ix_project_version_change_version_id"), - "project_version_change", + op.f("ix_project_version_delta_version_id"), + "project_version_delta", ["version_id"], unique=False, ) op.create_index( - "ix_project_version_change_version_id_rank", - "project_version_change", + "ix_project_version_delta_version_id_rank", + "project_version_delta", ["version_id", "rank"], unique=False, ) @@ -57,7 +57,7 @@ def upgrade(): # data migration op.execute( """ - INSERT INTO project_version_change (version_id, rank, delta) + INSERT INTO project_version_delta (version_id, rank, changes) SELECT h.version_id, 0 AS rank, @@ -67,12 +67,12 @@ def upgrade(): 'path', fp.path, 'size', h.size, 'change', h.change, - 'version', 'v' || h.project_version_name, + 'version', h.project_version_name, 'checksum', h.checksum, 'diff', fdj.diff_path ) ) - ) AS delta + ) AS changes FROM file_history h JOIN @@ -96,14 +96,13 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### op.drop_index( - "ix_project_version_change_version_id_rank", table_name="project_version_change" + "ix_project_version_delta_version_id_rank", table_name="project_version_delta" ) op.drop_index( - op.f("ix_project_version_change_version_id"), - table_name="project_version_change", + op.f("ix_project_version_delta_version_id"), table_name="project_version_delta" ) op.drop_index( - op.f("ix_project_version_change_rank"), table_name="project_version_change" + op.f("ix_project_version_delta_rank"), table_name="project_version_delta" ) - op.drop_table("project_version_change") + op.drop_table("project_version_delta") # ### end Alembic commands ### From d0ef271437b2524208ee948dcb6bbf71bed7dc6d Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Thu, 16 Oct 2025 09:13:09 +0200 Subject: [PATCH 13/17] enhancements v2 --- server/mergin/sync/models.py | 8 ++++++++ server/mergin/sync/public_api_v2.yaml | 5 +++-- .../mergin/sync/public_api_v2_controller.py | 19 ++++++------------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py index 6fd6a6a4..9343db13 100644 --- a/server/mergin/sync/models.py +++ b/server/mergin/sync/models.py @@ -1038,6 +1038,8 @@ def merge_delta_changes( # create + update = create with with the most recent metadata current.change = previous.change current.diffs = [] + else: + result[path] = current elif previous.change == PushChangeType.UPDATE: if current.change == PushChangeType.UPDATE_DIFF: # update + update_diff = update with latest info @@ -1053,6 +1055,12 @@ def merge_delta_changes( if current.change == PushChangeType.CREATE: # delete + create = create result[path] = current + elif current.change in ( + PushChangeType.UPDATE, + PushChangeType.UPDATE_DIFF, + ): + # delete + update = invalid sequence, keep delete + continue else: result[current.path] = current return list(result.values()) diff --git a/server/mergin/sync/public_api_v2.yaml b/server/mergin/sync/public_api_v2.yaml index e30d49c1..9ac4a2d0 100644 --- a/server/mergin/sync/public_api_v2.yaml +++ b/server/mergin/sync/public_api_v2.yaml @@ -394,14 +394,15 @@ paths: required: true schema: type: integer - example: 1 + example: + minimum: 0 description: Start version (exclusive) - name: to in: query - required: true schema: type: integer example: 2 + minimum: 1 description: End version (inclusive) responses: "200": diff --git a/server/mergin/sync/public_api_v2_controller.py b/server/mergin/sync/public_api_v2_controller.py index 5debe031..8e5eed23 100644 --- a/server/mergin/sync/public_api_v2_controller.py +++ b/server/mergin/sync/public_api_v2_controller.py @@ -4,6 +4,7 @@ import os from datetime import datetime +from typing import Optional import uuid import gevent import logging @@ -404,24 +405,16 @@ def upload_chunk(id: str): ) -def get_project_delta(id: str): +def get_project_delta(id: str, since: int, to: Optional[int] = None): """Get project changes (delta) between two versions""" - since = int(request.args.get("since")) - to = int(request.args.get("to")) + project: Project = require_project_by_uuid(id, ProjectPermissions.Read) - latest_version = project.latest_version - if since < 0 or to < 0: - abort(400, "Invalid 'since' or 'to' version") + if to is None: + to = project.latest_version + if since > to: abort(400, "'since' version must be less than 'to' version") - ProjectVersion.query.filter( - ProjectVersion.project_id == project.id, - ProjectVersion.name == since, - ).first_or_404() - if to > latest_version: - abort(404) - 
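    # A sketch of the expected request, assuming the route from
    # public_api_v2.yaml is wired to this handler (illustrative values only):
    #   GET /v2/projects/{id}/delta?since=4&to=8
    # 'since' is exclusive and 'to' inclusive, so the response merges the
    # changes introduced by versions 5 through 8 into a single list.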
     delta_changes = project.get_delta_changes(since, to)
 
     return DeltaChangeRespSchema(many=True).dump(delta_changes), 200

From 8ceee04ea557bf617014c40fe31ca863d634c09a Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Fri, 24 Oct 2025 17:48:58 +0200
Subject: [PATCH 14/17] Address discussions:

- update new table schema for project version delta
- add tests and update existing ones to handle the order of diffs

---
 server/mergin/sync/models.py                  | 168 ++++++++------
 .../mergin/sync/public_api_v2_controller.py   |  11 +-
 server/mergin/tests/test_public_api_v2.py     | 213 ++++++++++++++----
 .../4b4648483770_add_project_version_delta.py | 129 +++++++++++
 .../9acf967e58ad_add_project_version_delta.py | 108 ---------
 5 files changed, 396 insertions(+), 233 deletions(-)
 create mode 100644 server/migrations/community/4b4648483770_add_project_version_delta.py
 delete mode 100644 server/migrations/community/9acf967e58ad_add_project_version_delta.py

diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py
index 9343db13..0d342303 100644
--- a/server/mergin/sync/models.py
+++ b/server/mergin/sync/models.py
@@ -359,7 +359,7 @@ def get_delta_changes(
         """
         Get changes between two versions, merging them if needed.
         - create FileDiff checkpoints if needed
-        - create ProjectVersionDelta checkpoints if needed with delta json
+        - create ProjectVersionDelta checkpoints if needed with changes json
         """
         if since > to:
             logging.error(
@@ -371,19 +371,18 @@ def get_delta_changes(
         project_id = self.id
         expected_checkpoints = Checkpoint.get_checkpoints(since + 1, to)
         expected_deltas: List[ProjectVersionDelta] = (
-            ProjectVersionDelta.query.join(ProjectVersion)
-            .filter(
-                ProjectVersion.project_id == project_id,
-                ProjectVersion.name > since,
-                ProjectVersion.name <= to,
-                tuple_(ProjectVersionDelta.rank, ProjectVersion.name).in_(
+            ProjectVersionDelta.query.filter(
+                ProjectVersionDelta.project_id == project_id,
+                ProjectVersionDelta.version > since,
+                ProjectVersionDelta.version <= to,
+                tuple_(ProjectVersionDelta.rank, ProjectVersionDelta.version).in_(
                     [(item.rank, item.end) for item in expected_checkpoints]
                 ),
             )
-            .order_by(ProjectVersion.name)
+            .order_by(ProjectVersionDelta.version)
             .all()
         )
-        existing_delta_map = {(c.rank, c.version.name): c for c in expected_deltas}
+        existing_delta_map = {(c.rank, c.version): c for c in expected_deltas}
 
         # Cache all individual (rank 0) delta rows in the required range.
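        # How Checkpoint.get_checkpoints slices a range, inferred from the
        # tests in this series (assuming a rank r checkpoint spans an aligned
        # block of 4**r versions; illustrative only):
        #   get_checkpoints(1, 10) -> rank 1: v1-v4, rank 1: v5-v8,
        #                             rank 0: v9, rank 0: v10
        # A request for (0, 10] is thus served from two rank 1 checkpoints
        # plus two individual rank 0 rows.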
        individual_deltas: List[ProjectVersionDelta] = []
@@ -400,14 +399,13 @@ def get_delta_changes(
            # If a higher rank delta checkpoint does not exist, use rank=0 deltas to create the checkpoint
            if checkpoint.rank > 0:
                individual_deltas = (
-                    ProjectVersionDelta.query.join(ProjectVersion)
-                    .filter(
-                        ProjectVersion.project_id == project_id,
-                        ProjectVersion.name >= since,
-                        ProjectVersion.name <= to,
+                    ProjectVersionDelta.query.filter(
+                        ProjectVersionDelta.project_id == project_id,
+                        ProjectVersionDelta.version >= since,
+                        ProjectVersionDelta.version <= to,
                         ProjectVersionDelta.rank == 0,
                     )
-                    .order_by(ProjectVersion.name)
+                    .order_by(ProjectVersionDelta.version)
                     .all()
                     if not individual_deltas
                     else individual_deltas
@@ -431,7 +429,7 @@ def get_delta_changes(
                     f"Not possible to create checkpoint for project {project_id} in range {checkpoint.start}-{checkpoint.end}"
                 )
 
-        return ProjectVersionDelta.merge_delta_changes(result)
+        return ProjectVersionDelta.merge_changes(result)
 
 
 class ProjectRole(Enum):
@@ -987,12 +985,13 @@ def construct_checkpoint(self) -> bool:
 
 class ProjectVersionDelta(db.Model):
     id = db.Column(db.BigInteger, primary_key=True, autoincrement=True)
+    version = db.Column(db.Integer, nullable=False, index=True)
     # exponential order of changes json
     rank = db.Column(db.Integer, nullable=False, index=True)
     # to which project version is this linked
-    version_id = db.Column(
-        db.Integer,
-        db.ForeignKey("project_version.id", ondelete="CASCADE"),
+    project_id = db.Column(
+        UUID(as_uuid=True),
+        db.ForeignKey("project.id", ondelete="CASCADE"),
         index=True,
         nullable=False,
     )
@@ -1000,69 +999,91 @@ class ProjectVersionDelta(db.Model):
     changes = db.Column(JSONB, nullable=False)
 
     __table_args__ = (
-        db.UniqueConstraint("version_id", "rank", name="unique_deltas"),
+        db.UniqueConstraint("project_id", "version", "rank", name="unique_deltas"),
         db.Index(
-            "ix_project_version_delta_version_id_rank",
-            version_id,
+            "ix_project_version_delta_project_id_version_rank",
+            project_id,
+            version,
             rank,
         ),
     )
-    version = db.relationship(
-        "ProjectVersion",
+    project = db.relationship(
+        "Project",
         uselist=False,
     )
 
     @staticmethod
-    def merge_delta_changes(
+    def merge_changes(
        items: List[DeltaChange],
    ) -> List[DeltaChangeMerged]:
        """
        Merge changes json array objects into one list of changes.
        Changes are merged based on file path and change type.
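
        A minimal illustration of the pair-wise merge, with made-up values
        (DeltaChange fields as used in the tests; not part of this patch):

            merged = ProjectVersionDelta.merge_changes([
                DeltaChange(path="a.gpkg", change=PushChangeType.CREATE,
                            version=1, size=10, checksum="c1"),
                DeltaChange(path="a.gpkg", change=PushChangeType.UPDATE,
                            version=2, size=12, checksum="c2"),
            ])
            # -> one CREATE entry carrying the newer size/checksum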
""" result: Dict[str, DeltaChangeMerged] = {} + # sorting changes by version to apply them in correct order + items.sort(key=lambda x: x.version) + + def handle_replace(result, path, current, previous): + result[path] = current + + def handle_delete(result, path, current, previous): + del result[path] + + def handle_update(result, path, current, previous): + # handle update case, when previous change was create - just revert to create with new metadata + current.change = previous.change + current.version = previous.version + current.diffs = [] + result[path] = current + + def handle_update_diff(result, path, current, previous): + current.diffs = (previous.diffs or []) + (current.diffs or []) + result[path] = current + + dispatch = { + # create + delete = file is transparent for current changes -> delete it + (PushChangeType.CREATE, PushChangeType.DELETE): handle_delete, + # create + update = create with updated info + (PushChangeType.CREATE, PushChangeType.UPDATE): handle_update, + (PushChangeType.CREATE, PushChangeType.UPDATE_DIFF): handle_update, + (PushChangeType.CREATE, PushChangeType.CREATE): None, + # update + update_diff = update with latest info + ( + PushChangeType.UPDATE, + PushChangeType.UPDATE_DIFF, + ): handle_update, + (PushChangeType.UPDATE, PushChangeType.UPDATE): handle_replace, + (PushChangeType.UPDATE, PushChangeType.DELETE): handle_replace, + (PushChangeType.UPDATE, PushChangeType.CREATE): handle_replace, + # update_diff + update_diff = update_diff with latest info with proper order of diffs + ( + PushChangeType.UPDATE_DIFF, + PushChangeType.UPDATE_DIFF, + ): handle_update_diff, + (PushChangeType.UPDATE_DIFF, PushChangeType.UPDATE): handle_replace, + (PushChangeType.UPDATE_DIFF, PushChangeType.DELETE): handle_replace, + (PushChangeType.UPDATE_DIFF, PushChangeType.CREATE): None, + (PushChangeType.DELETE, PushChangeType.CREATE): handle_replace, + # delete + update = invalid sequence, keep delete + (PushChangeType.DELETE, PushChangeType.UPDATE): None, + (PushChangeType.DELETE, PushChangeType.UPDATE_DIFF): None, + (PushChangeType.DELETE, PushChangeType.DELETE): None, + } + for item in items: current = item.to_merged_delta() - previous = result.get(current.path) path = current.path - if previous: - # merge changes data jsons - if previous.change == PushChangeType.CREATE: - if current.change == PushChangeType.DELETE: - # create + delete = file is transparent for current changes -> delete it - del result[path] - elif current.change in ( - PushChangeType.UPDATE, - PushChangeType.UPDATE_DIFF, - ): - # create + update = create with with the most recent metadata - current.change = previous.change - current.diffs = [] - else: - result[path] = current - elif previous.change == PushChangeType.UPDATE: - if current.change == PushChangeType.UPDATE_DIFF: - # update + update_diff = update with latest info - current.change = previous.change - current.diffs = [] - result[path] = current - elif previous.change == PushChangeType.UPDATE_DIFF: - if current.change == PushChangeType.UPDATE_DIFF: - # update_diff + update_diff = update_diff with latest info - current.diffs.extend(previous.diffs or []) - result[path] = current - elif previous.change == PushChangeType.DELETE: - if current.change == PushChangeType.CREATE: - # delete + create = create - result[path] = current - elif current.change in ( - PushChangeType.UPDATE, - PushChangeType.UPDATE_DIFF, - ): - # delete + update = invalid sequence, keep delete - continue - else: - result[current.path] = current + previous = result.get(path) + + if 
not previous: + result[path] = current + continue + + handler = dispatch.get((previous.change, current.change)) + if handler: + handler(result, path, current, previous) + return list(result.values()) @classmethod @@ -1078,7 +1099,7 @@ def create_checkpoint( delta_range = [ change for change in changes - if checkpoint.start <= change.version.name <= checkpoint.end + if checkpoint.start <= change.version <= checkpoint.end ] if not delta_range: @@ -1092,7 +1113,7 @@ def create_checkpoint( for delta in delta_range: changes.extend(DeltaChangeSchema(many=True).load(delta.changes)) merged_delta_items: List[DeltaChange] = [ - d.to_data_delta() for d in cls.merge_delta_changes(changes) + d.to_data_delta() for d in cls.merge_changes(changes) ] # Pre-fetch data for all versioned files to create FileDiff checkpoints @@ -1104,6 +1125,7 @@ def create_checkpoint( ] versioned_file_paths = [delta.path for delta in versioned_delta_items] if versioned_file_paths: + # get versioned files from DB and lookup their paths to next processing file_paths = ProjectFilePath.query.filter( ProjectFilePath.project_id == project_id, ProjectFilePath.path.in_(versioned_file_paths), @@ -1121,7 +1143,7 @@ def create_checkpoint( rank=checkpoint.rank, version=checkpoint.end, ).first() - + # If does not exists, let's create diff with higher rank and some generated path (name of diff file) if not existing_diff_checkpoint: base_file = FileHistory.get_basefile(file_path_id, checkpoint.end) if not base_file: @@ -1141,7 +1163,8 @@ def create_checkpoint( db.session.add(checkpoint_diff) checkpoint_delta = ProjectVersionDelta( - version_id=delta_range[-1].version_id, + project_id=project_id, + version=checkpoint.end, rank=checkpoint.rank, changes=DeltaChangeSchema(many=True).dump(merged_delta_items), ) @@ -1257,13 +1280,14 @@ def __init__( ) for c in changes ] - pvc = ProjectVersionDelta( - version=self, + pvd = ProjectVersionDelta( + project_id=project.id, + version=name, rank=0, changes=DeltaChangeSchema(many=True).dump(delta_data), ) - db.session.add(pvc) + db.session.add(pvd) db.session.flush() # update cached values in project and push to transaction buffer so that self.files is up-to-date diff --git a/server/mergin/sync/public_api_v2_controller.py b/server/mergin/sync/public_api_v2_controller.py index 8e5eed23..5f539b50 100644 --- a/server/mergin/sync/public_api_v2_controller.py +++ b/server/mergin/sync/public_api_v2_controller.py @@ -409,12 +409,15 @@ def get_project_delta(id: str, since: int, to: Optional[int] = None): """Get project changes (delta) between two versions""" project: Project = require_project_by_uuid(id, ProjectPermissions.Read) - if to is None: - to = project.latest_version + to = project.latest_version if to is None else to + if to > project.latest_version: + abort(400, "'to' version exceeds latest project version") - if since > to: - abort(400, "'since' version must be less than 'to' version") + if since >= to: + abort(400, "'since' version must be less than or equal to 'to' version") delta_changes = project.get_delta_changes(since, to) + if delta_changes is None: + abort(404) return DeltaChangeRespSchema(many=True).dump(delta_changes), 200 diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py index c0eb6191..eceee077 100644 --- a/server/mergin/tests/test_public_api_v2.py +++ b/server/mergin/tests/test_public_api_v2.py @@ -28,7 +28,7 @@ ProjectRole, ProjectVersionDelta, ) -from ..sync.files import PushChangeType +from ..sync.files import DeltaChange, PushChangeType 
from ..sync.utils import is_versioned_file from sqlalchemy.exc import IntegrityError import pytest @@ -324,6 +324,89 @@ def test_create_diff_checkpoint(diff_project): assert not os.path.exists(diff.abs_path) +def test_delta_merge_changes(): + """Test merging of delta changes works as expected""" + + create = DeltaChange( + path="file1.gpkg", + change=PushChangeType.CREATE, + version=1, + size=100, + checksum="abc", + ) + update = DeltaChange( + path="file1.gpkg", + change=PushChangeType.UPDATE, + version=2, + size=120, + checksum="def", + ) + delete = DeltaChange( + path="file1.gpkg", + change=PushChangeType.DELETE, + version=3, + size=0, + checksum="ghi", + ) + update_diff1 = DeltaChange( + path="file1.gpkg", + change=PushChangeType.UPDATE_DIFF, + version=4, + size=130, + checksum="xyz", + diff="diff1", + ) + update_diff2 = DeltaChange( + path="file1.gpkg", + change=PushChangeType.UPDATE_DIFF, + version=5, + size=140, + checksum="uvw", + diff="diff2", + ) + + # CREATE + UPDATE -> CREATE + merged = ProjectVersionDelta.merge_changes([create, update]) + assert len(merged) == 1 + assert merged[0].change == PushChangeType.CREATE + assert merged[0].version == create.version + # check reverse order as well + merged = ProjectVersionDelta.merge_changes([update, create]) + assert len(merged) == 1 + assert merged[0].change == PushChangeType.CREATE + assert merged[0].version == create.version + + # CREATE + DELETE -> removed + merged = ProjectVersionDelta.merge_changes([create, delete]) + assert len(merged) == 0 + + # UPDATE + DELETE -> DELETE + merged = ProjectVersionDelta.merge_changes([update, delete]) + assert len(merged) == 1 + assert merged[0].change == PushChangeType.DELETE + + # CREATE + UPDATE_DIFF -> CREATE + merged = ProjectVersionDelta.merge_changes([create, update_diff1]) + assert len(merged) == 1 + assert merged[0].change == PushChangeType.CREATE + assert merged[0].diffs == [] + + # UPDATE + UPDATE_DIFF -> UPDATE + merged = ProjectVersionDelta.merge_changes([update, update_diff1]) + assert len(merged) == 1 + assert merged[0].change == PushChangeType.UPDATE + assert merged[0].diffs == [] + + # UPDATE_DIFF + UPDATE_DIFF -> merged diffs + merged = ProjectVersionDelta.merge_changes([update_diff1, update_diff2]) + assert len(merged) == 1 + assert merged[0].change == PushChangeType.UPDATE_DIFF + assert merged[0].version == update_diff2.version + assert merged[0].size == update_diff2.size + assert merged[0].checksum == update_diff2.checksum + assert [d.path for d in merged[0].diffs] == ["diff1", "diff2"] + + def test_project_version_delta_changes(client, diff_project: Project): """Test that get_delta_changes and its schema work as expected""" latest_version = diff_project.get_latest_version() @@ -331,16 +414,20 @@ def test_project_version_delta_changes(client, diff_project: Project): assert latest_version.name == 10 assert diff_project.get_delta_changes(2, 1) is None assert diff_project.get_delta_changes(2, 2) is None - pvcs: List[ProjectVersionDelta] = ( - ProjectVersionDelta.query.join(ProjectVersion) - .filter(ProjectVersion.project_id == diff_project.id) + deltas: List[ProjectVersionDelta] = ( + ProjectVersionDelta.query.filter_by(project_id=project_id) + .order_by(ProjectVersionDelta.version) .all() ) - assert len(pvcs) == 10 - initial_pvc = pvcs[0] - initial_version = initial_pvc.version - assert initial_pvc.rank == 0 - assert initial_pvc.version.id == initial_version.id + assert len(deltas) == 10 + initial_delta = deltas[0] + initial_version = ProjectVersion.query.filter_by( + 
project_id=project_id, name=initial_delta.version + ).first() + assert initial_version + assert initial_delta.version + assert initial_delta.rank == 0 + assert initial_delta.version == 1 delta = diff_project.get_delta_changes(1, 2) assert len(delta) == 1 @@ -352,6 +439,8 @@ def test_project_version_delta_changes(client, diff_project: Project): delta = diff_project.get_delta_changes(1, 3) assert len(delta) == 1 assert delta[0].change == PushChangeType.CREATE + assert delta[0].version == 3 + assert delta[0].checksum == deltas[3].changes[0]["checksum"] # get_delta with update diff delta = diff_project.get_delta_changes(1, 4) @@ -365,27 +454,27 @@ def test_project_version_delta_changes(client, diff_project: Project): filediff_checkpoints = FileDiff.query.filter_by(rank=1) checkpoint_change = checkpoint_changes.first() assert checkpoint_changes.count() == 1 - assert checkpoint_change.version_id == pvcs[3].version_id + assert checkpoint_change.version == deltas[3].version assert filediff_checkpoints.count() == 0 # check if filediff basefile is correctly set file_history = FileHistory.query.filter_by(project_version_name=4).first() assert len(delta) == len(initial_version.files) - delta_base_gpkg = [d for d in delta if d.path == "base.gpkg"] - assert len(delta_base_gpkg) == 1 + delta_base_gpkg = next((d for d in delta if d.path == "base.gpkg"), None) + assert delta_base_gpkg # from history is clear, that we are just creating geopackage in this range - assert delta_base_gpkg[0].change == PushChangeType.CREATE - assert delta_base_gpkg[0].version == 3 - assert delta_base_gpkg[0].path == file_history.path - assert delta_base_gpkg[0].size == file_history.size - assert delta_base_gpkg[0].checksum == file_history.checksum - assert len(delta_base_gpkg[0].diffs) == 0 + assert delta_base_gpkg.change == PushChangeType.CREATE + assert delta_base_gpkg.version == 3 + assert delta_base_gpkg.path == file_history.path + assert delta_base_gpkg.size == file_history.size + assert delta_base_gpkg.checksum == file_history.checksum + assert len(delta_base_gpkg.diffs) == 0 # get data with multiple ranks = 1 level checkpoints 1-4, 5-8 + checkpoint 9 and 10 delta = diff_project.get_delta_changes(0, 10) assert len(delta) == len(latest_version.files) - delta_test_gpkg = [d for d in delta if d.path == "test.gpkg"] + delta_test_gpkg = next((d for d in delta if d.path == "test.gpkg"), None) assert delta_test_gpkg - assert delta_test_gpkg[0].change == PushChangeType.CREATE + assert delta_test_gpkg.change == PushChangeType.CREATE assert ProjectVersionDelta.query.filter_by(rank=1).count() == 2 # base gpgk is transparent assert not next((c for c in delta if c.path == "base.gpkg"), None) @@ -394,14 +483,6 @@ def test_project_version_delta_changes(client, diff_project: Project): delta_base_gpkg = next((c for c in delta if c.path == "base.gpkg"), None) assert delta_base_gpkg.change == PushChangeType.DELETE - # check update diff - delta = diff_project.get_delta_changes(5, 7) - assert len(delta) == 1 - assert delta[0].change == PushChangeType.UPDATE_DIFF - assert len(delta[0].diffs) == 2 - # find related diff file in file diffs to check relation - assert FileDiff.query.filter_by(path=delta[0].diffs[0].path) - # create just update_diff versions with checkpoint base_gpkg = os.path.join(diff_project.storage.project_dir, "test.gpkg") shutil.copy( @@ -410,7 +491,7 @@ def test_project_version_delta_changes(client, diff_project: Project): for i in range(6): sql = f"UPDATE simple SET rating={i}" execute_query(base_gpkg, sql) - pv = 
push_change( + push_change( diff_project, "updated", "test.gpkg", diff_project.storage.project_dir ) delta = diff_project.get_delta_changes(8, latest_version.name + 6) @@ -798,24 +879,51 @@ def test_project_delta(client, diff_project): os.path.join(working_dir, "base.gpkg"), ) push_change(initial_project, "added", "base.gpkg", working_dir) - response = client.get(f"v2/projects/{initial_project.id}/delta?since=0&to=1") + response = client.get(f"v2/projects/{initial_project.id}/delta?since=0") + assert response.status_code == 200 + delta = response.json + assert len(delta) == 1 + assert delta[0]["change"] == PushChangeType.CREATE.value + assert delta[0]["version"] == 1 + + # remove the file and get changes from 0 -> 2 where base gpgkg is removed -> transparent + push_change(initial_project, "removed", "base.gpkg", working_dir) + response = client.get(f"v2/projects/{initial_project.id}/delta?since=0") assert response.status_code == 200 + delta = response.json + assert len(delta) == 0 + + # non valid cases response = client.get(f"v2/projects/{diff_project.id}/delta") assert response.status_code == 400 - - response = client.get(f"v2/projects/{diff_project.id}/delta?since=-1&to=1") + response = client.get(f"v2/projects/{diff_project.id}/delta?since=2&to=1") + assert response.status_code == 400 + response = client.get(f"v2/projects/{diff_project.id}/delta?since=-2") + assert response.status_code == 400 + response = client.get(f"v2/projects/{diff_project.id}/delta?since=-2&to=-1") + assert response.status_code == 400 + # exceeding latest version + response = client.get(f"v2/projects/{diff_project.id}/delta?since=0&to=2000") + assert response.status_code == 400 + # no changes between versions with same number + response = client.get(f"v2/projects/{diff_project.id}/delta?since=1&to=1") assert response.status_code == 400 - response = client.get(f"v2/projects/{diff_project.id}/delta?since=1000&to=2000") - assert response.status_code == 404 - - response = client.get(f"v2/projects/{diff_project.id}/delta?since=1&to=10") + # since 1 to latest version + response = client.get(f"v2/projects/{diff_project.id}/delta?since=1") assert response.status_code == 200 assert len(response.json) == 1 assert response.json[0]["change"] == PushChangeType.CREATE.value assert response.json[0]["version"] == 9 + files = ( + ProjectVersion.query.filter_by( + project_id=diff_project.id, name=response.json[0]["version"] + ) + .first() + .files + ) - # simplate update + # simple update response = client.get(f"v2/projects/{diff_project.id}/delta?since=4&to=8") assert response.status_code == 200 delta = response.json @@ -823,25 +931,32 @@ def test_project_delta(client, diff_project): # simulate pull of delta[0] assert delta[0]["change"] == PushChangeType.UPDATE.value - assert delta[0]["version"] == 7 + assert delta[0]["version"] == 5 assert not delta[0].get("diffs") # integration test for pull mechanism -def test_project_pull(client, diff_project): - """Test project pull mechanisom in v2""" - - response = client.get(f"v2/projects/{diff_project.id}/delta?since=5&to=7") +def test_project_pull_diffs(client, diff_project): + """Test project pull mechanisom in v2 with diff files""" + since = 5 + to = 7 + # check diff files in database where we can get them with right order and metadata + current_diffs = ( + FileDiff.query.filter(FileDiff.version > since, FileDiff.version <= to) + .order_by(FileDiff.version) + .all() + ) + response = client.get(f"v2/projects/{diff_project.id}/delta?since={since}&to={to}") assert response.status_code == 
200 delta = response.json assert len(delta) == 1 assert delta[0]["change"] == PushChangeType.UPDATE_DIFF.value assert delta[0]["version"] == 7 - diff = delta[0]["diffs"][0] - assert diff["path"].startswith("base.gpkg-") - response = client.get(f"v2/projects/{diff_project.id}/raw/diff/{diff['path']}") + first_diff = delta[0]["diffs"][0] + second_diff = delta[0]["diffs"][1] + assert first_diff["path"] == current_diffs[0].path + assert second_diff["path"] == current_diffs[1].path + response = client.get( + f"v2/projects/{diff_project.id}/raw/diff/{first_diff['path']}" + ) assert response.status_code == 200 - created_diff = FileDiff.query.filter_by(path=diff["path"]).first() - assert created_diff and os.path.exists(created_diff.abs_path) - assert created_diff.size > 0 - assert created_diff.checksum diff --git a/server/migrations/community/4b4648483770_add_project_version_delta.py b/server/migrations/community/4b4648483770_add_project_version_delta.py new file mode 100644 index 00000000..9f13eced --- /dev/null +++ b/server/migrations/community/4b4648483770_add_project_version_delta.py @@ -0,0 +1,129 @@ +"""Add project version delta + +Revision ID: 4b4648483770 +Revises: bd1ec73db389 +Create Date: 2025-10-24 09:55:18.286286 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "4b4648483770" +down_revision = "bd1ec73db389" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "project_version_delta", + sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False), + sa.Column("version", sa.Integer(), nullable=False), + sa.Column("rank", sa.Integer(), nullable=False), + sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("changes", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.ForeignKeyConstraint( + ["project_id"], + ["project.id"], + name=op.f("fk_project_version_delta_project_id_project"), + ondelete="CASCADE", + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_project_version_delta")), + sa.UniqueConstraint("project_id", "version", "rank", name="unique_deltas"), + ) + op.create_index( + op.f("ix_project_version_delta_project_id"), + "project_version_delta", + ["project_id"], + unique=False, + ) + op.create_index( + "ix_project_version_delta_project_id_version_rank", + "project_version_delta", + ["project_id", "version", "rank"], + unique=False, + ) + op.create_index( + op.f("ix_project_version_delta_rank"), + "project_version_delta", + ["rank"], + unique=False, + ) + op.create_index( + op.f("ix_project_version_delta_version"), + "project_version_delta", + ["version"], + unique=False, + ) + # ### end Alembic commands ### + + op.execute( + """ + INSERT INTO project_version_delta (project_id, version, rank, changes) + WITH delta AS ( + SELECT + h.version_id, + jsonb_agg( + jsonb_strip_nulls( + jsonb_build_object( + 'path', fp.path, + 'size', h.size, + 'change', h.change, + 'version', h.project_version_name, + 'checksum', h.checksum, + 'diff', fdj.diff_path + ) + ) + ) AS changes + FROM + file_history h + JOIN + project_file_path fp ON h.file_path_id = fp.id + LEFT JOIN LATERAL ( + SELECT + fd.path AS diff_path + FROM + file_diff fd + WHERE + fd.file_path_id = fp.id + AND fd.version = h.project_version_name + AND fd.rank = 0 + ) fdj ON TRUE + GROUP BY + -- Group by the single unique version identifier + h.version_id + ) + SELECT + pv.project_id, + 
pv.name, + 0 AS rank, + d.changes + FROM + delta AS d + JOIN project_version AS pv ON d.version_id = pv.id + ; + """ + ) + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index( + op.f("ix_project_version_delta_version"), table_name="project_version_delta" + ) + op.drop_index( + op.f("ix_project_version_delta_rank"), table_name="project_version_delta" + ) + op.drop_index( + "ix_project_version_delta_project_id_version_rank", + table_name="project_version_delta", + ) + op.drop_index( + op.f("ix_project_version_delta_project_id"), table_name="project_version_delta" + ) + op.drop_table("project_version_delta") + # ### end Alembic commands ### diff --git a/server/migrations/community/9acf967e58ad_add_project_version_delta.py b/server/migrations/community/9acf967e58ad_add_project_version_delta.py deleted file mode 100644 index 793a5978..00000000 --- a/server/migrations/community/9acf967e58ad_add_project_version_delta.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Add project version delta - -Revision ID: 9acf967e58ad -Revises: bd1ec73db389 -Create Date: 2025-10-10 17:33:31.740232 - -""" - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = "9acf967e58ad" -down_revision = "bd1ec73db389" -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "project_version_delta", - sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False), - sa.Column("rank", sa.Integer(), nullable=False), - sa.Column("version_id", sa.Integer(), nullable=False), - sa.Column("changes", postgresql.JSONB(astext_type=sa.Text()), nullable=False), - sa.ForeignKeyConstraint( - ["version_id"], - ["project_version.id"], - name=op.f("fk_project_version_delta_version_id_project_version"), - ondelete="CASCADE", - ), - sa.PrimaryKeyConstraint("id", name=op.f("pk_project_version_delta")), - sa.UniqueConstraint("version_id", "rank", name="unique_deltas"), - ) - op.create_index( - op.f("ix_project_version_delta_rank"), - "project_version_delta", - ["rank"], - unique=False, - ) - op.create_index( - op.f("ix_project_version_delta_version_id"), - "project_version_delta", - ["version_id"], - unique=False, - ) - op.create_index( - "ix_project_version_delta_version_id_rank", - "project_version_delta", - ["version_id", "rank"], - unique=False, - ) - # ### end Alembic commands ### - - # data migration - op.execute( - """ - INSERT INTO project_version_delta (version_id, rank, changes) - SELECT - h.version_id, - 0 AS rank, - jsonb_agg( - jsonb_strip_nulls( - jsonb_build_object( - 'path', fp.path, - 'size', h.size, - 'change', h.change, - 'version', h.project_version_name, - 'checksum', h.checksum, - 'diff', fdj.diff_path - ) - ) - ) AS changes - FROM - file_history h - JOIN - project_file_path fp ON h.file_path_id = fp.id - LEFT JOIN LATERAL ( - SELECT - fd.path AS diff_path - FROM - file_diff fd - WHERE - fd.file_path_id = fp.id - AND fd.version = h.project_version_name - AND fd.rank = 0 - ) fdj ON TRUE - GROUP BY - h.version_id; - """ - ) - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_index( - "ix_project_version_delta_version_id_rank", table_name="project_version_delta" - ) - op.drop_index( - op.f("ix_project_version_delta_version_id"), table_name="project_version_delta" - ) - op.drop_index( - op.f("ix_project_version_delta_rank"), table_name="project_version_delta" - ) - op.drop_table("project_version_delta") - # ### end Alembic commands ### From 99194b01d76a27a68740fa80473827c7b13ea1ae Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Tue, 28 Oct 2025 15:58:33 +0100 Subject: [PATCH 15/17] add mechanism for handling previous history files. - handle delete project cleanup of delta checkpoints --- server/mergin/sync/files.py | 6 +- server/mergin/sync/models.py | 22 +++++- .../mergin/tests/test_project_controller.py | 4 + server/mergin/tests/test_public_api_v2.py | 77 ++++++++++++++----- 4 files changed, 81 insertions(+), 28 deletions(-) diff --git a/server/mergin/sync/files.py b/server/mergin/sync/files.py index f8103926..d7d995d6 100644 --- a/server/mergin/sync/files.py +++ b/server/mergin/sync/files.py @@ -286,12 +286,12 @@ def to_data_delta(self): @dataclass class DeltaChange(DeltaChangeBase): - """Delta items stored in database as list of this item with single diff file""" + """Change items stored in database as list of this item with single diff file""" diff: Optional[str] = None - def to_merged_delta(self) -> DeltaChangeMerged: - """Convert DeltaData to DeltaMerged with multiple diffs""" + def to_merged(self) -> DeltaChangeMerged: + """Convert to DeltaMerged with multiple diffs""" result = DeltaChangeMerged( path=self.path, size=self.size, diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py index 0d342303..9e12b68f 100644 --- a/server/mergin/sync/models.py +++ b/server/mergin/sync/models.py @@ -275,6 +275,11 @@ def delete(self, removed_by: int = None): db.session.execute( upload_table.delete().where(upload_table.c.project_id == self.id) ) + # remove project version delta related to project + delta_table = ProjectVersionDelta.__table__ + db.session.execute( + delta_table.delete().where(delta_table.c.project_id == self.id) + ) self.project_users.clear() access_requests = ( AccessRequest.query.filter_by(project_id=self.id) @@ -391,7 +396,7 @@ def get_delta_changes( for checkpoint in expected_checkpoints: existing_delta = existing_delta_map.get((checkpoint.rank, checkpoint.end)) - # we have change in database, just return delta data from it + # we have delta in database, just return delta data from it if existing_delta: result.extend(DeltaChangeSchema(many=True).load(existing_delta.changes)) continue @@ -1021,6 +1026,7 @@ def merge_changes( Changes are merged based on file path and change type. 
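
        Illustration with made-up values: a create + delete pair collapses to
        nothing, unless the file already existed before the merged range:

            merged = ProjectVersionDelta.merge_changes([
                DeltaChange(path="a.gpkg", change=PushChangeType.DELETE,
                            version=6, size=0, checksum="c1"),
                DeltaChange(path="a.gpkg", change=PushChangeType.CREATE,
                            version=7, size=10, checksum="c2"),
                DeltaChange(path="a.gpkg", change=PushChangeType.DELETE,
                            version=8, size=0, checksum="c3"),
            ])
            # -> one DELETE (version 8): the file was present before v6,
            #    so the trailing delete must be kept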
""" result: Dict[str, DeltaChangeMerged] = {} + updating_files: Set[str] = set() # sorting changes by version to apply them in correct order items.sort(key=lambda x: x.version) @@ -1028,12 +1034,15 @@ def handle_replace(result, path, current, previous): result[path] = current def handle_delete(result, path, current, previous): - del result[path] + + if path in updating_files: + result[path] = current + else: + del result[path] def handle_update(result, path, current, previous): # handle update case, when previous change was create - just revert to create with new metadata current.change = previous.change - current.version = previous.version current.diffs = [] result[path] = current @@ -1072,12 +1081,17 @@ def handle_update_diff(result, path, current, previous): } for item in items: - current = item.to_merged_delta() + current = item.to_merged() path = current.path + # path is key for merging changes previous = result.get(path) + # adding new file change if not seen before if not previous: result[path] = current + # track existing paths to avoid deleting created files later + if current.change != PushChangeType.CREATE: + updating_files.add(path) continue handler = dispatch.get((previous.change, current.change)) diff --git a/server/mergin/tests/test_project_controller.py b/server/mergin/tests/test_project_controller.py index 5b34bfba..b0d89502 100644 --- a/server/mergin/tests/test_project_controller.py +++ b/server/mergin/tests/test_project_controller.py @@ -28,6 +28,7 @@ from ..sync.models import ( FileDiff, Project, + ProjectVersionDelta, Upload, ProjectVersion, SyncFailuresHistory, @@ -536,6 +537,7 @@ def test_delete_project(client): assert not Project.query.filter_by( workspace_id=test_workspace_id, name=test_project ).count() + assert not ProjectVersionDelta.query.filter_by(project_id=project.id).count() assert not os.path.exists(project_dir) rm_project = Project.query.get(project.id) assert rm_project.removed_at and not rm_project.storage_params @@ -1781,6 +1783,8 @@ def test_optimize_storage(app, client, diff_project): diff_project.latest_version = 8 ProjectVersion.query.filter_by(project_id=diff_project.id, name=9).delete() ProjectVersion.query.filter_by(project_id=diff_project.id, name=10).delete() + ProjectVersionDelta.query.filter_by(project_id=diff_project.id, version=9).delete() + ProjectVersionDelta.query.filter_by(project_id=diff_project.id, version=10).delete() db.session.commit() diff_project.cache_latest_files() assert diff_project.latest_version == 8 diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py index eceee077..81ef52c1 100644 --- a/server/mergin/tests/test_public_api_v2.py +++ b/server/mergin/tests/test_public_api_v2.py @@ -369,12 +369,12 @@ def test_delta_merge_changes(): merged = ProjectVersionDelta.merge_changes([create, update]) assert len(merged) == 1 assert merged[0].change == PushChangeType.CREATE - assert merged[0].version == create.version + assert merged[0].version == update.version # check reverse order as well merged = ProjectVersionDelta.merge_changes([update, create]) assert len(merged) == 1 assert merged[0].change == PushChangeType.CREATE - assert merged[0].version == create.version + assert merged[0].version == update.version # CREATE + DELETE -> removed merged = ProjectVersionDelta.merge_changes([create, delete]) @@ -406,6 +406,36 @@ def test_delta_merge_changes(): assert merged[0].checksum == update_diff2.checksum assert [d.path for d in merged[0].diffs] == ["diff1", "diff2"] + # case when trying to 
delete already existing file in history + # copy create with new version number + delete = DeltaChange( + path="file1.gpkg", + change=PushChangeType.DELETE, + version=6, + size=0, + checksum="ghi", + ) + created = DeltaChange( + path="file1.gpkg", + change=PushChangeType.CREATE, + version=7, + size=100, + checksum="abc", + ) + delete8 = DeltaChange( + path="file1.gpkg", + change=PushChangeType.DELETE, + version=8, + size=0, + checksum="abc2", + ) + merged = ProjectVersionDelta.merge_changes([delete, create, delete8]) + assert len(merged) == 1 + assert merged[0].change == PushChangeType.DELETE + assert merged[0].version == delete8.version + assert merged[0].size == delete8.size + assert merged[0].checksum == delete8.checksum + def test_project_version_delta_changes(client, diff_project: Project): """Test that get_delta_changes and its schema work as expected""" @@ -419,6 +449,7 @@ def test_project_version_delta_changes(client, diff_project: Project): .order_by(ProjectVersionDelta.version) .all() ) + # check if deltas are created after pushes assert len(deltas) == 10 initial_delta = deltas[0] initial_version = ProjectVersion.query.filter_by( @@ -429,16 +460,16 @@ def test_project_version_delta_changes(client, diff_project: Project): assert initial_delta.rank == 0 assert initial_delta.version == 1 + # delete file delta = diff_project.get_delta_changes(1, 2) assert len(delta) == 1 assert delta[0].change == PushChangeType.DELETE - # no ranks created as we get here just first version with get_delta - assert ProjectVersionDelta.query.filter_by(rank=1).count() == 0 # delete + create version delta = diff_project.get_delta_changes(1, 3) assert len(delta) == 1 assert delta[0].change == PushChangeType.CREATE + # file was created in v3 assert delta[0].version == 3 assert delta[0].checksum == deltas[3].changes[0]["checksum"] @@ -450,10 +481,10 @@ def test_project_version_delta_changes(client, diff_project: Project): # create rank 1 checkpoint for v4 delta = diff_project.get_delta_changes(0, 4) - checkpoint_changes = ProjectVersionDelta.query.filter_by(rank=1) + checkpoint = ProjectVersionDelta.query.filter_by(rank=1) filediff_checkpoints = FileDiff.query.filter_by(rank=1) - checkpoint_change = checkpoint_changes.first() - assert checkpoint_changes.count() == 1 + checkpoint_change = checkpoint.first() + assert checkpoint.count() == 1 assert checkpoint_change.version == deltas[3].version assert filediff_checkpoints.count() == 0 # check if filediff basefile is correctly set @@ -463,7 +494,7 @@ def test_project_version_delta_changes(client, diff_project: Project): assert delta_base_gpkg # from history is clear, that we are just creating geopackage in this range assert delta_base_gpkg.change == PushChangeType.CREATE - assert delta_base_gpkg.version == 3 + assert delta_base_gpkg.version == 4 assert delta_base_gpkg.path == file_history.path assert delta_base_gpkg.size == file_history.size assert delta_base_gpkg.checksum == file_history.checksum @@ -476,7 +507,11 @@ def test_project_version_delta_changes(client, diff_project: Project): assert delta_test_gpkg assert delta_test_gpkg.change == PushChangeType.CREATE assert ProjectVersionDelta.query.filter_by(rank=1).count() == 2 - # base gpgk is transparent + assert ProjectVersionDelta.query.filter_by(rank=2).count() == 0 + # check if version is having rank 1 checkpoint with proper end version + assert ProjectVersionDelta.query.filter_by(rank=1, version=4).first() + assert ProjectVersionDelta.query.filter_by(rank=1, version=8).first() + # base gpgk is transparent, 
because we are requesting from 0
     assert not next((c for c in delta if c.path == "base.gpkg"), None)
 
     delta = diff_project.get_delta_changes(latest_version.name - 3, latest_version.name)
     delta_base_gpkg = next((c for c in delta if c.path == "base.gpkg"), None)
     assert delta_base_gpkg.change == PushChangeType.DELETE
@@ -912,26 +947,26 @@ def test_project_delta(client, diff_project):
     # since 1 to latest version
     response = client.get(f"v2/projects/{diff_project.id}/delta?since=1")
     assert response.status_code == 200
-    assert len(response.json) == 1
-    assert response.json[0]["change"] == PushChangeType.CREATE.value
+    # create of test.gpkg and delete of base.gpkg
+    assert len(response.json) == 2
+    assert response.json[0]["change"] == PushChangeType.DELETE.value
     assert response.json[0]["version"] == 9
+    assert response.json[0]["path"] == "base.gpkg"
+    assert response.json[0]["size"] == 98304
+
+    assert response.json[1]["change"] == PushChangeType.CREATE.value
+    assert response.json[1]["version"] == 9
+    assert response.json[1]["path"] == "test.gpkg"
+    assert response.json[1]["size"] == 98304
-    files = (
-        ProjectVersion.query.filter_by(
-            project_id=diff_project.id, name=response.json[0]["version"]
-        )
-        .first()
-        .files
-    )
 
     # simple update
     response = client.get(f"v2/projects/{diff_project.id}/delta?since=4&to=8")
     assert response.status_code == 200
     delta = response.json
     assert len(delta) == 1
-
-    # simulate pull of delta[0]
     assert delta[0]["change"] == PushChangeType.UPDATE.value
-    assert delta[0]["version"] == 5
+    # version is the latest version of the change
+    assert delta[0]["version"] == 7
     assert not delta[0].get("diffs")

From 2077d89dcde02b157ed543a2d77b7b6740f197d1 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Tue, 28 Oct 2025 17:19:42 +0100
Subject: [PATCH 16/17] fix integrity test

---
 server/mergin/tests/test_project_controller.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/server/mergin/tests/test_project_controller.py b/server/mergin/tests/test_project_controller.py
index b0d89502..916c2bc4 100644
--- a/server/mergin/tests/test_project_controller.py
+++ b/server/mergin/tests/test_project_controller.py
@@ -2341,6 +2341,10 @@ def _get_user_agent():
         .order_by(desc(ProjectVersion.created))
         .first()
     )
+    # remove project version delta entries
+    ProjectVersionDelta.query.filter_by(
+        project_id=upload.project_id, version=pv.name
+    ).delete()
     db.session.delete(pv)
     db.session.commit()
     upload.project.cache_latest_files()

From 3685e43c590fdb8e1cc5da14430d91f85bc26eff Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Wed, 29 Oct 2025 17:52:26 +0100
Subject: [PATCH 17/17] Upgrade logic

- update response to items: []
- make the changes merging logic clearer
- @varmar05 --- server/mergin/sync/files.py | 13 +- server/mergin/sync/models.py | 187 +++++++++++++----- server/mergin/sync/public_api_v2.yaml | 15 +- .../mergin/sync/public_api_v2_controller.py | 8 +- server/mergin/tests/test_public_api_v2.py | 67 ++++--- 5 files changed, 198 insertions(+), 92 deletions(-) diff --git a/server/mergin/sync/files.py b/server/mergin/sync/files.py index d7d995d6..63c387fd 100644 --- a/server/mergin/sync/files.py +++ b/server/mergin/sync/files.py @@ -331,8 +331,8 @@ def patch_field(self, data, **kwargs): return data -class DeltaChangeRespSchema(DeltaChangeBaseSchema): - """Schema for delta data response""" +class DeltaChangeItemSchema(DeltaChangeBaseSchema): + """Schema for delta changes response""" diffs = fields.List(fields.Nested(DeltaChangeDiffFileSchema())) @@ -342,3 +342,12 @@ def patch_field(self, data, **kwargs): if not data.get("diffs"): data.pop("diffs", None) return data + + +class DeltaChangeRespSchema(ma.Schema): + """Schema for list of delta changes wrapped in items field""" + + items = fields.List(fields.Nested(DeltaChangeItemSchema())) + + class Meta: + unknown = EXCLUDE diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py index 9e12b68f..985453c8 100644 --- a/server/mergin/sync/models.py +++ b/server/mergin/sync/models.py @@ -65,6 +65,16 @@ class FileSyncErrorType(Enum): SYNC_ERROR = "sync error" +class ChangeComparisonAction(Enum): + """Actions to take when comparing two changes""" + + REPLACE = "replace" + DELETE = "delete" + UPDATE = "update" + UPDATE_DIFF = "update_diff" + EXCLUDE = "exclude" # Return None to exclude the file + + class Project(db.Model): id = db.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) name = db.Column(db.String, index=True) @@ -993,7 +1003,7 @@ class ProjectVersionDelta(db.Model): version = db.Column(db.Integer, nullable=False, index=True) # exponential order of changes json rank = db.Column(db.Integer, nullable=False, index=True) - # to which project version is this linked + # to which project is this linked project_id = db.Column( UUID(as_uuid=True), db.ForeignKey("project.id", ondelete="CASCADE"), @@ -1025,94 +1035,167 @@ def merge_changes( Merge changes json array objects into one list of changes. Changes are merged based on file path and change type. 
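
        Illustration with made-up values: consecutive diff-based updates are
        merged into one change whose diffs keep version order:

            merged = ProjectVersionDelta.merge_changes([
                DeltaChange(path="a.gpkg", change=PushChangeType.UPDATE_DIFF,
                            version=4, size=11, checksum="c1", diff="diff1"),
                DeltaChange(path="a.gpkg", change=PushChangeType.UPDATE_DIFF,
                            version=5, size=12, checksum="c2", diff="diff2"),
            ])
            # -> one UPDATE_DIFF with diffs ["diff1", "diff2"] and the
            #    version 5 metadata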
""" - result: Dict[str, DeltaChangeMerged] = {} updating_files: Set[str] = set() # sorting changes by version to apply them in correct order items.sort(key=lambda x: x.version) - def handle_replace(result, path, current, previous): - result[path] = current - - def handle_delete(result, path, current, previous): + # Merge changes for each file in a single pass + result: Dict[str, DeltaChangeMerged] = {} + for item in items: + path = item.path + current = item.to_merged() - if path in updating_files: + # First change for this file + if path not in result: result[path] = current + # track existing paths to avoid deleting files that are already in history before + if current.change != PushChangeType.CREATE: + updating_files.add(path) + continue + + # Compare and merge with previous change for this file + can_delete = path in updating_files + new_change = ProjectVersionDelta._compare_changes( + result[path], current, can_delete + ) + + # Update result (or remove if no change is detected) + if new_change is not None: + result[path] = new_change else: del result[path] - def handle_update(result, path, current, previous): - # handle update case, when previous change was create - just revert to create with new metadata - current.change = previous.change - current.diffs = [] - result[path] = current + return list(result.values()) - def handle_update_diff(result, path, current, previous): - current.diffs = (previous.diffs or []) + (current.diffs or []) - result[path] = current + @staticmethod + def _compare_changes( + previous: DeltaChangeMerged, + new: DeltaChangeMerged, + prevent_delete_change: bool, + ) -> Optional[DeltaChangeMerged]: + """ + Compare and merge two changes for the same file. + + Args: + previous: Previously accumulated change + new: New change to compare + prevent_delete_change: Whether the change can be deleted when resolving create+delete sequences + + Returns: + Merged change or None if file should be excluded + """ - dispatch = { + # Map change type pairs to actions + action_map = { # create + delete = file is transparent for current changes -> delete it - (PushChangeType.CREATE, PushChangeType.DELETE): handle_delete, + ( + PushChangeType.CREATE, + PushChangeType.DELETE, + ): ChangeComparisonAction.DELETE, # create + update = create with updated info - (PushChangeType.CREATE, PushChangeType.UPDATE): handle_update, - (PushChangeType.CREATE, PushChangeType.UPDATE_DIFF): handle_update, - (PushChangeType.CREATE, PushChangeType.CREATE): None, + ( + PushChangeType.CREATE, + PushChangeType.UPDATE, + ): ChangeComparisonAction.UPDATE, + ( + PushChangeType.CREATE, + PushChangeType.UPDATE_DIFF, + ): ChangeComparisonAction.UPDATE, + ( + PushChangeType.CREATE, + PushChangeType.CREATE, + ): ChangeComparisonAction.EXCLUDE, # update + update_diff = update with latest info ( PushChangeType.UPDATE, PushChangeType.UPDATE_DIFF, - ): handle_update, - (PushChangeType.UPDATE, PushChangeType.UPDATE): handle_replace, - (PushChangeType.UPDATE, PushChangeType.DELETE): handle_replace, - (PushChangeType.UPDATE, PushChangeType.CREATE): handle_replace, + ): ChangeComparisonAction.UPDATE, + ( + PushChangeType.UPDATE, + PushChangeType.UPDATE, + ): ChangeComparisonAction.REPLACE, + ( + PushChangeType.UPDATE, + PushChangeType.DELETE, + ): ChangeComparisonAction.REPLACE, + ( + PushChangeType.UPDATE, + PushChangeType.CREATE, + ): ChangeComparisonAction.REPLACE, # update_diff + update_diff = update_diff with latest info with proper order of diffs ( PushChangeType.UPDATE_DIFF, PushChangeType.UPDATE_DIFF, - ): 
diff --git a/server/mergin/sync/public_api_v2.yaml b/server/mergin/sync/public_api_v2.yaml
index 9ac4a2d0..7436e71d 100644
--- a/server/mergin/sync/public_api_v2.yaml
+++ b/server/mergin/sync/public_api_v2.yaml
@@ -410,9 +410,7 @@ paths:
           content:
             application/json:
               schema:
-                type: array
-                items:
-                  $ref: "#/components/schemas/ProjectDelta"
+                $ref: "#/components/schemas/ProjectDeltaResponse"
       "400":
         $ref: "#/components/responses/BadRequest"
       "404":
@@ -841,7 +839,7 @@ components:
           type: string
          enum: [create, update, delete, update_diff]
           example: update
-    ProjectDelta:
+    ProjectDeltaChange:
       type: object
       required:
         - path
@@ -873,3 +871,12 @@ components:
           path:
             type: string
             example: survey.gpkg-diff-1
+    ProjectDeltaResponse:
+      type: object
+      required:
+        - items
+      properties:
+        items:
+          type: array
+          items:
+            $ref: "#/components/schemas/ProjectDeltaChange"
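
For reference, an illustrative payload matching ProjectDeltaResponse (values are made up; the field set follows ProjectDeltaChange above, abridged):

{
  "items": [
    {
      "path": "survey.gpkg",
      "change": "update_diff",
      "version": 7,
      "size": 98304,
      "diffs": [{"path": "survey.gpkg-diff-1"}]
    }
  ]
}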
""" delta_range = [ change - for change in changes + for change in from_deltas if checkpoint.start <= change.version <= checkpoint.end ] @@ -1122,7 +1205,7 @@ def create_checkpoint( ) return None - # dump delta objects from database and flatten list for merging + # dump changes lists from database and flatten list for merging changes = [] for delta in delta_range: changes.extend(DeltaChangeSchema(many=True).load(delta.changes)) diff --git a/server/mergin/sync/public_api_v2.yaml b/server/mergin/sync/public_api_v2.yaml index 9ac4a2d0..7436e71d 100644 --- a/server/mergin/sync/public_api_v2.yaml +++ b/server/mergin/sync/public_api_v2.yaml @@ -410,9 +410,7 @@ paths: content: application/json: schema: - type: array - items: - $ref: "#/components/schemas/ProjectDelta" + $ref: "#/components/schemas/ProjectDeltaResponse" "400": $ref: "#/components/responses/BadRequest" "404": @@ -841,7 +839,7 @@ components: type: string enum: [create, update, delete, update_diff] example: update - ProjectDelta: + ProjectDeltaChange: type: object required: - path @@ -873,3 +871,12 @@ components: path: type: string example: survey.gpkg-diff-1 + ProjectDeltaResponse: + type: object + required: + - items + properties: + items: + type: array + items: + $ref: "#/components/schemas/ProjectDeltaChange" diff --git a/server/mergin/sync/public_api_v2_controller.py b/server/mergin/sync/public_api_v2_controller.py index 5f539b50..dd1802df 100644 --- a/server/mergin/sync/public_api_v2_controller.py +++ b/server/mergin/sync/public_api_v2_controller.py @@ -414,10 +414,8 @@ def get_project_delta(id: str, since: int, to: Optional[int] = None): abort(400, "'to' version exceeds latest project version") if since >= to: - abort(400, "'since' version must be less than or equal to 'to' version") + abort(400, "'since' version must be less than 'to' version") - delta_changes = project.get_delta_changes(since, to) - if delta_changes is None: - abort(404) + delta_changes = project.get_delta_changes(since, to) or [] - return DeltaChangeRespSchema(many=True).dump(delta_changes), 200 + return DeltaChangeRespSchema().dump({"items": delta_changes}), 200 diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py index 81ef52c1..d9efe7f1 100644 --- a/server/mergin/tests/test_public_api_v2.py +++ b/server/mergin/tests/test_public_api_v2.py @@ -415,7 +415,7 @@ def test_delta_merge_changes(): size=0, checksum="ghi", ) - created = DeltaChange( + create = DeltaChange( path="file1.gpkg", change=PushChangeType.CREATE, version=7, @@ -449,7 +449,7 @@ def test_project_version_delta_changes(client, diff_project: Project): .order_by(ProjectVersionDelta.version) .all() ) - # check if deltas are created after pushes + # check if deltas are created after pushes within ProjectVersion creation assert len(deltas) == 10 initial_delta = deltas[0] initial_version = ProjectVersion.query.filter_by( @@ -535,9 +535,18 @@ def test_project_version_delta_changes(client, diff_project: Project): fh = FileHistory.query.filter_by( project_version_name=latest_version.name - 1 ).first() - base_gpkg_checkpoint = FileDiff.query.filter_by(basefile_id=fh.id, rank=1).first() - assert base_gpkg_checkpoint - assert base_gpkg_checkpoint.version == latest_version.name + 6 + # testing constistency of db entries FileDiff and ProjectVersionDelta + test_gpkg_checkpoint = FileDiff.query.filter_by(basefile_id=fh.id, rank=1).first() + assert test_gpkg_checkpoint + assert test_gpkg_checkpoint.version == latest_version.name + 6 + delta_checkpoint = 
diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py
index 81ef52c1..d9efe7f1 100644
--- a/server/mergin/tests/test_public_api_v2.py
+++ b/server/mergin/tests/test_public_api_v2.py
@@ -415,7 +415,7 @@ def test_delta_merge_changes():
         size=0,
         checksum="ghi",
     )
-    created = DeltaChange(
+    create = DeltaChange(
         path="file1.gpkg",
         change=PushChangeType.CREATE,
         version=7,
@@ -449,7 +449,7 @@ def test_project_version_delta_changes(client, diff_project: Project):
         .order_by(ProjectVersionDelta.version)
         .all()
     )
-    # check if deltas are created after pushes
+    # check that deltas are created along with each ProjectVersion after a push
     assert len(deltas) == 10
     initial_delta = deltas[0]
     initial_version = ProjectVersion.query.filter_by(
@@ -535,9 +535,18 @@ def test_project_version_delta_changes(client, diff_project: Project):
     fh = FileHistory.query.filter_by(
         project_version_name=latest_version.name - 1
     ).first()
-    base_gpkg_checkpoint = FileDiff.query.filter_by(basefile_id=fh.id, rank=1).first()
-    assert base_gpkg_checkpoint
-    assert base_gpkg_checkpoint.version == latest_version.name + 6
+    # test consistency of FileDiff and ProjectVersionDelta db entries
+    test_gpkg_checkpoint = FileDiff.query.filter_by(basefile_id=fh.id, rank=1).first()
+    assert test_gpkg_checkpoint
+    assert test_gpkg_checkpoint.version == latest_version.name + 6
+    delta_checkpoint = ProjectVersionDelta.query.filter_by(
+        project_id=diff_project.id, version=latest_version.name + 6, rank=1
+    ).first()
+    assert delta_checkpoint
+    assert len(delta_checkpoint.changes) == 1
+    assert delta_checkpoint.changes[0]["version"] == latest_version.name + 6
+    assert delta_checkpoint.changes[0]["change"] == PushChangeType.UPDATE_DIFF.value
+    assert delta_checkpoint.changes[0]["diff"] == test_gpkg_checkpoint.path

     fh = FileHistory.query.filter_by(
         project_version_name=latest_version.name + 6
@@ -545,7 +554,7 @@ def test_project_version_delta_changes(client, diff_project: Project):
     delta = diff_project.get_delta_changes(12, latest_version.name + 6)
     assert len(delta) == 1
     assert len(delta[0].diffs) == 1
-    assert delta[0].diffs[0].path == base_gpkg_checkpoint.path
+    assert delta[0].diffs[0].path == test_gpkg_checkpoint.path
     assert delta[0].change == PushChangeType.UPDATE_DIFF
     assert delta[0].checksum == fh.checksum
     assert delta[0].size == fh.size
@@ -916,17 +925,17 @@ def test_project_delta(client, diff_project):
     push_change(initial_project, "added", "base.gpkg", working_dir)
     response = client.get(f"v2/projects/{initial_project.id}/delta?since=0")
     assert response.status_code == 200
-    delta = response.json
-    assert len(delta) == 1
-    assert delta[0]["change"] == PushChangeType.CREATE.value
-    assert delta[0]["version"] == 1
+    changes = response.json["items"]
+    assert len(changes) == 1
+    assert changes[0]["change"] == PushChangeType.CREATE.value
+    assert changes[0]["version"] == 1

     # remove the file and get changes from 0 -> 2 where base.gpkg is removed -> transparent
     push_change(initial_project, "removed", "base.gpkg", working_dir)
     response = client.get(f"v2/projects/{initial_project.id}/delta?since=0")
     assert response.status_code == 200
-    delta = response.json
-    assert len(delta) == 0
+    changes = response.json["items"]
+    assert len(changes) == 0

     # invalid cases
     response = client.get(f"v2/projects/{diff_project.id}/delta")
@@ -947,32 +956,32 @@ def test_project_delta(client, diff_project):
     # since 1 to latest version
     response = client.get(f"v2/projects/{diff_project.id}/delta?since=1")
     assert response.status_code == 200
+    changes = response.json["items"]
     # create of test.gpkg and delete base.gpkg
-    assert len(response.json) == 2
-    assert response.json[0]["change"] == PushChangeType.DELETE.value
-    assert response.json[0]["version"] == 9
-    assert response.json[0]["path"] == "base.gpkg"
-    assert response.json[0]["size"] == 98304
+    assert len(changes) == 2
+    assert changes[0]["change"] == PushChangeType.DELETE.value
+    assert changes[0]["version"] == 9
+    assert changes[0]["path"] == "base.gpkg"
+    assert changes[0]["size"] == 98304

-    assert response.json[1]["change"] == PushChangeType.CREATE.value
-    assert response.json[1]["version"] == 9
-    assert response.json[1]["path"] == "test.gpkg"
-    assert response.json[1]["size"] == 98304
+    assert changes[1]["change"] == PushChangeType.CREATE.value
+    assert changes[1]["version"] == 9
+    assert changes[1]["path"] == "test.gpkg"
+    assert changes[1]["size"] == 98304

     # simple update
     response = client.get(f"v2/projects/{diff_project.id}/delta?since=4&to=8")
     assert response.status_code == 200
-    delta = response.json
-    assert len(delta) == 1
-    assert delta[0]["change"] == PushChangeType.UPDATE.value
+    changes = response.json["items"]
+    assert len(changes) == 1
+    assert changes[0]["change"] == PushChangeType.UPDATE.value
     # version is new latest version of the change
-    assert delta[0]["version"] == 7
-    assert not delta[0].get("diffs")
+    assert changes[0]["version"] == 7
+    assert not changes[0].get("diffs")


-# integration test for pull mechanism
 def test_project_pull_diffs(client, diff_project):
-    """Test project pull mechanisom in v2 with diff files"""
+    """Integration test for the project pull mechanism in v2 with diff files"""
     since = 5
     to = 7
     # check diff files in database where we can get them with right order and metadata
@@ -983,7 +992,7 @@ def test_project_pull_diffs(client, diff_project):
     )
     response = client.get(f"v2/projects/{diff_project.id}/delta?since={since}&to={to}")
     assert response.status_code == 200
-    delta = response.json
+    delta = response.json["items"]
     assert len(delta) == 1
     assert delta[0]["change"] == PushChangeType.UPDATE_DIFF.value
     assert delta[0]["version"] == 7