diff --git a/server/mergin/config.py b/server/mergin/config.py index d3bc65dc..525d1b22 100644 --- a/server/mergin/config.py +++ b/server/mergin/config.py @@ -79,6 +79,8 @@ class Configuration(object): MERGIN_BASE_URL = config("MERGIN_BASE_URL", default="") # for link to logo in emails MERGIN_LOGO_URL = config("MERGIN_LOGO_URL", default="") + # for link to logos in EE branding + DASHBOARD_LOGO_URL = config("DASHBOARD_LOGO_URL", default=MERGIN_LOGO_URL) MERGIN_SUBSCRIPTIONS = config("MERGIN_SUBSCRIPTIONS", default=False, cast=bool) MERGIN_TESTING = config("MERGIN_TESTING", default=False, cast=bool) diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py index 6527213c..cc7779d2 100644 --- a/server/mergin/sync/models.py +++ b/server/mergin/sync/models.py @@ -135,6 +135,36 @@ def workspace(self): project_workspace = current_app.ws_handler.get(self.workspace_id) return project_workspace + def get_latest_files_cache(self) -> List[int]: + """Get latest file history ids either from cached table or calculate them on the fly""" + if self.latest_project_files.file_history_ids is not None: + return self.latest_project_files.file_history_ids + + query = f""" + WITH latest_changes AS ( + SELECT + fp.id, + pv.project_id, + max(pv.name) AS version + FROM + project_version pv + LEFT OUTER JOIN file_history fh ON fh.version_id = pv.id + LEFT OUTER JOIN project_file_path fp ON fp.id = fh.file_path_id + WHERE + pv.project_id = :project_id + AND pv.name <= :latest_version + GROUP BY + fp.id, pv.project_id + ) + SELECT + fh.id + FROM latest_changes ch + LEFT OUTER JOIN file_history fh ON (fh.file_path_id = ch.id AND fh.project_version_name = ch.version AND fh.change != 'delete') + WHERE fh.id IS NOT NULL; + """ + params = {"project_id": self.id, "latest_version": self.latest_version} + return [row.id for row in db.session.execute(text(query), params).fetchall()] + def cache_latest_files(self) -> None: """Get project files from changes (FileHistory) and save them for later use.""" if self.latest_version is None: @@ -514,7 +544,11 @@ def generate_diff_name(self): class LatestProjectFiles(db.Model): - """Store project latest version files history ids""" + """Store project latest version files history ids. + + This is a caching table to store the latest relevant files history ids for further use in + Project.files and ProjectVersion.files. It is updated when ProjectVersion itself is created. + """ project_id = db.Column( UUID(as_uuid=True), @@ -743,22 +777,21 @@ def diffs_chain( return None, [] diffs = [] - cached_items = Checkpoint.get_checkpoints( - basefile.project_version_name, version - ) + checkpoints = Checkpoint.get_checkpoints(basefile.project_version_name, version) expected_diffs = ( FileDiff.query.filter_by( basefile_id=basefile.id, ) .filter( tuple_(FileDiff.rank, FileDiff.version).in_( - [(item.rank, item.end) for item in cached_items] + [(item.rank, item.end) for item in checkpoints] ) ) .all() ) - for item in cached_items: + for item in checkpoints: + diff_needs_to_be_created = False diff = next( ( d @@ -767,25 +800,38 @@ def diffs_chain( ), None, ) - if diff and os.path.exists(diff.abs_path): - diffs.append(diff) - elif item.rank > 0: - # fallback if checkpoint does not exist: replace merged diff with individual diffs - individual_diffs = ( - FileDiff.query.filter_by( - basefile_id=basefile.id, - rank=0, + if diff: + if os.path.exists(diff.abs_path): + diffs.append(diff) + else: + diff_needs_to_be_created = True + else: + # we do not have record in DB, create a checkpoint if it makes sense + if item.rank > 0 and FileDiff.can_create_checkpoint(file_id, item): + diff = FileDiff( + basefile=basefile, + version=item.end, + rank=item.rank, + path=basefile.file.generate_diff_name(), + size=None, + checksum=None, ) - .filter( - FileDiff.version >= item.start, FileDiff.version <= item.end + db.session.add(diff) + db.session.commit() + diff_needs_to_be_created = True + else: + # we asked for checkpoint where there was no change + continue + + if diff_needs_to_be_created: + diff_created = diff.construct_checkpoint() + if diff_created: + diffs.append(diff) + else: + logging.error( + f"Failed to create a diff for file {basefile.file.path} at version {basefile.project_version_name} of rank {item.rank}." ) - .order_by(FileDiff.version) - .all() - ) - diffs.extend(individual_diffs) - else: - # we asked for individual diff but there is no such diff as there was not change at that version - continue + return None, [] return basefile, diffs @@ -924,9 +970,10 @@ def construct_checkpoint(self) -> bool: return True if self.rank == 0: - raise ValueError( + logging.error( "Checkpoint of rank 0 should be created by user upload, cannot be constructed" ) + return False # merged diffs can only be created for certain versions if self.version % LOG_BASE: @@ -1434,7 +1481,7 @@ def __init__( latest_files_map = { fh.path: fh.id for fh in FileHistory.query.filter( - FileHistory.id.in_(self.project.latest_project_files.file_history_ids) + FileHistory.id.in_(self.project.get_latest_files_cache()) ).all() } @@ -1565,6 +1612,10 @@ def _files_from_end(self): files that were delete after the version (and thus not necessarily present now). From these candidates get the latest file change before or at the specific version. If that change was not 'delete', file is present. """ + # if we do not have cached file history ids use different strategy where it is not necessary + if self.project.latest_project_files.file_history_ids is None: + return self._files_from_start() + query = f""" WITH files_changes_before_version AS ( WITH files_candidates AS ( diff --git a/server/mergin/sync/public_api_controller.py b/server/mergin/sync/public_api_controller.py index 7a764e44..8e2b0ea8 100644 --- a/server/mergin/sync/public_api_controller.py +++ b/server/mergin/sync/public_api_controller.py @@ -8,6 +8,7 @@ import os import logging from dataclasses import asdict +from enum import Enum from typing import Dict from datetime import datetime @@ -80,6 +81,7 @@ generate_location, is_valid_uuid, get_device_id, + is_versioned_file, prepare_download_response, get_device_id, wkb2wkt, @@ -287,6 +289,12 @@ def delete_project(namespace, project_name): # noqa: E501 return NoContent, 200 +class DowloadFileAction(Enum): + FULL = "full" + FULL_GPKG = "full_gpkg" + DIFF = "diff" + + def download_project_file( project_name, namespace, file, version=None, diff=None ): # noqa: E501 @@ -307,10 +315,20 @@ def download_project_file( :rtype: file """ - project = require_project(namespace, project_name, ProjectPermissions.Read) - if diff and not version: + if not is_versioned_file(file): + action = DowloadFileAction.FULL + elif diff: + action = DowloadFileAction.DIFF + else: + action = DowloadFileAction.FULL_GPKG + + if action is DowloadFileAction.DIFF and not version: abort(400, f"Changeset must be requested for particular file version") + if action is DowloadFileAction.FULL and diff is True: + abort(404, f"No diff in particular file {file})") + + project = require_project(namespace, project_name, ProjectPermissions.Read) lookup_version = ( ProjectVersion.from_v_name(version) if version else project.latest_version ) @@ -329,24 +347,30 @@ def download_project_file( if not fh or fh.change == PushChangeType.DELETE.value: abort(404, f"File {file} not found") - if diff and version: - # get specific version of geodiff file modified in requested version - if not fh.diff: - abort(404, f"No diff in particular file {file} version") - file_path = fh.diff_file.location - else: - file_path = fh.location - - if version and not diff: - project.storage.restore_versioned_file( - file, ProjectVersion.from_v_name(version) - ) + # user asked for diff, but there is no diff at that version + if action is DowloadFileAction.DIFF and not fh.diff: + abort(404, f"No diff in particular file {file} version") + file_path = ( + fh.diff_file.location if action is DowloadFileAction.DIFF else fh.location + ) abs_path = os.path.join(project.storage.project_dir, file_path) - # check file exists (e.g. there might have been issue with restore) + if not os.path.exists(abs_path): - logging.error(f"Missing file {namespace}/{project_name}/{file_path}") - abort(404) + if action is DowloadFileAction.FULL_GPKG: + project.storage.restore_versioned_file( + file, ProjectVersion.from_v_name(version) + ) + + # check again after restore + if not os.path.exists(abs_path): + logging.error( + f"Failed to restore {namespace}/{project_name}/{file_path}" + ) + abort(404) + else: + logging.error(f"Missing file {namespace}/{project_name}/{file_path}") + abort(404) response = prepare_download_response(project.storage.project_dir, file_path) return response diff --git a/server/mergin/tests/test_file_restore.py b/server/mergin/tests/test_file_restore.py index 19e02ce2..42ee2446 100644 --- a/server/mergin/tests/test_file_restore.py +++ b/server/mergin/tests/test_file_restore.py @@ -8,7 +8,13 @@ from ..app import db from ..auth.models import User -from ..sync.models import ProjectVersion, Project, GeodiffActionHistory +from ..sync.models import ( + FileDiff, + ProjectFilePath, + ProjectVersion, + Project, + GeodiffActionHistory, +) from . import test_project_dir, TMP_DIR from .utils import ( create_project, @@ -163,6 +169,19 @@ def test_version_file_restore(diff_project): diff_project.storage.restore_versioned_file("base.gpkg", 7) assert os.path.exists(test_file) assert gpkgs_are_equal(test_file, test_file + "_backup") + # no merged diffs needed + file_path_id = ( + ProjectFilePath.query.filter_by(project_id=diff_project.id, path="base.gpkg") + .first() + .id + ) + assert ( + FileDiff.query.filter_by(file_path_id=file_path_id) + .filter(FileDiff.rank > 0) + .count() + == 0 + ) + # check we track performance of reconstruction gh = GeodiffActionHistory.query.filter_by( project_id=diff_project.id, target_version="v7" @@ -198,3 +217,44 @@ def test_version_file_restore(diff_project): diff_project.storage.restore_versioned_file("test.txt", 1) assert not os.path.exists(test_file) assert not os.path.exists(diff_project.storage.geodiff_working_dir) + + # let's add some dummy changes to test.gpkg so we can restore full gpkg using checkpoints created on demand + file_path_id = ( + ProjectFilePath.query.filter_by(project_id=diff_project.id, path="test.gpkg") + .first() + .id + ) + base_gpkg = os.path.join(diff_project.storage.project_dir, "test.gpkg") + shutil.copy( + os.path.join(diff_project.storage.project_dir, "v9", "test.gpkg"), base_gpkg + ) + for i in range(23): + sql = f"UPDATE simple SET rating={i}" + execute_query(base_gpkg, sql) + pv = push_change( + diff_project, "updated", "test.gpkg", diff_project.storage.project_dir + ) + assert diff_project.latest_version == pv.name == (11 + i) + file_diff = FileDiff.query.filter_by( + file_path_id=file_path_id, version=pv.name, rank=0 + ).first() + assert file_diff and os.path.exists(file_diff.abs_path) + + assert ( + FileDiff.query.filter_by(file_path_id=file_path_id) + .filter(FileDiff.rank > 0) + .count() + == 0 + ) + + test_file = os.path.join(diff_project.storage.project_dir, "v30", "test.gpkg") + os.rename(test_file, test_file + "_backup") + diff_project.storage.restore_versioned_file("test.gpkg", 30) + assert os.path.exists(test_file) + assert gpkgs_are_equal(test_file, test_file + "_backup") + assert ( + FileDiff.query.filter_by(file_path_id=file_path_id) + .filter(FileDiff.rank > 0) + .count() + > 0 + ) diff --git a/server/mergin/tests/test_project_controller.py b/server/mergin/tests/test_project_controller.py index 851915cf..ad04ad3c 100644 --- a/server/mergin/tests/test_project_controller.py +++ b/server/mergin/tests/test_project_controller.py @@ -880,6 +880,14 @@ def test_download_diff_file(client, diff_project): # download full version after file was removed os.remove(os.path.join(diff_project.storage.project_dir, file_change.location)) + file_path_id = ( + ProjectFilePath.query.filter_by(project_id=diff_project.id, path=test_file) + .first() + .id + ) + basefile, diffs = FileHistory.diffs_chain(file_path_id, 4) + # we construct full gpkg from basefile and single diff v4 + assert basefile is not None and len(diffs) == 1 resp = client.get( "/v1/project/raw/{}/{}?file={}&version=v4".format( test_workspace_name, test_project, test_file @@ -1902,11 +1910,17 @@ def test_file_diffs_chain(diff_project): assert len(diffs) == 1 assert diffs[0].version == 6 - # diff was used in v7, nothing happened in v8 (=v7) - basefile, diffs = FileHistory.diffs_chain(file_id, 8) + # diff was used in v7 + basefile, diffs = FileHistory.diffs_chain(file_id, 7) assert basefile.version.name == 5 assert len(diffs) == 2 + # nothing happened in v8 (=v7) but we have now merged diff in chain v5-v8 + basefile, diffs = FileHistory.diffs_chain(file_id, 8) + assert basefile.version.name == 5 + assert len(diffs) == 1 + assert diffs[0].rank == 1 and diffs[0].version == 8 + # file was removed in v9 basefile, diffs = FileHistory.diffs_chain(file_id, 9) assert not basefile diff --git a/server/mergin/tests/test_public_api_v2.py b/server/mergin/tests/test_public_api_v2.py index 85c6ab38..46b22cbc 100644 --- a/server/mergin/tests/test_public_api_v2.py +++ b/server/mergin/tests/test_public_api_v2.py @@ -304,24 +304,24 @@ def test_create_diff_checkpoint(diff_project): basefile, diffs = FileHistory.diffs_chain(file_path_id, 32) assert basefile.project_version_name == 9 - # so far we only have individual diffs - assert len(diffs) == 22 + assert len(diffs) == 3 + # also a lower diff rank was created + lower_diff = FileDiff.query.filter_by(version=24, rank=1).first() + assert os.path.exists(lower_diff.abs_path) # diff for v17-v20 from individual diffs assert FileDiff.can_create_checkpoint(file_path_id, Checkpoint(1, 5)) is True - diff = FileDiff( - basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=20, rank=1 - ) - db.session.add(diff) - db.session.commit() - assert not os.path.exists(diff.abs_path) + diff = FileDiff.query.filter_by( + file_path_id=file_path_id, version=20, rank=1 + ).first() + os.remove(diff.abs_path) diff.construct_checkpoint() assert os.path.exists(diff.abs_path) basefile, diffs = FileHistory.diffs_chain(file_path_id, 20) assert basefile.project_version_name == 9 - # 6 individual diffs (v11-v16) + merged diff (v17-v20) as the last one - assert len(diffs) == 7 + # individual diff v12 + (v13-v16) + (v17-v20) as the last one + assert len(diffs) == 3 assert diffs[-1] == diff # repeat - nothing to do @@ -329,20 +329,6 @@ def test_create_diff_checkpoint(diff_project): diff.construct_checkpoint() assert mtime == os.path.getmtime(diff.abs_path) - # some lower rank diffs still missing - assert not FileDiff.query.filter_by(version=24, rank=1).count() - - # diff for v17-v32 with merged diffs, this will also create lower missing ranks - diff = FileDiff( - basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=32, rank=2 - ) - db.session.add(diff) - db.session.commit() - diff.construct_checkpoint() - assert os.path.exists(diff.abs_path) - lower_diff = FileDiff.query.filter_by(version=24, rank=1).first() - assert os.path.exists(lower_diff.abs_path) - # assert gpkg diff is the same as it would be from merging all individual diffs individual_diffs = ( FileDiff.query.filter_by(file_path_id=file_path_id, rank=0) @@ -368,11 +354,12 @@ def test_create_diff_checkpoint(diff_project): # geodiff failure mock.side_effect = GeoDiffLibError - diff = FileDiff( - basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=16, rank=1 - ) - db.session.add(diff) - db.session.commit() + diff = FileDiff.query.filter_by( + file_path_id=file_path_id, version=16, rank=1 + ).first() + assert os.path.exists(diff.abs_path) + os.remove(diff.abs_path) + diff.construct_checkpoint() assert mock.called assert not os.path.exists(diff.abs_path) diff --git a/web-app/packages/lib/src/common/components/AppOnboardingPage.vue b/web-app/packages/lib/src/common/components/AppOnboardingPage.vue index b1fa016b..3540525c 100644 --- a/web-app/packages/lib/src/common/components/AppOnboardingPage.vue +++ b/web-app/packages/lib/src/common/components/AppOnboardingPage.vue @@ -7,8 +7,16 @@ { 'onborading-page-logo': $slots.aside } ]" > - - + + + Not Found @@ -45,12 +53,35 @@