Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions server/mergin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ class Configuration(object):
MERGIN_BASE_URL = config("MERGIN_BASE_URL", default="")
# for link to logo in emails
MERGIN_LOGO_URL = config("MERGIN_LOGO_URL", default="")
# for link to logos in EE branding
DASHBOARD_LOGO_URL = config("DASHBOARD_LOGO_URL", default=MERGIN_LOGO_URL)

MERGIN_SUBSCRIPTIONS = config("MERGIN_SUBSCRIPTIONS", default=False, cast=bool)
MERGIN_TESTING = config("MERGIN_TESTING", default=False, cast=bool)
Expand Down
101 changes: 76 additions & 25 deletions server/mergin/sync/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,36 @@ def workspace(self):
project_workspace = current_app.ws_handler.get(self.workspace_id)
return project_workspace

def get_latest_files_cache(self) -> List[int]:
"""Get latest file history ids either from cached table or calculate them on the fly"""
if self.latest_project_files.file_history_ids is not None:
return self.latest_project_files.file_history_ids

query = f"""
WITH latest_changes AS (
SELECT
fp.id,
pv.project_id,
max(pv.name) AS version
FROM
project_version pv
LEFT OUTER JOIN file_history fh ON fh.version_id = pv.id
LEFT OUTER JOIN project_file_path fp ON fp.id = fh.file_path_id
WHERE
pv.project_id = :project_id
AND pv.name <= :latest_version
GROUP BY
fp.id, pv.project_id
)
SELECT
fh.id
FROM latest_changes ch
LEFT OUTER JOIN file_history fh ON (fh.file_path_id = ch.id AND fh.project_version_name = ch.version AND fh.change != 'delete')
WHERE fh.id IS NOT NULL;
"""
params = {"project_id": self.id, "latest_version": self.latest_version}
return [row.id for row in db.session.execute(text(query), params).fetchall()]

def cache_latest_files(self) -> None:
"""Get project files from changes (FileHistory) and save them for later use."""
if self.latest_version is None:
Expand Down Expand Up @@ -514,7 +544,11 @@ def generate_diff_name(self):


class LatestProjectFiles(db.Model):
"""Store project latest version files history ids"""
"""Store project latest version files history ids.

This is a caching table to store the latest relevant files history ids for further use in
Project.files and ProjectVersion.files. It is updated when ProjectVersion itself is created.
"""

project_id = db.Column(
UUID(as_uuid=True),
Expand Down Expand Up @@ -743,22 +777,21 @@ def diffs_chain(
return None, []

diffs = []
cached_items = Checkpoint.get_checkpoints(
basefile.project_version_name, version
)
checkpoints = Checkpoint.get_checkpoints(basefile.project_version_name, version)
expected_diffs = (
FileDiff.query.filter_by(
basefile_id=basefile.id,
)
.filter(
tuple_(FileDiff.rank, FileDiff.version).in_(
[(item.rank, item.end) for item in cached_items]
[(item.rank, item.end) for item in checkpoints]
)
)
.all()
)

for item in cached_items:
for item in checkpoints:
diff_needs_to_be_created = False
diff = next(
(
d
Expand All @@ -767,25 +800,38 @@ def diffs_chain(
),
None,
)
if diff and os.path.exists(diff.abs_path):
diffs.append(diff)
elif item.rank > 0:
# fallback if checkpoint does not exist: replace merged diff with individual diffs
individual_diffs = (
FileDiff.query.filter_by(
basefile_id=basefile.id,
rank=0,
if diff:
if os.path.exists(diff.abs_path):
diffs.append(diff)
else:
diff_needs_to_be_created = True
else:
# we do not have record in DB, create a checkpoint if it makes sense
if item.rank > 0 and FileDiff.can_create_checkpoint(file_id, item):
diff = FileDiff(
basefile=basefile,
version=item.end,
rank=item.rank,
path=basefile.file.generate_diff_name(),
size=None,
checksum=None,
)
.filter(
FileDiff.version >= item.start, FileDiff.version <= item.end
db.session.add(diff)
db.session.commit()
diff_needs_to_be_created = True
else:
# we asked for checkpoint where there was no change
continue

if diff_needs_to_be_created:
diff_created = diff.construct_checkpoint()
if diff_created:
diffs.append(diff)
else:
logging.error(
f"Failed to create a diff for file {basefile.file.path} at version {basefile.project_version_name} of rank {item.rank}."
)
.order_by(FileDiff.version)
.all()
)
diffs.extend(individual_diffs)
else:
# we asked for individual diff but there is no such diff as there was not change at that version
continue
return None, []

return basefile, diffs

Expand Down Expand Up @@ -924,9 +970,10 @@ def construct_checkpoint(self) -> bool:
return True

if self.rank == 0:
raise ValueError(
logging.error(
"Checkpoint of rank 0 should be created by user upload, cannot be constructed"
)
return False

# merged diffs can only be created for certain versions
if self.version % LOG_BASE:
Expand Down Expand Up @@ -1434,7 +1481,7 @@ def __init__(
latest_files_map = {
fh.path: fh.id
for fh in FileHistory.query.filter(
FileHistory.id.in_(self.project.latest_project_files.file_history_ids)
FileHistory.id.in_(self.project.get_latest_files_cache())
).all()
}

Expand Down Expand Up @@ -1565,6 +1612,10 @@ def _files_from_end(self):
files that were delete after the version (and thus not necessarily present now). From these candidates
get the latest file change before or at the specific version. If that change was not 'delete', file is present.
"""
# if we do not have cached file history ids use different strategy where it is not necessary
if self.project.latest_project_files.file_history_ids is None:
return self._files_from_start()

query = f"""
WITH files_changes_before_version AS (
WITH files_candidates AS (
Expand Down
58 changes: 41 additions & 17 deletions server/mergin/sync/public_api_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import logging
from dataclasses import asdict
from enum import Enum
from typing import Dict
from datetime import datetime

Expand Down Expand Up @@ -80,6 +81,7 @@
generate_location,
is_valid_uuid,
get_device_id,
is_versioned_file,
prepare_download_response,
get_device_id,
wkb2wkt,
Expand Down Expand Up @@ -287,6 +289,12 @@ def delete_project(namespace, project_name): # noqa: E501
return NoContent, 200


class DowloadFileAction(Enum):
FULL = "full"
FULL_GPKG = "full_gpkg"
DIFF = "diff"


def download_project_file(
project_name, namespace, file, version=None, diff=None
): # noqa: E501
Expand All @@ -307,10 +315,20 @@ def download_project_file(

:rtype: file
"""
project = require_project(namespace, project_name, ProjectPermissions.Read)
if diff and not version:
if not is_versioned_file(file):
action = DowloadFileAction.FULL
elif diff:
action = DowloadFileAction.DIFF
else:
action = DowloadFileAction.FULL_GPKG

if action is DowloadFileAction.DIFF and not version:
abort(400, f"Changeset must be requested for particular file version")

if action is DowloadFileAction.FULL and diff is True:
abort(404, f"No diff in particular file {file})")

project = require_project(namespace, project_name, ProjectPermissions.Read)
lookup_version = (
ProjectVersion.from_v_name(version) if version else project.latest_version
)
Expand All @@ -329,24 +347,30 @@ def download_project_file(
if not fh or fh.change == PushChangeType.DELETE.value:
abort(404, f"File {file} not found")

if diff and version:
# get specific version of geodiff file modified in requested version
if not fh.diff:
abort(404, f"No diff in particular file {file} version")
file_path = fh.diff_file.location
else:
file_path = fh.location

if version and not diff:
project.storage.restore_versioned_file(
file, ProjectVersion.from_v_name(version)
)
# user asked for diff, but there is no diff at that version
if action is DowloadFileAction.DIFF and not fh.diff:
abort(404, f"No diff in particular file {file} version")

file_path = (
fh.diff_file.location if action is DowloadFileAction.DIFF else fh.location
)
abs_path = os.path.join(project.storage.project_dir, file_path)
# check file exists (e.g. there might have been issue with restore)

if not os.path.exists(abs_path):
logging.error(f"Missing file {namespace}/{project_name}/{file_path}")
abort(404)
if action is DowloadFileAction.FULL_GPKG:
project.storage.restore_versioned_file(
file, ProjectVersion.from_v_name(version)
)

# check again after restore
if not os.path.exists(abs_path):
logging.error(
f"Failed to restore {namespace}/{project_name}/{file_path}"
)
abort(404)
else:
logging.error(f"Missing file {namespace}/{project_name}/{file_path}")
abort(404)

response = prepare_download_response(project.storage.project_dir, file_path)
return response
Expand Down
62 changes: 61 additions & 1 deletion server/mergin/tests/test_file_restore.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@

from ..app import db
from ..auth.models import User
from ..sync.models import ProjectVersion, Project, GeodiffActionHistory
from ..sync.models import (
FileDiff,
ProjectFilePath,
ProjectVersion,
Project,
GeodiffActionHistory,
)
from . import test_project_dir, TMP_DIR
from .utils import (
create_project,
Expand Down Expand Up @@ -163,6 +169,19 @@ def test_version_file_restore(diff_project):
diff_project.storage.restore_versioned_file("base.gpkg", 7)
assert os.path.exists(test_file)
assert gpkgs_are_equal(test_file, test_file + "_backup")
# no merged diffs needed
file_path_id = (
ProjectFilePath.query.filter_by(project_id=diff_project.id, path="base.gpkg")
.first()
.id
)
assert (
FileDiff.query.filter_by(file_path_id=file_path_id)
.filter(FileDiff.rank > 0)
.count()
== 0
)

# check we track performance of reconstruction
gh = GeodiffActionHistory.query.filter_by(
project_id=diff_project.id, target_version="v7"
Expand Down Expand Up @@ -198,3 +217,44 @@ def test_version_file_restore(diff_project):
diff_project.storage.restore_versioned_file("test.txt", 1)
assert not os.path.exists(test_file)
assert not os.path.exists(diff_project.storage.geodiff_working_dir)

# let's add some dummy changes to test.gpkg so we can restore full gpkg using checkpoints created on demand
file_path_id = (
ProjectFilePath.query.filter_by(project_id=diff_project.id, path="test.gpkg")
.first()
.id
)
base_gpkg = os.path.join(diff_project.storage.project_dir, "test.gpkg")
shutil.copy(
os.path.join(diff_project.storage.project_dir, "v9", "test.gpkg"), base_gpkg
)
for i in range(23):
sql = f"UPDATE simple SET rating={i}"
execute_query(base_gpkg, sql)
pv = push_change(
diff_project, "updated", "test.gpkg", diff_project.storage.project_dir
)
assert diff_project.latest_version == pv.name == (11 + i)
file_diff = FileDiff.query.filter_by(
file_path_id=file_path_id, version=pv.name, rank=0
).first()
assert file_diff and os.path.exists(file_diff.abs_path)

assert (
FileDiff.query.filter_by(file_path_id=file_path_id)
.filter(FileDiff.rank > 0)
.count()
== 0
)

test_file = os.path.join(diff_project.storage.project_dir, "v30", "test.gpkg")
os.rename(test_file, test_file + "_backup")
diff_project.storage.restore_versioned_file("test.gpkg", 30)
assert os.path.exists(test_file)
assert gpkgs_are_equal(test_file, test_file + "_backup")
assert (
FileDiff.query.filter_by(file_path_id=file_path_id)
.filter(FileDiff.rank > 0)
.count()
> 0
)
18 changes: 16 additions & 2 deletions server/mergin/tests/test_project_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,14 @@ def test_download_diff_file(client, diff_project):

# download full version after file was removed
os.remove(os.path.join(diff_project.storage.project_dir, file_change.location))
file_path_id = (
ProjectFilePath.query.filter_by(project_id=diff_project.id, path=test_file)
.first()
.id
)
basefile, diffs = FileHistory.diffs_chain(file_path_id, 4)
# we construct full gpkg from basefile and single diff v4
assert basefile is not None and len(diffs) == 1
resp = client.get(
"/v1/project/raw/{}/{}?file={}&version=v4".format(
test_workspace_name, test_project, test_file
Expand Down Expand Up @@ -1902,11 +1910,17 @@ def test_file_diffs_chain(diff_project):
assert len(diffs) == 1
assert diffs[0].version == 6

# diff was used in v7, nothing happened in v8 (=v7)
basefile, diffs = FileHistory.diffs_chain(file_id, 8)
# diff was used in v7
basefile, diffs = FileHistory.diffs_chain(file_id, 7)
assert basefile.version.name == 5
assert len(diffs) == 2

# nothing happened in v8 (=v7) but we have now merged diff in chain v5-v8
basefile, diffs = FileHistory.diffs_chain(file_id, 8)
assert basefile.version.name == 5
assert len(diffs) == 1
assert diffs[0].rank == 1 and diffs[0].version == 8

# file was removed in v9
basefile, diffs = FileHistory.diffs_chain(file_id, 9)
assert not basefile
Expand Down
Loading
Loading