diff --git a/README.md b/README.md
index d0e9844..bb31546 100644
--- a/README.md
+++ b/README.md
@@ -81,9 +81,9 @@ Commands:
   share                Fetch permissions to project
   share-add            Add permissions to [users] to project
   share-remove         Remove [users] permissions from project
-  show-file-changeset  Displays information about project changes.
-  show-file-history    Displays information about a single version of a...
-  show-version         Displays information about a single version of a...
+  show-file-changeset  Display information about project changes.
+  show-file-history    Display information about a single version of a...
+  show-version         Display information about a single version of a...
   status               Show all changes in project files - upstream and...
 ```
 
@@ -99,7 +99,7 @@ To download a specific version of a project:
 $ mergin --username john download --version v42 john/project1 ~/mergin/project1
 ```
 
-To download a sepecific version of a single file:
+To download a specific version of a single file:
 
 1. First you need to download the project:
 ```
diff --git a/mergin/cli.py b/mergin/cli.py
index e479b88..d8b1b3a 100755
--- a/mergin/cli.py
+++ b/mergin/cli.py
@@ -412,17 +412,18 @@ def push(ctx):
         return
     directory = os.getcwd()
     try:
-        job = push_project_async(mc, directory)
-        if job is not None:  # if job is none, we don't upload any files, and the transaction is finished already
-            with click.progressbar(length=job.total_size) as bar:
-                last_transferred_size = 0
-                while push_project_is_running(job):
-                    time.sleep(1 / 10)  # 100ms
-                    new_transferred_size = job.transferred_size
-                    bar.update(new_transferred_size - last_transferred_size)  # the update() needs increment only
-                    last_transferred_size = new_transferred_size
-            push_project_finalize(job)
-        click.echo("Done")
+        jobs = push_project_async(mc, directory)
+        for job in jobs:
+            if job is not None:  # if job is none, we don't upload any files, and the transaction is finished already
+                with click.progressbar(length=job.total_size) as bar:
+                    last_transferred_size = 0
+                    while push_project_is_running(job):
+                        time.sleep(1 / 10)  # 100ms
+                        new_transferred_size = job.transferred_size
+                        bar.update(new_transferred_size - last_transferred_size)  # the update() needs increment only
+                        last_transferred_size = new_transferred_size
+                push_project_finalize(job)
+        click.echo("Done")
     except InvalidProject as e:
         click.secho("Invalid project directory ({})".format(str(e)), fg="red")
     except ClientError as e:
@@ -430,7 +431,8 @@ def push(ctx):
         return
     except KeyboardInterrupt:
         click.secho("Cancelling...")
-        push_project_cancel(job)
+        for job in jobs:
+            push_project_cancel(job)
     except Exception as e:
         _print_unhandled_exception()
@@ -473,7 +475,7 @@ def pull(ctx):
 @click.argument("version")
 @click.pass_context
 def show_version(ctx, version):
-    """Displays information about a single version of a project. `version` is 'v1', 'v2', etc."""
+    """Display information about a single version of a project. `version` is 'v1', 'v2', etc."""
     mc = ctx.obj["client"]
     if mc is None:
         return
@@ -492,7 +494,7 @@ def show_version(ctx, version):
 @click.argument("path")
 @click.pass_context
 def show_file_history(ctx, path):
-    """Displays information about a single version of a project."""
+    """Display information about a single version of a project."""
     mc = ctx.obj["client"]
     if mc is None:
         return
@@ -516,7 +518,7 @@ def show_file_history(ctx, path):
 @click.argument("version")
 @click.pass_context
 def show_file_changeset(ctx, path, version):
-    """Displays information about project changes."""
+    """Display information about project changes."""
     mc = ctx.obj["client"]
     if mc is None:
         return
diff --git a/mergin/client.py b/mergin/client.py
index 9e70a93..bfac501 100644
--- a/mergin/client.py
+++ b/mergin/client.py
@@ -93,7 +93,7 @@ def __init__(
         plugin_version=None,
         proxy_config=None,
     ):
-        self.url = url if url is not None else MerginClient.default_url()
+        self.url = (url if url is not None else MerginClient.default_url()).rstrip("/") + "/"
         self._auth_params = None
         self._auth_session = None
         self._user_info = None
@@ -473,7 +473,8 @@ def create_project(self, project_name, is_public=False, namespace=None):
 
     def create_project_and_push(self, project_name, directory, is_public=False, namespace=None):
         """
-        Convenience method to create project and push the initial version right after that.
+        Convenience method to create a project and push the files right after that.
+        Creates two versions when the directory contains both blocking and non-blocking changes.
 
         :param project_name: Project's full name (<namespace>/<name>)
         :type project_name: String
@@ -896,11 +897,12 @@ def push_project(self, directory):
 
         :param directory: Project's directory
         :type directory: String
         """
-        job = push_project_async(self, directory)
-        if job is None:
+        jobs = push_project_async(self, directory)
+        if not jobs:
             return  # there is nothing to push (or we only deleted some files)
-        push_project_wait(job)
-        push_project_finalize(job)
+        for job in jobs:
+            push_project_wait(job)
+            push_project_finalize(job)
 
     def pull_project(self, directory):
         """
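A note on the `MerginClient.__init__` change above: normalizing the server URL to exactly one trailing slash keeps later path joins predictable regardless of how the user typed the URL. A minimal sketch of the rule (the example URL is only for illustration):

```
def normalize_url(url: str) -> str:
    # collapse any run of trailing slashes to exactly one
    return url.rstrip("/") + "/"

assert normalize_url("https://app.merginmaps.com") == "https://app.merginmaps.com/"
assert normalize_url("https://app.merginmaps.com///") == "https://app.merginmaps.com/"
# applying it twice changes nothing - the normalization is idempotent
assert normalize_url(normalize_url("https://app.merginmaps.com")) == "https://app.merginmaps.com/"
```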
diff --git a/mergin/client_push.py b/mergin/client_push.py
index 885db9a..6e0c904 100644
--- a/mergin/client_push.py
+++ b/mergin/client_push.py
@@ -15,10 +15,12 @@
 import tempfile
 import concurrent.futures
 import os
+from typing import Dict, List
 
 from .common import UPLOAD_CHUNK_SIZE, ClientError
 from .merginproject import MerginProject
-from .editor import filter_changes
+from .editor import is_editor_enabled, _apply_editor_filters
+from .utils import is_qgis_file, is_versioned_file
 
 
 class UploadJob:
@@ -82,8 +84,83 @@ def upload_blocking(self, mc, mp):
             raise ClientError("Mismatch between uploaded file chunk {} and local one".format(self.chunk_id))
 
 
-def push_project_async(mc, directory):
-    """Starts push of a project and returns pending upload job"""
+class ChangesHandler:
+    """
+    Handles preparation of file changes to be uploaded to the server.
+
+    This class is responsible for:
+    - Filtering project file changes.
+    - Splitting changes into blocking and non-blocking groups.
+    - TODO: Applying limits such as max file count or size to break large uploads into smaller batches.
+    - Generating upload-ready change groups for asynchronous job creation.
+    """
+
+    def __init__(self, client, project_info, changes):
+        self.client = client
+        self.project_info = project_info
+        self._raw_changes = changes
+
+    @staticmethod
+    def is_blocking_file(file):
+        return is_qgis_file(file["path"]) or is_versioned_file(file["path"])
+
+    def _filter_changes(self, changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]:
+        """
+        Filters the given changes dictionary based on the editor's enabled state.
+
+        If the editor is not enabled, the changes dictionary is returned as-is. Otherwise, the changes are passed through the `_apply_editor_filters` method to apply any configured filters.
+
+        Args:
+            changes (dict[str, list[dict]]): A dictionary mapping file paths to lists of change dictionaries.
+
+        Returns:
+            dict[str, list[dict]]: The filtered changes dictionary.
+        """
+        if not is_editor_enabled(self.client, self.project_info):
+            return changes
+        return _apply_editor_filters(changes)
+
+    def _split_by_type(self, changes: Dict[str, List[dict]]) -> List[Dict[str, List[dict]]]:
+        """
+        Split raw filtered changes into two batches:
+        1. Blocking: updated/removed files plus added files that are blocking
+        2. Non-blocking: added files that are not blocking
+        """
+        blocking_changes = {"added": [], "updated": [], "removed": [], "renamed": []}
+        non_blocking_changes = {"added": [], "updated": [], "removed": [], "renamed": []}
+
+        for f in changes.get("added", []):
+            if self.is_blocking_file(f):
+                blocking_changes["added"].append(f)
+            else:
+                non_blocking_changes["added"].append(f)
+
+        for f in changes.get("updated", []):
+            blocking_changes["updated"].append(f)
+
+        for f in changes.get("removed", []):
+            blocking_changes["removed"].append(f)
+
+        result = []
+        if any(len(v) for v in blocking_changes.values()):
+            result.append(blocking_changes)
+        if any(len(v) for v in non_blocking_changes.values()):
+            result.append(non_blocking_changes)
+
+        return result
+
+    def split(self) -> List[Dict[str, List[dict]]]:
+        """
+        Applies all configured internal filters and returns a list of change batches ready to be uploaded.
+        """
+        changes = self._filter_changes(self._raw_changes)
+        changes = self._split_by_type(changes)
+        # TODO: apply limits; changes = self._limit_by_file_count(changes)
+        return changes
+
+
+def push_project_async(mc, directory) -> List[UploadJob]:
+    """Starts push of a project and returns a list of pending upload jobs (possibly empty)"""
 
     mp = MerginProject(directory)
     if mp.has_unfinished_pull():
@@ -107,8 +184,8 @@ def push_project_async(mc, directory):
     username = mc.username()
     # permissions field contains information about update, delete and upload privileges of the user
-    # on a specific project. This is more accurate information then "writernames" field, as it takes
-    # into account namespace privileges. So we have to check only "permissions", namely "upload" one
+    # on a specific project. This is more accurate information than "writernames" field, as it takes
+    # into account namespace privileges. So we have to check only "permissions", namely the "upload" one
     if not mc.has_writing_permissions(project_path):
         mp.log.error(f"--- push {project_path} - username {username} does not have write access")
         raise ClientError(f"You do not seem to have write access to the project (username '{username}')")
@@ -121,93 +198,97 @@ def push_project_async(mc, directory):
     )
 
     changes = mp.get_push_changes()
-    changes = filter_changes(mc, project_info, changes)
-    mp.log.debug("push changes:\n" + pprint.pformat(changes))
+    changes_handler = ChangesHandler(mc, project_info, changes)
+    changes_list = changes_handler.split()
 
     tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-")
+    jobs = []
 
-    # If there are any versioned files (aka .gpkg) that are not updated through a diff,
-    # we need to make a temporary copy somewhere to be sure that we are uploading full content.
-    # That's because if there are pending transactions, checkpointing or switching from WAL mode
-    # won't work, and we would end up with some changes left in -wal file which do not get
-    # uploaded. The temporary copy using geodiff uses sqlite backup API and should copy everything.
-    for f in changes["updated"]:
-        if mp.is_versioned_file(f["path"]) and "diff" not in f:
-            mp.copy_versioned_file_for_upload(f, tmp_dir.name)
-
-    for f in changes["added"]:
-        if mp.is_versioned_file(f["path"]):
-            mp.copy_versioned_file_for_upload(f, tmp_dir.name)
-
-    if not sum(len(v) for v in changes.values()):
-        mp.log.info(f"--- push {project_path} - nothing to do")
-        return
+    if not changes_list:
+        # split() only returns non-empty batches, so an empty list means nothing to push at all
+        mp.log.info(f"--- push {project_path} - nothing to do")
+        return []
+
+    for changes in changes_list:
+        mp.log.debug("push changes:\n" + pprint.pformat(changes))
 
-    # drop internal info from being sent to server
-    for item in changes["updated"]:
-        item.pop("origin_checksum", None)
-    data = {"version": local_version, "changes": changes}
+        # If there are any versioned files (aka .gpkg) that are not updated through a diff,
+        # we need to make a temporary copy somewhere to be sure that we are uploading full content.
+        # That's because if there are pending transactions, checkpointing or switching from WAL mode
+        # won't work, and we would end up with some changes left in -wal file which do not get
+        # uploaded. The temporary copy using geodiff uses sqlite backup API and should copy everything.
+        for f in changes["updated"]:
+            if mp.is_versioned_file(f["path"]) and "diff" not in f:
+                mp.copy_versioned_file_for_upload(f, tmp_dir.name)
 
-    try:
-        resp = mc.post(
-            f"/v1/project/push/{project_path}",
-            data,
-            {"Content-Type": "application/json"},
-        )
-    except ClientError as err:
-        mp.log.error("Error starting transaction: " + str(err))
-        mp.log.info("--- push aborted")
-        raise
-    server_resp = json.load(resp)
-
-    upload_files = data["changes"]["added"] + data["changes"]["updated"]
-
-    transaction_id = server_resp["transaction"] if upload_files else None
-    job = UploadJob(project_path, changes, transaction_id, mp, mc, tmp_dir)
-
-    if not upload_files:
-        mp.log.info("not uploading any files")
-        job.server_resp = server_resp
-        push_project_finalize(job)
-        return None  # all done - no pending job
-
-    mp.log.info(f"got transaction ID {transaction_id}")
-
-    upload_queue_items = []
-    total_size = 0
-    # prepare file chunks for upload
-    for file in upload_files:
-        if "diff" in file:
-            # versioned file - uploading diff
-            file_location = mp.fpath_meta(file["diff"]["path"])
-            file_size = file["diff"]["size"]
-        elif "upload_file" in file:
-            # versioned file - uploading full (a temporary copy)
-            file_location = file["upload_file"]
-            file_size = file["size"]
-        else:
-            # non-versioned file
-            file_location = mp.fpath(file["path"])
-            file_size = file["size"]
-
-        for chunk_index, chunk_id in enumerate(file["chunks"]):
-            size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE)
-            upload_queue_items.append(UploadQueueItem(file_location, size, transaction_id, chunk_id, chunk_index))
-
-        total_size += file_size
-
-    job.total_size = total_size
-    job.upload_queue_items = upload_queue_items
-
-    mp.log.info(f"will upload {len(upload_queue_items)} items with total size {total_size}")
-
-    # start uploads in background
-    job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
-    for item in upload_queue_items:
-        future = job.executor.submit(_do_upload, item, job)
-        job.futures.append(future)
-
-    return job
+        for f in changes["added"]:
+            if mp.is_versioned_file(f["path"]):
+                mp.copy_versioned_file_for_upload(f, tmp_dir.name)
+
+        # drop internal info from being sent to server
+        for item in changes["updated"]:
+            item.pop("origin_checksum", None)
+        data = {"version": local_version, "changes": changes}
+
+        try:
+            resp = mc.post(
+                f"/v1/project/push/{project_path}",
+                data,
+                {"Content-Type": "application/json"},
+            )
+        except ClientError as err:
+            mp.log.error("Error starting transaction: " + str(err))
+            mp.log.info("--- push aborted")
+            raise
+        server_resp = json.load(resp)
+
+        upload_files = data["changes"]["added"] + data["changes"]["updated"]
+
+        transaction_id = server_resp["transaction"] if upload_files else None
+        job = UploadJob(project_path, changes, transaction_id, mp, mc, tmp_dir)
+
+        if not upload_files:
+            mp.log.info("not uploading any files")
+            job.server_resp = server_resp
+            push_project_finalize(job)
+            continue  # this batch is all done - no pending job, move on to the next one
+
+        mp.log.info(f"got transaction ID {transaction_id}")
+
+        upload_queue_items = []
+        total_size = 0
+        # prepare file chunks for upload
+        for file in upload_files:
+            if "diff" in file:
+                # versioned file - uploading diff
+                file_location = mp.fpath_meta(file["diff"]["path"])
+                file_size = file["diff"]["size"]
+            elif "upload_file" in file:
+                # versioned file - uploading full (a temporary copy)
+                file_location = file["upload_file"]
+                file_size = file["size"]
+            else:
+                # non-versioned file
+                file_location = mp.fpath(file["path"])
+                file_size = file["size"]
+
+            for chunk_index, chunk_id in enumerate(file["chunks"]):
+                size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE)
+                upload_queue_items.append(UploadQueueItem(file_location, size, transaction_id, chunk_id, chunk_index))
+
+            total_size += file_size
+
+        job.total_size = total_size
+        job.upload_queue_items = upload_queue_items
+
+        mp.log.info(f"will upload {len(upload_queue_items)} items with total size {total_size}")
+
+        # start uploads in background
+        job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
+        for item in upload_queue_items:
+            future = job.executor.submit(_do_upload, item, job)
+            job.futures.append(future)
+        jobs.append(job)
+
+    return jobs
 
 
 def push_project_wait(job):
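With `push_project_async()` now returning a list of jobs (one per change batch), callers drive each job to completion in order, exactly as the CLI loop earlier in this patch does. A minimal sketch of the consuming side, assuming an already-authenticated `MerginClient` instance `mc` and a local project checkout in `directory`:

```
import time

from mergin.client_push import (
    push_project_async,
    push_project_cancel,
    push_project_finalize,
    push_project_is_running,
)

jobs = push_project_async(mc, directory) or []  # an empty list means nothing to upload
try:
    for job in jobs:
        while push_project_is_running(job):
            time.sleep(0.1)  # poll the background upload threads
        push_project_finalize(job)  # commit this batch's transaction on the server
except KeyboardInterrupt:
    for job in jobs:
        push_project_cancel(job)
```

Blocking changes (QGIS project files, geopackages) always land in the first batch, so the project file and its data are versioned before any non-blocking additions are pushed.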
diff --git a/mergin/common.py b/mergin/common.py
index 9e65a6c..c681841 100644
--- a/mergin/common.py
+++ b/mergin/common.py
@@ -15,7 +15,7 @@
 
 this_dir = os.path.dirname(os.path.realpath(__file__))
 
-# Error code from the public API, add to the end of enum as we handle more eror
+# Error codes from the public API, add to the end of the enum as we handle more errors
 class ErrorCode(Enum):
     ProjectsLimitHit = "ProjectsLimitHit"
     StorageLimitHit = "StorageLimitHit"
diff --git a/mergin/editor.py b/mergin/editor.py
index 237b0ea..aa6420c 100644
--- a/mergin/editor.py
+++ b/mergin/editor.py
@@ -1,7 +1,7 @@
 from itertools import filterfalse
 from typing import Callable, Dict, List
 
-from .utils import is_mergin_config, is_qgis_file, is_versioned_file
+from .utils import is_qgis_file
 
 EDITOR_ROLE_NAME = "editor"
 
@@ -40,23 +40,6 @@ def _apply_editor_filters(changes: Dict[str, List[dict]]) -> Dict[str, List[dic
     return changes
 
 
-def filter_changes(mc, project_info: dict, changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]:
-    """
-    Filters the given changes dictionary based on the editor's enabled state.
-
-    If the editor is not enabled, the changes dictionary is returned as-is. Otherwise, the changes are passed through the `_apply_editor_filters` method to apply any configured filters.
-
-    Args:
-        changes (dict[str, list[dict]]): A dictionary mapping file paths to lists of change dictionaries.
-
-    Returns:
-        dict[str, list[dict]]: The filtered changes dictionary.
-    """
-    if not is_editor_enabled(mc, project_info):
-        return changes
-    return _apply_editor_filters(changes)
-
-
 def prevent_conflicted_copy(path: str, mc, project_info: dict) -> bool:
     """
     Decides whether a file path should be blocked from creating a conflicted copy.
diff --git a/mergin/merginproject.py b/mergin/merginproject.py
index 83d8510..5e46ac7 100644
--- a/mergin/merginproject.py
+++ b/mergin/merginproject.py
@@ -7,6 +7,7 @@
 import uuid
 import tempfile
 from datetime import datetime
+from typing import List, Dict
 from dateutil.tz import tzlocal
 
 from .editor import prevent_conflicted_copy
@@ -314,17 +315,14 @@ def inspect_files(self):
         )
         return files_meta
 
-    def compare_file_sets(self, origin, current):
+    def compare_file_sets(self, origin, current) -> Dict[str, List[dict]]:
         """
-        Helper function to calculate difference between two sets of files metadata using file names and checksums.
-
+        Calculate difference between two sets of files metadata using file names and checksums.
         :Example:
-
         >>> origin = [{'checksum': '08b0e8caddafe74bf5c11a45f65cedf974210fed', 'path': 'base.gpkg', 'size': 2793, 'mtime': '2019-08-26T11:08:34.051221+02:00'}]
         >>> current = [{'checksum': 'c9a4fd2afd513a97aba19d450396a4c9df8b2ba4', 'path': 'test.qgs', 'size': 31980, 'mtime': '2019-08-26T11:09:30.051221+02:00'}]
         >>> self.compare_file_sets(origin, current)
         {"added": [{'checksum': 'c9a4fd2afd513a97aba19d450396a4c9df8b2ba4', 'path': 'test.qgs', 'size': 31980, 'mtime': '2019-08-26T11:09:30.051221+02:00'}], "removed": [[{'checksum': '08b0e8caddafe74bf5c11a45f65cedf974210fed', 'path': 'base.gpkg', 'size': 2793, 'mtime': '2019-08-26T11:08:34.051221+02:00'}]], "renamed": [], "updated": []}
-
         :param origin: origin set of files metadata
         :type origin: list[dict]
         :param current: current set of files metadata to be compared against origin
@@ -405,7 +403,7 @@ def get_pull_changes(self, server_files):
             changes["updated"] = [f for f in changes["updated"] if f not in not_updated]
         return changes
 
-    def get_push_changes(self):
+    def get_push_changes(self) -> Dict[str, List[dict]]:
         """
         Calculate changes needed to be pushed to server.
 
@@ -427,7 +425,7 @@ def get_push_changes(self):
                 file["checksum"] = checksum
             file["chunks"] = [str(uuid.uuid4()) for i in range(math.ceil(file["size"] / UPLOAD_CHUNK_SIZE))]
 
-        # need to check for for real changes in geodiff files using geodiff tool (comparing checksum is not enough)
+        # need to check for real changes in geodiff files using geodiff tool (comparing checksum is not enough)
         not_updated = []
         for file in changes["updated"]:
             path = file["path"]
@@ -688,7 +686,7 @@ def apply_push_changes(self, changes):
         """
         For geodiff files update basefiles according to changes pushed to server.
 
-        :param changes: metadata for pulled files
+        :param changes: metadata for pushed files
        :type changes: dict[str, list[dict]]
         """
         for k, v in changes.items():
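The `get_push_changes()` hunk above assigns each file a list of chunk IDs sized by `UPLOAD_CHUNK_SIZE`. A worked example of that arithmetic; the 10 MiB chunk size below is an assumption for illustration only (the real constant lives in `mergin.common`):

```
import math
import uuid

UPLOAD_CHUNK_SIZE = 10 * 1024 * 1024  # assumed value, for illustration only

file_size = 25 * 1024 * 1024  # a 25 MiB geopackage
chunks = [str(uuid.uuid4()) for _ in range(math.ceil(file_size / UPLOAD_CHUNK_SIZE))]
assert len(chunks) == 3  # two full 10 MiB chunks plus a 5 MiB remainder
```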
diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py
index f3d4c4b..aa31f8b 100644
--- a/mergin/test/test_client.py
+++ b/mergin/test/test_client.py
@@ -5,6 +5,7 @@
 import tempfile
 import subprocess
 import shutil
+from collections import defaultdict
 from datetime import datetime, timedelta, date
 import pytest
 import pytz
@@ -21,7 +22,7 @@
     TokenError,
     ServerType,
 )
-from ..client_push import push_project_async, push_project_cancel
+from ..client_push import push_project_async, push_project_cancel, ChangesHandler
 from ..client_pull import (
     download_project_async,
     download_project_wait,
@@ -38,7 +39,7 @@
 )
 from ..merginproject import pygeodiff
 from ..report import create_report
-from ..editor import EDITOR_ROLE_NAME, filter_changes, is_editor_enabled
+from ..editor import EDITOR_ROLE_NAME, is_editor_enabled
 from ..common import ErrorCode, WorkspaceRole, ProjectRole
 
 SERVER_URL = os.environ.get("TEST_MERGIN_URL")
@@ -85,7 +86,7 @@ def mcStorage(request):
     client_workspace_storage = client_workspace["storage"]
 
     def teardown():
-        # back to original values... (1 project, api allowed ...)
+        # back to original values... (2 projects, api allowed ...)
         client.patch(
             f"/v1/tests/workspaces/{client_workspace_id}",
             {"limits_override": get_limit_overrides(client_workspace_storage)},
@@ -233,18 +234,18 @@ def test_create_remote_project_from_local(mc):
     assert source_mp.project_full_name() == f"{API_USER}/{test_project}"
     assert source_mp.project_name() == test_project
     assert source_mp.workspace_name() == API_USER
-    assert source_mp.version() == "v1"
+    assert source_mp.version() == "v2"  # data was split into two pushes - blocking and non-blocking
 
     # check basic metadata about created project
     project_info = mc.project_info(project)
-    assert project_info["version"] == "v1"
+    assert project_info["version"] == "v2"
     assert project_info["name"] == test_project
     assert project_info["namespace"] == API_USER
     assert project_info["id"] == source_mp.project_id()
 
     # check project metadata retrieval by id
     project_info = mc.project_info(source_mp.project_id())
-    assert project_info["version"] == "v1"
+    assert project_info["version"] == "v2"
     assert project_info["name"] == test_project
     assert project_info["namespace"] == API_USER
     assert project_info["id"] == source_mp.project_id()
@@ -285,7 +286,7 @@ def test_push_pull_changes(mc):
     assert mp2.project_full_name() == f"{API_USER}/{test_project}"
     assert mp2.project_name() == test_project
     assert mp2.workspace_name() == API_USER
-    assert mp2.version() == "v1"
+    assert mp2.version() == "v2"  # two pushes - blocking and non-blocking
 
     # test push changes (add, remove, rename, update)
     f_added = "new.txt"
@@ -304,7 +305,7 @@ def test_push_pull_changes(mc):
 
     # check changes before applied
     pull_changes, push_changes, _ = mc.project_status(project_dir)
-    assert not sum(len(v) for v in pull_changes.values())
+    assert not any(len(v) for v in pull_changes.values())
     assert next((f for f in push_changes["added"] if f["path"] == f_added), None)
     assert next((f for f in push_changes["removed"] if f["path"] == f_removed), None)
     assert next((f for f in push_changes["updated"] if f["path"] == f_updated), None)
@@ -317,10 +318,10 @@ def test_push_pull_changes(mc):
 
     mp = MerginProject(project_dir)
     assert mp.project_full_name() == f"{API_USER}/{test_project}"
-    assert mp.version() == "v2"
+    assert mp.version() == "v4"
 
     project_info = mc.project_info(project)
-    assert project_info["version"] == "v2"
+    assert project_info["version"] == "v4"
     assert not next((f for f in project_info["files"] if f["path"] == f_removed), None)
     assert not next((f for f in project_info["files"] if f["path"] == f_renamed), None)
     assert next((f for f in project_info["files"] if f["path"] == "renamed.txt"), None)
@@ -329,7 +330,7 @@ def test_push_pull_changes(mc):
     assert generate_checksum(os.path.join(project_dir, f_updated)) == f_remote_checksum
     assert project_info["id"] == mp.project_id()
     assert len(project_info["files"]) == len(mp.inspect_files())
-    project_version = mc.project_version_info(project_info["id"], "v2")
+    project_version = mc.project_version_info(project_info["id"], "v3")
     updated_file = [f for f in project_version["changes"]["updated"] if f["path"] == f_updated][0]
     assert "origin_checksum" not in updated_file  # internal client info
 
@@ -355,9 +356,9 @@ def test_push_pull_changes(mc):
     assert not os.path.exists(os.path.join(project_dir_2, f_removed))
     assert not os.path.exists(os.path.join(project_dir_2, f_renamed))
     assert os.path.exists(os.path.join(project_dir_2, "renamed.txt"))
-    assert os.path.exists(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 1)))
+    assert os.path.exists(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 2)))
     assert (
-        generate_checksum(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 1)))
+        generate_checksum(os.path.join(project_dir_2, conflicted_copy_file_name(f_updated, API_USER, 2)))
         == f_conflict_checksum
     )
     assert generate_checksum(os.path.join(project_dir_2, f_updated)) == f_remote_checksum
@@ -388,13 +389,14 @@ def test_cancel_push(mc):
 
     # check changes before applied
     pull_changes, push_changes, _ = mc.project_status(project_dir)
-    assert not sum(len(v) for v in pull_changes.values())
+    assert not any(len(v) for v in pull_changes.values())
     assert next((f for f in push_changes["added"] if f["path"] == f_added), None)
     assert next((f for f in push_changes["updated"] if f["path"] == f_updated), None)
 
     # start pushing and then cancel the job
-    job = push_project_async(mc, project_dir)
-    push_project_cancel(job)
+    jobs = push_project_async(mc, project_dir)
+    for job in jobs:
+        push_project_cancel(job)
 
     # if cancelled properly, we should be now able to do the push without any problem
     mc.push_project(project_dir)
@@ -402,7 +404,7 @@ def test_cancel_push(mc):
     # download the project to a different directory and check the version and content
     mc.download_project(project, project_dir_2)
     mp = MerginProject(project_dir_2)
-    assert mp.version() == "v2"
+    assert mp.version() == "v4"  # 2 pushes when created + 2 when modified with add and update changes
     assert os.path.exists(os.path.join(project_dir_2, f_added))
     with open(os.path.join(project_dir_2, f_updated), "r") as f:
         assert f.read() == modification
@@ -464,7 +466,7 @@ def test_sync_diff(mc):
 
     # check project after push
     project_info = mc.project_info(project)
-    assert project_info["version"] == "v3"
+    assert project_info["version"] == "v4"  # 2 pushes when project was created
     assert project_info["id"] == mp.project_id()
     f_remote = next((f for f in project_info["files"] if f["path"] == f_updated), None)
     assert next((f for f in project_info["files"] if f["path"] == "renamed.gpkg"), None)
@@ -562,7 +564,9 @@ def test_force_gpkg_update(mc):
 
     # check project after push
     project_info = mc.project_info(project)
-    assert project_info["version"] == "v2"
+    assert (
+        project_info["version"] == "v3"
+    )  # two pushes while creating the project with blocking and non-blocking changes
     f_remote = next((f for f in project_info["files"] if f["path"] == f_updated), None)
     assert "diff" not in f_remote
@@ -960,19 +964,19 @@ def test_download_versions(mc):
     mc.push_project(project_dir)
 
     project_info = mc.project_info(project)
-    assert project_info["version"] == "v2"
+    assert project_info["version"] == "v3"  # 2 versions created with initial push of blocking and non-blocking files
 
-    mc.download_project(project, project_dir_v1, "v1")
+    mc.download_project(project, project_dir_v1, "v2")
     assert os.path.exists(os.path.join(project_dir_v1, "base.gpkg"))
-    assert not os.path.exists(os.path.join(project_dir_v2, f_added))  # added only in v2
+    assert not os.path.exists(os.path.join(project_dir_v2, f_added))  # added only in v3
 
-    mc.download_project(project, project_dir_v2, "v2")
+    mc.download_project(project, project_dir_v2, "v3")
     assert os.path.exists(os.path.join(project_dir_v2, f_added))
-    assert os.path.exists(os.path.join(project_dir_v1, "base.gpkg"))  # added in v1 but still present in v2
+    assert os.path.exists(os.path.join(project_dir_v1, "base.gpkg"))  # added in v1 but still present in v3
 
     # try to download not-existing version
     with pytest.raises(ClientError):
-        mc.download_project(project, project_dir_v3, "v3")
+        mc.download_project(project, project_dir_v3, "v4")
 
 
 def test_paginated_project_list(mc):
@@ -1066,11 +1070,13 @@ def create_versioned_project(mc, project_name, project_dir, updated_file, remove
 
     # create remote project
     shutil.copytree(TEST_DATA_DIR, project_dir)
-    mc.create_project_and_push(project, project_dir)
+    mc.create_project_and_push(
+        project, project_dir
+    )  # creates two versions because the push was split into blocking and non-blocking changes
 
     mp = MerginProject(project_dir)
 
-    # create versions 2-4
+    # create versions 3-6
     changes = (
         "inserted_1_A.gpkg",
         "inserted_1_A_mod.gpkg",
@@ -1109,7 +1115,7 @@ def test_get_versions_with_file_changes(mc):
     mp = create_versioned_project(mc, test_project, project_dir, f_updated, remove=False)
 
     project_info = mc.project_info(project)
-    assert project_info["version"] == "v4"
+    assert project_info["version"] == "v5"
     assert project_info["id"] == mp.project_id()
 
     file_history = mc.project_file_history_info(project, f_updated)
@@ -1119,21 +1125,21 @@ def test_get_versions_with_file_changes(mc):
             project,
             f_updated,
             version_from="v1",
-            version_to="v5",
+            version_to="v6",
             file_history=file_history,
         )
-    assert "Wrong version parameters: 1-5" in str(e.value)
-    assert "Available versions: [1, 2, 3, 4]" in str(e.value)
+    assert "Wrong version parameters: 1-6" in str(e.value)
+    assert "Available versions: [1, 3, 4, 5]" in str(e.value)
 
     mod_versions = get_versions_with_file_changes(
         mc,
         project,
         f_updated,
-        version_from="v2",
-        version_to="v4",
+        version_from="v3",
+        version_to="v5",
         file_history=file_history,
     )
-    assert mod_versions == [f"v{i}" for i in range(2, 5)]
+    assert mod_versions == [f"v{i}" for i in range(3, 6)]
 
 
 def check_gpkg_same_content(mergin_project, gpkg_path_1, gpkg_path_2):
@@ -1154,7 +1160,7 @@ def test_download_file(mc):
     mp = create_versioned_project(mc, test_project, project_dir, f_updated)
 
     project_info = mc.project_info(project)
-    assert project_info["version"] == "v5"
+    assert project_info["version"] == "v6"
     assert project_info["id"] == mp.project_id()
 
     # Versioned file should have the following content at versions 2-4
@@ -1166,14 +1172,14 @@ def test_download_file(mc):
 
     # Download the base file at versions 2-4 and check the changes
     f_downloaded = os.path.join(project_dir, f_updated)
-    for ver in range(2, 5):
+    for ver in range(3, 6):
         mc.download_file(project_dir, f_updated, f_downloaded, version=f"v{ver}")
-        expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2])  # GeoPackage with expected content
+        expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 3])  # GeoPackage with expected content
         assert check_gpkg_same_content(mp, f_downloaded, expected)
 
     # make sure there will be exception raised if a file doesn't exist in the version
-    with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v5"):
-        mc.download_file(project_dir, f_updated, f_downloaded, version="v5")
+    with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v6"):
+        mc.download_file(project_dir, f_updated, f_downloaded, version="v6")
 
 
 def test_download_diffs(mc):
@@ -1188,24 +1194,24 @@ def test_download_diffs(mc):
     mp = create_versioned_project(mc, test_project, project_dir, f_updated, remove=False)
 
     project_info = mc.project_info(project)
-    assert project_info["version"] == "v4"
+    assert project_info["version"] == "v5"
     assert project_info["id"] == mp.project_id()
 
-    # Download diffs of updated file between versions 1 and 2
-    mc.get_file_diff(project_dir, f_updated, diff_file, "v1", "v2")
+    # Download diffs of updated file between versions 1 and 3
+    mc.get_file_diff(project_dir, f_updated, diff_file, "v1", "v3")
     assert os.path.exists(diff_file)
     assert mp.geodiff.has_changes(diff_file)
     assert mp.geodiff.changes_count(diff_file) == 1
-    changes_file = diff_file + ".changes1-2"
+    changes_file = diff_file + ".changes1-3"
     mp.geodiff.list_changes_summary(diff_file, changes_file)
     with open(changes_file, "r") as f:
         changes = json.loads(f.read())["geodiff_summary"][0]
         assert changes["insert"] == 1
         assert changes["update"] == 0
 
-    # Download diffs of updated file between versions 2 and 4
-    mc.get_file_diff(project_dir, f_updated, diff_file, "v2", "v4")
-    changes_file = diff_file + ".changes2-4"
+    # Download diffs of updated file between versions 3 and 5
+    mc.get_file_diff(project_dir, f_updated, diff_file, "v3", "v5")
+    changes_file = diff_file + ".changes3-5"
     mp.geodiff.list_changes_summary(diff_file, changes_file)
     with open(changes_file, "r") as f:
         changes = json.loads(f.read())["geodiff_summary"][0]
@@ -1213,14 +1219,14 @@ def test_download_diffs(mc):
         assert changes["update"] == 1
 
     with pytest.raises(ClientError) as e:
-        mc.get_file_diff(project_dir, f_updated, diff_file, "v4", "v1")
+        mc.get_file_diff(project_dir, f_updated, diff_file, "v5", "v1")
     assert "Wrong version parameters" in str(e.value)
     assert "version_from needs to be smaller than version_to" in str(e.value)
 
     with pytest.raises(ClientError) as e:
-        mc.get_file_diff(project_dir, f_updated, diff_file, "v4", "v5")
+        mc.get_file_diff(project_dir, f_updated, diff_file, "v5", "v6")
     assert "Wrong version parameters" in str(e.value)
-    assert "Available versions: [1, 2, 3, 4]" in str(e.value)
+    assert "Available versions: [1, 3, 4, 5]" in str(e.value)
 
 
 def test_modify_project_permissions(mc):
@@ -1987,13 +1993,13 @@ def test_project_versions_list(mc):
     project_dir = os.path.join(TMP_DIR, test_project)
     create_versioned_project(mc, test_project, project_dir, "base.gpkg")
     project_info = mc.project_info(project)
-    assert project_info["version"] == "v5"
+    assert project_info["version"] == "v6"
 
     # get all versions
     versions = mc.project_versions(project)
-    assert len(versions) == 5
+    assert len(versions) == 6
     assert versions[0]["name"] == "v1"
-    assert versions[-1]["name"] == "v5"
+    assert versions[-1]["name"] == "v6"
 
     # get first 3 versions
     versions = mc.project_versions(project, to="v3")
@@ -2002,7 +2008,7 @@ def test_project_versions_list(mc):
 
     # get last 2 versions
     versions = mc.project_versions(project, since="v4")
-    assert len(versions) == 2
+    assert len(versions) == 3
     assert versions[0]["name"] == "v4"
 
     # get range v2-v4
@@ -2012,11 +2018,11 @@ def test_project_versions_list(mc):
     assert versions[-1]["name"] == "v4"
 
     versions_count = mc.project_versions_count(project)
-    assert versions_count == 5
+    assert versions_count == 6
 
     versions, _ = mc.paginated_project_versions(project, page=1, descending=True)
-    assert len(versions) == 5
-    assert versions[0]["name"] == "v5"
+    assert len(versions) == 6
+    assert versions[0]["name"] == "v6"
     assert versions[-1]["name"] == "v1"
@@ -2027,10 +2033,10 @@ def test_report(mc):
     f_updated = "base.gpkg"
     mp = create_versioned_project(mc, test_project, project_dir, f_updated, remove=False, overwrite=True)
 
-    # create report for between versions 2 and 4
+    # create report between versions 3 and 5
    directory = mp.dir
-    since = "v2"
-    to = "v4"
+    since = "v3"
+    to = "v5"
     proj_name = project.replace(os.path.sep, "-")
     report_file = os.path.join(TMP_DIR, "report", f"{proj_name}-{since}-{to}.csv")
     if os.path.exists(report_file):
@@ -2057,7 +2063,7 @@ def test_report(mc):
     )
     assert headers in content
     assert f"base.gpkg,simple,{API_USER}" in content
-    assert "v3,update,,,2" in content
+    assert "v4,update,,,2" in content
 
     # files not edited are not in reports
     assert "inserted_1_A.gpkg" not in content
@@ -2066,7 +2072,7 @@ def test_report(mc):
     assert warnings
 
     # do report for v1 with added files and v5 with overwritten file
-    warnings = create_report(mc, directory, "v1", "v5", report_file)
+    warnings = create_report(mc, directory, "v1", "v6", report_file)
     assert warnings
 
     # rm local Mergin Maps project and try again
@@ -2174,11 +2180,11 @@ def test_changesets_download(mc):
     mp = MerginProject(project_dir)
 
     os.makedirs(download_dir, exist_ok=True)
-    diff_file = os.path.join(download_dir, "base-v1-2.diff")
-    mc.get_file_diff(project_dir, test_gpkg, diff_file, "v1", "v2")
+    diff_file = os.path.join(download_dir, "base-v1-3.diff")
+    mc.get_file_diff(project_dir, test_gpkg, diff_file, "v1", "v3")
     assert os.path.exists(diff_file)
     assert mp.geodiff.has_changes(diff_file)
-    assert mp.geodiff.changes_count(diff_file) == 1
+    assert mp.geodiff.changes_count(diff_file) == 3
 
     diff_file = os.path.join(download_dir, "base-v2-3.diff")
     mc.get_file_diff(project_dir, test_gpkg, diff_file, "v2", "v3")
@@ -2215,10 +2221,10 @@ def test_version_info(mc):
     shutil.copy(os.path.join(TEST_DATA_DIR, "inserted_1_A_mod.gpkg"), file_path)
     mc.push_project(project_dir)
     project_info = mc.project_info(project)
-    info = mc.project_version_info(project_info.get("id"), "v2")
+    info = mc.project_version_info(project_info.get("id"), "v3")
     assert info["namespace"] == API_USER
     assert info["project_name"] == test_project
-    assert info["name"] == "v2"
+    assert info["name"] == "v3"
     assert info["author"] == API_USER
     created = datetime.strptime(info["created"], "%Y-%m-%dT%H:%M:%SZ")
     assert created.date() == date.today()
@@ -2344,8 +2350,8 @@ def test_reset_local_changes(mc: MerginClient):
     os.remove(mp.fpath("test.txt"))
     os.remove(mp.fpath("test_dir/test2.txt"))
 
-    # download version 2 and create MerginProject for it
-    mc.download_project(project, project_dir_2, version="v2")
+    # download version 3 and create MerginProject for it
+    mc.download_project(project, project_dir_2, version="v3")
     mp = MerginProject(project_dir_2)
 
     # make some changes
@@ -2360,7 +2366,7 @@ def test_reset_local_changes(mc: MerginClient):
     assert len(push_changes["removed"]) == 2
     assert len(push_changes["updated"]) == 0
 
-    # reset back to original version we had - v2
+    # reset back to original version we had - v3
     mc.reset_local_changes(project_dir_2)
 
     # push changes after the reset - should be none
@@ -2440,7 +2446,7 @@ def test_project_rename(mc: MerginClient):
 
     # validate project info
     project_info = mc.project_info(project_renamed)
-    assert project_info["version"] == "v1"
+    assert project_info["version"] == "v2"  # two versions created in initial push
     assert project_info["name"] == test_project_renamed
     assert project_info["namespace"] == API_USER
     with pytest.raises(ClientError, match="The requested URL was not found on the server"):
@@ -2481,7 +2487,7 @@ def test_download_files(mc: MerginClient):
     mp = create_versioned_project(mc, test_project, project_dir, f_updated)
 
     project_info = mc.project_info(project)
-    assert project_info["version"] == "v5"
+    assert project_info["version"] == "v6"
     assert project_info["id"] == mp.project_id()
 
     # Versioned file should have the following content at versions 2-4
@@ -2494,15 +2500,15 @@ def test_download_files(mc: MerginClient):
     downloaded_file = os.path.join(download_dir, f_updated)
 
     # if output_paths is specified look at that location
-    for ver in range(2, 5):
+    for ver in range(3, 6):
         mc.download_files(project_dir, [f_updated], [downloaded_file], version=f"v{ver}")
-        expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2])  # GeoPackage with expected content
+        expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 3])  # GeoPackage with expected content
         assert check_gpkg_same_content(mp, downloaded_file, expected)
 
     # if output_paths is not specified look in the mergin project folder
-    for ver in range(2, 5):
+    for ver in range(3, 6):
         mc.download_files(project_dir, [f_updated], version=f"v{ver}")
-        expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2])  # GeoPackage with expected content
+        expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 3])  # GeoPackage with expected content
         assert check_gpkg_same_content(mp, mp.fpath(f_updated), expected)
 
     # download two files from v1 and check their content
@@ -2513,7 +2519,7 @@ def test_download_files(mc: MerginClient):
         project_dir,
         [f_updated, file_2],
         [downloaded_file, downloaded_file_2],
-        version="v1",
+        version="v2",
     )
 
     assert check_gpkg_same_content(mp, downloaded_file, os.path.join(TEST_DATA_DIR, f_updated))
@@ -2525,11 +2531,11 @@ def test_download_files(mc: MerginClient):
         assert content_exp == content
 
     # make sure there will be exception raised if a file doesn't exist in the version
-    with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v5"):
-        mc.download_files(project_dir, [f_updated], version="v5")
+    with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v6"):
+        mc.download_files(project_dir, [f_updated], version="v6")
 
-    with pytest.raises(ClientError, match=f"No \\[non_existing\\.file\\] exists at version v3"):
-        mc.download_files(project_dir, [f_updated, "non_existing.file"], version="v3")
+    with pytest.raises(ClientError, match=f"No \\[non_existing\\.file\\] exists at version v4"):
+        mc.download_files(project_dir, [f_updated, "non_existing.file"], version="v4")
 
 
 def test_download_failure(mc):
@@ -2592,7 +2598,8 @@ def test_editor(mc: MerginClient):
         "updated": [{"path": "/folder/project.updated.Qgs"}],
         "removed": [{"path": "/folder/project.removed.qgs"}],
     }
-    qgs_changeset = filter_changes(mc, project_info, qgs_changeset)
+    changes_handler = ChangesHandler(mc, project_info, qgs_changeset)
+    qgs_changeset = changes_handler._filter_changes(qgs_changeset)
     assert sum(len(v) for v in qgs_changeset.values()) == 2
@@ -2629,8 +2636,8 @@ def test_editor_push(mc: MerginClient, mc2: MerginClient):
     assert any(file["path"] == txt_file_name for file in project_info.get("files")) is True
     assert any(file["path"] == gpkg_file_name for file in project_info.get("files")) is True
     pull_changes, push_changes, push_changes_summary = mc.project_status(project_dir)
-    assert not sum(len(v) for v in pull_changes.values())
-    assert not sum(len(v) for v in push_changes.values())
+    assert not any(len(v) for v in pull_changes.values())
+    assert not any(len(v) for v in push_changes.values())
 
     # editor is trying to push row to gpkg file -> it's possible
     shutil.copy(
@@ -2681,6 +2688,7 @@ def test_editor_push(mc: MerginClient, mc2: MerginClient):
 def test_error_push_already_named_project(mc: MerginClient):
     test_project = "test_push_already_existing"
     project_dir = os.path.join(TMP_DIR, test_project)
+    cleanup(mc, test_project, [project_dir])
 
     with pytest.raises(ClientError) as e:
         mc.create_project_and_push(test_project, project_dir)
@@ -2886,3 +2894,38 @@ def test_mc_without_login():
 
     # without login should not be able to access workspaces
     with pytest.raises(ClientError, match="Authentication information is missing or invalid."):
         mc.workspaces_list()
+
+
+def sort_dict_of_files_by_path(d):
+    return {k: sorted(v, key=lambda f: f["path"]) for k, v in d.items()}
+
+
+def test_changes_handler(mc):
+    """
+    Test methods of the ChangesHandler class
+    """
+    # test _split_by_type
+    test_project = "test_changes_handler"
+    project = API_USER + "/" + test_project
+    project_dir = os.path.join(TMP_DIR, test_project)
+    cleanup(mc, project, [project_dir])
+    shutil.copytree(TEST_DATA_DIR, project_dir)
+    mc.create_project(project)
+    project_info = mc.project_info(project)
+    mp = MerginProject(project_dir)
+    mp.write_metadata(project_dir, project_info)
+
+    mixed_changes = mp.get_push_changes()
+    changes_handler = ChangesHandler(mc, project_info, mixed_changes)
+    split_changes = changes_handler._split_by_type(mixed_changes)
+    assert len(split_changes) == 2
+    # all blocking files in the first dict and no blocking file in the second dict
+    assert all(ChangesHandler.is_blocking_file(f) for files in split_changes[0].values() for f in files)
+    assert all(not ChangesHandler.is_blocking_file(f) for files in split_changes[1].values() for f in files)
+    # merge the split changes dicts back into a single dict and check files are the same
+    merged = defaultdict(list)
+    for d in split_changes:
+        for k, v in d.items():
+            merged[k].extend(v)
+    assert sort_dict_of_files_by_path(merged) == sort_dict_of_files_by_path(mixed_changes)
diff --git a/scripts/update_version.py b/scripts/update_version.py
index 184d6a8..2020659 100644
--- a/scripts/update_version.py
+++ b/scripts/update_version.py
@@ -4,7 +4,7 @@
 
 
 def replace_in_file(filepath, regex, sub):
-    with open(filepath, 'r') as f:
+    with open(filepath, "r") as f:
         content = f.read()
 
     content_new = re.sub(regex, sub, content, flags=re.M)
@@ -15,14 +15,14 @@ def replace_in_file(filepath, regex, sub):
 dir_path = os.path.dirname(os.path.realpath(__file__))
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--version', help='version to replace')
+parser.add_argument("--version", help="version to replace")
 args = parser.parse_args()
 ver = args.version
 print("using version " + ver)
 
 about_file = os.path.join(dir_path, os.pardir, "mergin", "version.py")
 print("patching " + about_file)
-replace_in_file(about_file, "__version__\s=\s\".*", "__version__ = \"" + ver + "\"")
+replace_in_file(about_file, r'__version__\s=\s".*', '__version__ = "' + ver + '"')
 
 setup_file = os.path.join(dir_path, os.pardir, "setup.py")
 print("patching " + setup_file)
diff --git a/setup.py b/setup.py
index 5bc34ac..2e3a4fb 100644
--- a/setup.py
+++ b/setup.py
@@ -4,35 +4,31 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='mergin-client',
-    version='0.10.0',
-    url='https://github.com/MerginMaps/python-api-client',
-    license='MIT',
-    author='Lutra Consulting Ltd.',
-    author_email='info@merginmaps.com',
-    description='Mergin Maps utils and client',
-    long_description='Mergin Maps utils and client',
-
+    name="mergin-client",
+    version="0.10.0",
+    url="https://github.com/MerginMaps/python-api-client",
+    license="MIT",
+    author="Lutra Consulting Ltd.",
+    author_email="info@merginmaps.com",
+    description="Mergin Maps utils and client",
+    long_description="Mergin Maps utils and client",
     packages=find_packages(),
-
-    platforms='any',
+    platforms="any",
     install_requires=[
-        'python-dateutil==2.8.2',
-        'pygeodiff==2.0.4',
-        'pytz==2022.1',
-        'click==8.1.3',
+        "python-dateutil==2.8.2",
+        "pygeodiff==2.0.4",
+        "pytz==2022.1",
+        "click==8.1.3",
     ],
-
     entry_points={
-        'console_scripts': ['mergin=mergin.cli:cli'],
+        "console_scripts": ["mergin=mergin.cli:cli"],
     },
-
     classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'Intended Audience :: Developers',
-        'License :: OSI Approved :: MIT License',
-        'Operating System :: OS Independent',
-        'Programming Language :: Python :: 3'
+        "Development Status :: 5 - Production/Stable",
+        "Intended Audience :: Developers",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3",
     ],
-    package_data={'mergin': ['cert.pem']}
+    package_data={"mergin": ["cert.pem"]},
 )