diff --git a/Pipfile b/Pipfile index 3552b96..dd4be42 100644 --- a/Pipfile +++ b/Pipfile @@ -12,6 +12,7 @@ minio = "~=7.1" mergin-client = "==0.9.3" dynaconf = {extras = ["ini"],version = "~=3.1"} google-api-python-client = "==2.24" +azure-storage-blob = "~=12.0" [requires] python_version = "3" diff --git a/README.md b/README.md index 51e2522..60be4f7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Mergin Maps Media Sync -Sync media files from Mergin Maps projects to other storage backends. Currently, supported backend are MinIO (S3-like) backend, Google Drive and local drive (mostly used for testing). +Sync media files from Mergin Maps projects to other storage backends. Currently, supported backends are MinIO (S3-like), Azure Blob Storage, Google Drive and local drive (mostly used for testing). Sync works in two modes, in COPY mode, where media files are only copied to external drive and MOVE mode, where files are subsequently removed from Mergin Maps project (on cloud). @@ -68,6 +68,37 @@ docker run -it \ The specification of `MINIO__BUCKET_SUBPATH` is optional and can be skipped if the files should be stored directly in `MINIO__BUCKET`. +#### Using Azure Blob Storage backend + +You will need an Azure Storage account. Retrieve the **account name** and one of the **account keys** from the Azure Portal under _Storage account → Access keys_. + +```shell +docker run -it \ + --name mergin-media-sync \ + -e MERGIN__USERNAME=john \ + -e MERGIN__PASSWORD=myStrongPassword \ + -e MERGIN__PROJECT_NAME=john/my_project \ + -e DRIVER=azure \ + -e AZURE_BLOB__ACCOUNT_NAME=mystorageaccount \ + -e AZURE_BLOB__ACCOUNT_KEY=base64encodedkey== \ + -e AZURE_BLOB__CONTAINER=my-container \ + lutraconsulting/mergin-media-sync python3 media_sync_daemon.py +``` + +The container is created automatically if it does not already exist. Uploaded files are accessible at: +``` +https://<account_name>.blob.core.windows.net/<container>/<blob_path> +``` + +`AZURE_BLOB__BLOB_PATH_PREFIX` is optional. 
When set, all blobs are placed under that prefix inside the container (e.g. `AZURE_BLOB__BLOB_PATH_PREFIX=myproject` stores files at `myproject/img1.png`). + +| Environment variable | Required | Description | +|---|---|---| +| `AZURE_BLOB__ACCOUNT_NAME` | yes | Azure Storage account name | +| `AZURE_BLOB__ACCOUNT_KEY` | yes | Storage account access key (found under _Access keys_ in the portal) | +| `AZURE_BLOB__CONTAINER` | yes | Blob container name (created automatically if missing) | +| `AZURE_BLOB__BLOB_PATH_PREFIX` | no | Optional path prefix for all uploaded blobs | + #### Using Google Drive backend For setup instructions and more details, please refer to our [Google Drive guide](./docs/google-drive-setup.md). @@ -136,6 +167,10 @@ To run automatic tests: export TEST_MINIO_URL="localhost:9000" export TEST_MINIO_ACCESS_KEY=EXAMPLE export TEST_MINIO_SECRET_KEY=EXAMPLEKEY + # Azure Blob Storage backend tests (optional) + export TEST_AZURE_STORAGE_ACCOUNT_NAME= + export TEST_AZURE_STORAGE_ACCOUNT_KEY= + export TEST_AZURE_STORAGE_CONTAINER= pipenv run pytest test/ ``` diff --git a/config.py b/config.py index ca379ab..a53b772 100644 --- a/config.py +++ b/config.py @@ -33,6 +33,7 @@ def validate_config(config): config.driver == DriverType.LOCAL or config.driver == DriverType.MINIO or config.driver == DriverType.GOOGLE_DRIVE + or config.driver == DriverType.AZURE ): raise ConfigError("Config error: Unsupported driver") @@ -78,6 +79,17 @@ def validate_config(config): ): raise ConfigError("Config error: Incorrect GoogleDrive driver settings") + if config.driver == DriverType.AZURE and not ( + hasattr(config, "azure_blob") + and hasattr(config.azure_blob, "account_name") + and hasattr(config.azure_blob, "account_key") + and hasattr(config.azure_blob, "container") + and config.azure_blob.account_name + and config.azure_blob.account_key + and config.azure_blob.container + ): + raise ConfigError("Config error: Incorrect Azure Blob Storage driver settings") + def 
class AzureBlobDriver(Driver):
    """Driver to handle connection to Azure Blob Storage.

    Settings are read from ``config.azure_blob``: ``account_name``,
    ``account_key`` and ``container`` are used to connect, and the optional
    ``blob_path_prefix`` is prepended to the name of every uploaded blob.
    The container is created during initialisation if it does not exist.
    """

    def __init__(self, config):
        """Connect to the storage account and make sure the container exists.

        Raises:
            DriverError: if the connection string is rejected by the SDK or
                any Azure call made during initialisation fails.
        """
        # Function-scope import: only this method needs the specific subclass.
        from azure.core.exceptions import ResourceExistsError

        super(AzureBlobDriver, self).__init__(config)

        azure_config = config.azure_blob
        self.account_name = azure_config.account_name
        self.container = azure_config.container

        # Leading/trailing slashes in the prefix would produce empty path
        # segments in blob names (e.g. "prefix//img1.png"), so drop them.
        prefix = getattr(azure_config, "blob_path_prefix", None)
        self.blob_path_prefix = (str(prefix).strip("/") if prefix else "") or None

        connection_string = (
            "DefaultEndpointsProtocol=https;"
            f"AccountName={self.account_name};"
            f"AccountKey={azure_config.account_key};"
            "EndpointSuffix=core.windows.net"
        )
        try:
            service_client = BlobServiceClient.from_connection_string(
                connection_string
            )
            container_client = service_client.get_container_client(self.container)
            if not container_client.exists():
                try:
                    container_client.create_container()
                except ResourceExistsError:
                    # Another process created the container between the
                    # existence check and the create call; that is fine.
                    pass
            self.client = container_client
        except (AzureError, ValueError) as e:
            # ValueError is what the SDK raises for a malformed connection
            # string; report it like any other driver failure.
            raise DriverError(
                "Azure Blob Storage driver init error: " + str(e)
            ) from e

    def upload_file(self, src: str, obj_path: str) -> str:
        """Upload a local file as a blob, overwriting any existing blob.

        Args:
            src: Path of the local file to upload.
            obj_path: Blob name relative to the container; the configured
                ``blob_path_prefix`` (if any) is prepended to it.

        Returns:
            The HTTPS URL of the uploaded blob, with the blob name
            percent-encoded so the URL is valid for names containing spaces
            or other reserved characters.

        Raises:
            DriverError: if the Azure upload call fails.
        """
        from urllib.parse import quote

        if self.blob_path_prefix:
            obj_path = f"{self.blob_path_prefix}/{obj_path}"
        try:
            with open(src, "rb") as data:
                self.client.upload_blob(name=obj_path, data=data, overwrite=True)
        except AzureError as e:
            raise DriverError("Azure Blob Storage driver error: " + str(e)) from e
        # quote() keeps "/" as-is, so the virtual folder structure survives.
        return (
            f"https://{self.account_name}.blob.core.windows.net/"
            f"{self.container}/{quote(obj_path)}"
        )
def _azure_sync_settings(
    full_project_name, work_project_dir, account_key, blob_path_prefix=""
):
    """Build the config overrides shared by every step of the Azure backend test."""
    return {
        "MERGIN__USERNAME": API_USER,
        "MERGIN__PASSWORD": USER_PWD,
        "MERGIN__URL": SERVER_URL,
        "MERGIN__PROJECT_NAME": full_project_name,
        "PROJECT_WORKING_DIR": work_project_dir,
        "OPERATION_MODE": "copy",
        "REFERENCES": [
            {
                "file": None,
                "table": None,
                "local_path_column": None,
                "driver_path_column": None,
            }
        ],
        "DRIVER": "azure",
        "AZURE_BLOB__ACCOUNT_NAME": AZURE_STORAGE_ACCOUNT_NAME,
        "AZURE_BLOB__ACCOUNT_KEY": account_key,
        "AZURE_BLOB__CONTAINER": AZURE_STORAGE_CONTAINER,
        "AZURE_BLOB__BLOB_PATH_PREFIX": blob_path_prefix,
    }


def _sync_and_list_azure_blobs(expected_blobs):
    """Run media sync with the current config and return the container's blob names.

    Blobs named in ``expected_blobs`` are deleted first, so copies left behind
    by an earlier run cannot make the assertions pass without a real upload.
    """
    container_client = AzureBlobDriver(config).client
    stale = {b.name for b in container_client.list_blobs()}.intersection(
        expected_blobs
    )
    for name in stale:
        container_client.delete_blob(name)

    main()

    return {b.name for b in container_client.list_blobs()}


def test_azure_blob_backend(mc):
    """Test media sync connected to Azure Blob Storage backend (needs valid Azure credentials)"""
    project_name = "mediasync_test_azure"
    full_project_name = WORKSPACE + "/" + project_name
    work_project_dir = os.path.join(TMP_DIR, project_name + "_work")

    cleanup(mc, full_project_name, [work_project_dir])
    prepare_mergin_project(mc, full_project_name)

    # invalid config - the account key is a required field
    config.update(
        _azure_sync_settings(full_project_name, work_project_dir, account_key="")
    )
    with pytest.raises(ConfigError):
        validate_config(config)

    # Everything below talks to a real storage account. The Azure tests are
    # optional, so skip (rather than fail) when no credentials were provided.
    if not (
        AZURE_STORAGE_ACCOUNT_NAME
        and AZURE_STORAGE_ACCOUNT_KEY
        and AZURE_STORAGE_CONTAINER
    ):
        pytest.skip("TEST_AZURE_STORAGE_* environment variables are not set")

    # patch config to fit testing purposes
    config.update(
        _azure_sync_settings(
            full_project_name,
            work_project_dir,
            account_key=AZURE_STORAGE_ACCOUNT_KEY,
        )
    )

    # verify files were uploaded to Azure Blob Storage
    blob_names = _sync_and_list_azure_blobs(["img1.png", "images/img2.jpg"])
    assert "img1.png" in blob_names
    assert "images/img2.jpg" in blob_names

    # files in mergin project still exist (copy mode)
    assert os.path.exists(os.path.join(work_project_dir, "img1.png"))
    assert os.path.exists(os.path.join(work_project_dir, "images", "img2.jpg"))

    # test with blob_path_prefix
    cleanup(mc, full_project_name, [work_project_dir])
    prepare_mergin_project(mc, full_project_name)

    config.update(
        _azure_sync_settings(
            full_project_name,
            work_project_dir,
            account_key=AZURE_STORAGE_ACCOUNT_KEY,
            blob_path_prefix="subPath",
        )
    )

    blob_names = _sync_and_list_azure_blobs(
        ["subPath/img1.png", "subPath/images/img2.jpg"]
    )
    assert "subPath/img1.png" in blob_names
    assert "subPath/images/img2.jpg" in blob_names