Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ minio = "~=7.1"
mergin-client = "==0.9.3"
dynaconf = {extras = ["ini"],version = "~=3.1"}
google-api-python-client = "==2.24"
azure-storage-blob = "~=12.0"

[requires]
python_version = "3"
37 changes: 36 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Mergin Maps Media Sync
Sync media files from Mergin Maps projects to other storage backends. Currently, supported backend are MinIO (S3-like) backend, Google Drive and local drive (mostly used for testing).
Sync media files from Mergin Maps projects to other storage backends. Currently, supported backends are MinIO (S3-like), Azure Blob Storage, Google Drive and local drive (mostly used for testing).

Sync works in two modes: COPY mode, where media files are only copied to the external storage, and MOVE mode, where the files are
subsequently also removed from the Mergin Maps project (in the cloud).
Expand Down Expand Up @@ -68,6 +68,37 @@ docker run -it \

The specification of `MINIO__BUCKET_SUBPATH` is optional and can be skipped if the files should be stored directly in `MINIO__BUCKET`.

#### Using Azure Blob Storage backend

You will need an Azure Storage account. Retrieve the **account name** and one of the **account keys** from the Azure Portal under _Storage account → Access keys_.

```shell
docker run -it \
--name mergin-media-sync \
-e MERGIN__USERNAME=john \
-e MERGIN__PASSWORD=myStrongPassword \
-e MERGIN__PROJECT_NAME=john/my_project \
-e DRIVER=azure \
-e AZURE_BLOB__ACCOUNT_NAME=mystorageaccount \
-e AZURE_BLOB__ACCOUNT_KEY=base64encodedkey== \
-e AZURE_BLOB__CONTAINER=my-container \
lutraconsulting/mergin-media-sync python3 media_sync_daemon.py
```

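The container is created automatically if it does not already exist. Each uploaded file is stored at a URL of the following form (whether it can be read without credentials depends on the container's access level):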
```
https://<account_name>.blob.core.windows.net/<container>/<blob_path>
```

`AZURE_BLOB__BLOB_PATH_PREFIX` is optional. When set, all blobs are placed under that prefix inside the container (e.g. `AZURE_BLOB__BLOB_PATH_PREFIX=myproject` stores files at `myproject/img1.png`).

| Environment variable | Required | Description |
|---|---|---|
| `AZURE_BLOB__ACCOUNT_NAME` | yes | Azure Storage account name |
| `AZURE_BLOB__ACCOUNT_KEY` | yes | Storage account access key (found under _Access keys_ in the portal) |
| `AZURE_BLOB__CONTAINER` | yes | Blob container name (created automatically if missing) |
| `AZURE_BLOB__BLOB_PATH_PREFIX` | no | Optional path prefix for all uploaded blobs |

#### Using Google Drive backend
For setup instructions and more details, please refer to our [Google Drive guide](./docs/google-drive-setup.md).

Expand Down Expand Up @@ -136,6 +167,10 @@ To run automatic tests:
export TEST_MINIO_URL="localhost:9000"
export TEST_MINIO_ACCESS_KEY=EXAMPLE
export TEST_MINIO_SECRET_KEY=EXAMPLEKEY
# Azure Blob Storage backend tests (optional)
export TEST_AZURE_STORAGE_ACCOUNT_NAME=<account_name>
export TEST_AZURE_STORAGE_ACCOUNT_KEY=<account_key>
export TEST_AZURE_STORAGE_CONTAINER=<container>
pipenv run pytest test/
```

Expand Down
12 changes: 12 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def validate_config(config):
config.driver == DriverType.LOCAL
or config.driver == DriverType.MINIO
or config.driver == DriverType.GOOGLE_DRIVE
or config.driver == DriverType.AZURE
):
raise ConfigError("Config error: Unsupported driver")

Expand Down Expand Up @@ -78,6 +79,17 @@ def validate_config(config):
):
raise ConfigError("Config error: Incorrect GoogleDrive driver settings")

if config.driver == DriverType.AZURE and not (
hasattr(config, "azure_blob")
and hasattr(config.azure_blob, "account_name")
and hasattr(config.azure_blob, "account_key")
and hasattr(config.azure_blob, "container")
and config.azure_blob.account_name
and config.azure_blob.account_key
and config.azure_blob.container
):
raise ConfigError("Config error: Incorrect Azure Blob Storage driver settings")


def update_config_path(
path_param: str,
Expand Down
8 changes: 7 additions & 1 deletion config.yaml.default
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,16 @@ minio:
bucket_subpath:

google_drive:
service_account_file:
service_account_file:
folder:
share_with:

azure_blob:
account_name:
account_key:
container:
blob_path_prefix:

references:
- file: survey.gpkg
table: notes
Expand Down
46 changes: 46 additions & 0 deletions drivers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@
from googleapiclient.discovery import build, Resource
from googleapiclient.http import MediaFileUpload

from azure.storage.blob import BlobServiceClient
from azure.core.exceptions import AzureError


class DriverType(enum.Enum):
LOCAL = "local"
MINIO = "minio"
GOOGLE_DRIVE = "google_drive"
AZURE = "azure"

def __eq__(self, value):
if isinstance(value, str):
Expand Down Expand Up @@ -282,6 +286,46 @@ def _get_share_with(self, config_google_drive) -> typing.List[str]:
return emails_to_share_with


class AzureBlobDriver(Driver):
    """Driver to handle connection to Azure Blob Storage.

    Files are uploaded as blobs into a single container, optionally under a
    common path prefix.
    """

    def __init__(self, config):
        """Connect to the storage account and make sure the container exists.

        Reads ``account_name``, ``account_key`` and ``container`` from
        ``config.azure_blob``; ``blob_path_prefix`` is optional.

        Raises:
            DriverError: if the Azure SDK reports an error while connecting
                or while checking for / creating the container.
        """
        super(AzureBlobDriver, self).__init__(config)

        azure_config = config.azure_blob
        self.account_name = azure_config.account_name
        self.container = azure_config.container

        # A missing or empty prefix means "store blobs at the container root".
        # Surrounding slashes are dropped so that joining with "/" in
        # upload_file() never produces empty path segments such as
        # "prefix//img1.png" or a blob name starting with "/".
        prefix = getattr(azure_config, "blob_path_prefix", None)
        self.blob_path_prefix = (str(prefix).strip("/") or None) if prefix else None

        try:
            connection_string = (
                f"DefaultEndpointsProtocol=https;"
                f"AccountName={self.account_name};"
                f"AccountKey={azure_config.account_key};"
                f"EndpointSuffix=core.windows.net"
            )
            service_client = BlobServiceClient.from_connection_string(connection_string)
            container_client = service_client.get_container_client(self.container)
            if not container_client.exists():
                container_client.create_container()
            # All later operations go through the container-scoped client.
            self.client = container_client
        except AzureError as e:
            # Chain the original exception so the SDK traceback is preserved.
            raise DriverError("Azure Blob Storage driver init error: " + str(e)) from e

    def upload_file(self, src: str, obj_path: str) -> str:
        """Upload the local file ``src`` as blob ``obj_path`` and return its URL.

        When a blob path prefix is configured, the blob is stored as
        ``<prefix>/<obj_path>``. An existing blob with the same name is
        overwritten.

        Returns:
            The blob URL built from the account name, container and blob
            path using the ``blob.core.windows.net`` endpoint.

        Raises:
            DriverError: if the Azure SDK reports an error during upload.
        """
        if self.blob_path_prefix:
            obj_path = f"{self.blob_path_prefix}/{obj_path}"
        try:
            with open(src, "rb") as data:
                self.client.upload_blob(name=obj_path, data=data, overwrite=True)
        except AzureError as e:
            raise DriverError("Azure Blob Storage driver error: " + str(e)) from e
        return f"https://{self.account_name}.blob.core.windows.net/{self.container}/{obj_path}"


def create_driver(config):
"""Create driver object based on type defined in config"""
driver = None
Expand All @@ -291,4 +335,6 @@ def create_driver(config):
driver = MinioDriver(config)
elif config.driver == DriverType.GOOGLE_DRIVE:
driver = GoogleDriveDriver(config)
elif config.driver == DriverType.AZURE:
driver = AzureBlobDriver(config)
return driver
7 changes: 7 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
# Credentials for the optional backend tests are taken from the environment;
# each value is None when the corresponding variable is not set.
GOOGLE_DRIVE_SERVICE_ACCOUNT_FILE = os.getenv("TEST_GOOGLE_DRIVE_SERVICE_ACCOUNT_FILE")

# Azure Blob Storage test account settings.
AZURE_STORAGE_ACCOUNT_NAME = os.getenv("TEST_AZURE_STORAGE_ACCOUNT_NAME")
AZURE_STORAGE_ACCOUNT_KEY = os.getenv("TEST_AZURE_STORAGE_ACCOUNT_KEY")
AZURE_STORAGE_CONTAINER = os.getenv("TEST_AZURE_STORAGE_CONTAINER")


@pytest.fixture(scope="function")
Expand Down Expand Up @@ -49,6 +52,10 @@ def setup_config():
"MINIO__BUCKET_SUBPATH": "",
"MINIO__SECURE": False,
"MINIO__REGION": "",
"AZURE_BLOB__ACCOUNT_NAME": "",
"AZURE_BLOB__ACCOUNT_KEY": "",
"AZURE_BLOB__CONTAINER": "",
"AZURE_BLOB__BLOB_PATH_PREFIX": "",
}
)

Expand Down
113 changes: 112 additions & 1 deletion test/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import shutil
import sqlite3

from drivers import MinioDriver, LocalDriver, GoogleDriveDriver
from drivers import MinioDriver, LocalDriver, GoogleDriveDriver, AzureBlobDriver
from media_sync import (
main,
config,
Expand All @@ -33,6 +33,9 @@
MINIO_SECRET_KEY,
GOOGLE_DRIVE_SERVICE_ACCOUNT_FILE,
GOOGLE_DRIVE_FOLDER,
AZURE_STORAGE_ACCOUNT_NAME,
AZURE_STORAGE_ACCOUNT_KEY,
AZURE_STORAGE_CONTAINER,
cleanup,
prepare_mergin_project,
)
Expand Down Expand Up @@ -634,3 +637,111 @@ def test_google_drive_backend(mc):
# files in mergin project still exist (copy mode)
assert os.path.exists(os.path.join(work_project_dir, "img1.png"))
assert os.path.exists(os.path.join(work_project_dir, "images", "img2.jpg"))


def test_azure_blob_backend(mc):
    """Test media sync connected to Azure Blob Storage backend (needs valid Azure credentials)"""
    project_name = "mediasync_test_azure"
    full_project_name = "/".join((WORKSPACE, project_name))
    work_project_dir = os.path.join(TMP_DIR, f"{project_name}_work")

    def azure_settings(account_key, **extra):
        """Return a fresh settings dict for one run; ``extra`` keys are appended last."""
        settings = {
            "MERGIN__USERNAME": API_USER,
            "MERGIN__PASSWORD": USER_PWD,
            "MERGIN__URL": SERVER_URL,
            "MERGIN__PROJECT_NAME": full_project_name,
            "PROJECT_WORKING_DIR": work_project_dir,
            "OPERATION_MODE": "copy",
            "REFERENCES": [
                {
                    "file": None,
                    "table": None,
                    "local_path_column": None,
                    "driver_path_column": None,
                }
            ],
            "DRIVER": "azure",
            "AZURE_BLOB__ACCOUNT_NAME": AZURE_STORAGE_ACCOUNT_NAME,
            "AZURE_BLOB__ACCOUNT_KEY": account_key,
            "AZURE_BLOB__CONTAINER": AZURE_STORAGE_CONTAINER,
        }
        settings.update(extra)
        return settings

    def uploaded_blob_names():
        """List the names of all blobs in the configured container."""
        azure_driver = AzureBlobDriver(config)
        return [blob.name for blob in azure_driver.client.list_blobs()]

    cleanup(mc, full_project_name, [work_project_dir])
    prepare_mergin_project(mc, full_project_name)

    # an empty account key must be rejected by config validation
    config.update(azure_settings(""))
    with pytest.raises(ConfigError):
        validate_config(config)

    # valid settings: sync straight into the container root
    config.update(azure_settings(AZURE_STORAGE_ACCOUNT_KEY))
    main()

    # both media files must now be present in Azure Blob Storage
    names_at_root = uploaded_blob_names()
    assert "img1.png" in names_at_root
    assert "images/img2.jpg" in names_at_root

    # copy mode keeps the files in the mergin project
    for relative_parts in (("img1.png",), ("images", "img2.jpg")):
        assert os.path.exists(os.path.join(work_project_dir, *relative_parts))

    # repeat the sync with a blob path prefix configured
    cleanup(mc, full_project_name, [work_project_dir])
    prepare_mergin_project(mc, full_project_name)

    config.update(
        azure_settings(
            AZURE_STORAGE_ACCOUNT_KEY,
            AZURE_BLOB__BLOB_PATH_PREFIX="subPath",
        )
    )
    main()

    names_under_prefix = uploaded_blob_names()
    assert "subPath/img1.png" in names_under_prefix
    assert "subPath/images/img2.jpg" in names_under_prefix
Loading