Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/auto_archiver/core/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,14 @@ def add_additional_args(self, parser: argparse.ArgumentParser = None):
default=False,
)

# nargs="*" gathers every value given after --metadata into a list, so the
# option always yields a list (empty when the flag is absent or given bare),
# matching the "array" promised by the help text. nargs="?" would instead
# yield a single string, or None when the flag is given without a value.
parser.add_argument(
    "--metadata",
    dest="requested_metadata",
    help="An array of specific metadata fields to select from the collected content.",
    default=[],
    nargs="*",
)

def add_individual_module_args(
self, modules: list[LazyBaseModule] = None, parser: argparse.ArgumentParser = None
) -> None:
Expand Down
38 changes: 35 additions & 3 deletions src/auto_archiver/modules/metadata_enricher/metadata_enricher.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,21 @@ class MetadataEnricher(Enricher):
Extracts metadata information from files using exiftool.
"""

def enrich(self, to_enrich: Metadata, md_grocery_list=None) -> None:
    """
    Extract EXIF metadata for every media item and store the requested subset on it.

    :param to_enrich: the Metadata object whose ``media`` entries are inspected.
    :param md_grocery_list: names of the metadata fields or baskets to keep, passed
        on to ``select_metadata``. When omitted, the "author", "datetime" and
        "location" baskets are requested.
    """
    if md_grocery_list is None:
        # Built per call instead of being shared as a mutable default argument.
        # The basket is spelled "datetime" because that is the name
        # select_metadata checks for; a default of "datetimes" never matched it.
        md_grocery_list = ["author", "datetime", "location"]

    logger.debug("Extracting EXIF metadata")

    for media in to_enrich.media:
        md = self.get_metadata(media.filename)
        if not md:
            # no metadata was returned for this file: leave the media untouched
            continue
        # Only the requested fields are stored, even if that leaves an empty dict.
        media.set("metadata", self.select_metadata(md, md_grocery_list))

def get_metadata(self, filename: str) -> dict:
try:
# Run ExifTool command to extract metadata from the file
cmd = ["exiftool", filename]
result = subprocess.run(cmd, capture_output=True, text=True)

# Process the output to extract individual metadata fields
metadata = {}
for line in result.stdout.splitlines():
Expand All @@ -35,3 +37,33 @@ def get_metadata(self, filename: str) -> dict:
except Exception as e:
logger.error(f"Error occurred: {e}: {traceback.format_exc()}")
return {}

def select_metadata(self, all_md, md_grocery_list):
    """
    Filter the general exiftool output down to the user-specified grocery list.

    Each entry of the grocery list is either a basket name or a field name:

    * "author" keeps every field whose name contains "author", "producer" or "creator"
    * "datetime" (or "datetimes") keeps every field whose name contains "date" or "time"
    * "location" keeps every field whose name contains "gps", "latitude" or "longitude"
    * any other entry keeps the field whose name, as-is or lowercased, equals the entry

    Fields with an empty value are never selected.

    :param all_md: dict mapping metadata field names to their values.
    :param md_grocery_list: requested basket / field names; a single string is
        treated as a one-item list and None as an empty list.
    :return: a new dict holding only the selected fields.
    """
    if md_grocery_list is None:
        md_grocery_list = []
    elif isinstance(md_grocery_list, str):
        # A bare string would turn the membership tests below into substring checks.
        md_grocery_list = [md_grocery_list]

    # the batches of key terms that get pulled for the special basket names
    baskets = {
        "author": ("author", "producer", "creator"),
        "datetime": ("date", "time"),
        # alias: accept the plural spelling of the basket name as well
        "datetimes": ("date", "time"),
        "location": ("gps", "latitude", "longitude"),
    }
    # Resolved once up front: every key term belonging to a requested basket.
    requested_terms = [
        term for basket, terms in baskets.items() if basket in md_grocery_list for term in terms
    ]

    specified_md = {}
    for md_key, md_value in all_md.items():
        if not md_value:
            # empty values are skipped for baskets and direct requests alike
            continue
        md_key_lower = md_key.lower()
        in_basket = any(term in md_key_lower for term in requested_terms)
        # the field can also be requested directly, in its original or lowercased spelling
        requested_directly = md_key_lower in md_grocery_list or md_key in md_grocery_list
        if in_basket or requested_directly:
            specified_md[md_key] = md_value
    return specified_md
49 changes: 47 additions & 2 deletions tests/enrichers/test_metadata_enricher.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,25 @@ def test_enrich_sets_metadata(enricher, mocker):
metadata.media = [media1, media2]
enricher.get_metadata = lambda f: {"key": "value"} if f == "img1.jpg" else {}

enricher.enrich(metadata)
enricher.enrich(metadata, ["key"])

media1.set.assert_called_once_with("metadata", {"key": "value"})
media2.set.assert_not_called()
assert metadata.media == [media1, media2]


def test_enrich_no_metadata_selection(enricher, mocker):
    """With the default selection, an unrequested field is filtered out and an empty dict is stored."""
    with_exif = mocker.Mock(filename="img1.jpg")
    without_exif = mocker.Mock(filename="img2.jpg")
    item = mocker.Mock()
    item.media = [with_exif, without_exif]

    def fake_get_metadata(filename):
        # only the first file reports any metadata
        if filename == "img1.jpg":
            return {"key": "value"}
        return {}

    enricher.get_metadata = fake_get_metadata

    enricher.enrich(item)

    with_exif.set.assert_called_once_with("metadata", {})
    without_exif.set.assert_not_called()
    assert item.media == [with_exif, without_exif]


def test_enrich_empty_media(enricher, mocker):
metadata = mocker.Mock()
metadata.media = []
Expand All @@ -71,14 +83,47 @@ def test_get_metadata_error_handling(enricher, mocker):
assert "Error occurred: " in mock_log.call_args[0][0]


def test_metadata_pickle(enricher, unpickle, mocker):
# TODO depends on the expected functionality
"""
def test_default_metadata_pickle(enricher, unpickle, mocker):
mock_run = mocker.patch("subprocess.run")
# Uses pickled values
mock_run.return_value = unpickle("metadata_enricher_exif.pickle")
metadata = unpickle("metadata_enricher_ytshort_input.pickle")
expected = unpickle("metadata_enricher_ytshort_expected.pickle")
enricher.enrich(metadata)
expected_media = expected.media
print(expected_media)
actual_media = metadata.media

assert len(expected_media) == len(actual_media)
assert actual_media[0].properties.get("metadata") == expected_media[0].properties.get("metadata")
"""


def test_metadata_pickle_megapixel(enricher, unpickle, mocker):
    """Requesting "megapixels" keeps only the Megapixels field of the pickled exiftool result."""
    run_mock = mocker.patch("subprocess.run")
    # the exiftool call is answered with a pickled result
    run_mock.return_value = unpickle("metadata_enricher_exif.pickle")
    item = unpickle("metadata_enricher_ytshort_input.pickle")

    enricher.enrich(item, ["megapixels"])

    stored = item.media[0].properties.get("metadata")
    assert stored == {"Megapixels": "0.922"}


def test_metadata_specify_datetime_and_metapixels(enricher, unpickle, mocker):
    """The "datetime" basket and a directly requested field can be combined in one selection."""
    timestamp = "2025:02:18 19:42:50+00:00"
    wanted = {
        "File Modification Date/Time": timestamp,
        "File Access Date/Time": timestamp,
        "File Inode Change Date/Time": timestamp,
        "Megapixels": "0.922",
    }

    run_mock = mocker.patch("subprocess.run")
    # the exiftool call is answered with a pickled result
    run_mock.return_value = unpickle("metadata_enricher_exif.pickle")
    item = unpickle("metadata_enricher_ytshort_input.pickle")

    enricher.enrich(item, ["datetime", "megapixels"])

    stored = item.media[0].properties.get("metadata")
    assert stored == wanted