Skip to content

Commit a6ff39c

Browse files
committed
Added upload_file_to_queue method to bin/utils
1 parent ce5598b commit a6ff39c

File tree

2 files changed

+51
-0
lines changed

2 files changed

+51
-0
lines changed

bin/anthology/data.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
################################################################################
2323

2424
import os
25+
from enum import Enum
2526

2627
# this is the canonical URL. In contrast to all other
2728
# URL templates, it always links to the official anthology.
@@ -98,3 +99,8 @@ def get_journal_title(top_level_id, volume_title):
9899
return "Transactions of the Association for Computational Linguistics"
99100
else:
100101
return volume_title
102+
103+
104+
class ResourceType(Enum):
    """Enumeration of the resource kinds known to this module.

    Each member's ``value`` is a plain string.
    """

    # A PDF resource.
    PDF = "pdf"
    # An attachment resource; note that the value is plural.
    ATTACHMENT = "attachments"

bin/anthology/utils.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,3 +492,48 @@ def compute_hash(value: bytes) -> str:
492492
def compute_hash_from_file(path: str) -> str:
    """Return ``compute_hash`` applied to the raw bytes of the file at *path*.

    The file is opened in binary mode and read in full before hashing.
    """
    with open(path, "rb") as stream:
        contents = stream.read()
    return compute_hash(contents)
495+
496+
497+
# Settings for automatically uploading files to the server
498+
# The root directory for files
499+
ANTHOLOGY_FILE_ROOT = "anthology-files"
500+
501+
# The ssh shortcut (in ~/.ssh/config) or full hostname
502+
ANTHOLOGY_HOST = "anth"
503+
504+
505+
def upload_file_to_queue(
    local_path: str,
    resource_type: data.ResourceType,
    venue_name: str,
    filename: str,
    file_hash: str,
    commit: bool = False,
):
    """Upload a local file into the queue directory on the Anthology host.

    The hash of the local file is verified first. Then the remote directory
    ``<ANTHOLOGY_FILE_ROOT>/queue/<resource_type.value>/<venue_name>`` is
    created over ssh, and the file is copied into it with rsync under the
    name ``<filename>.<file_hash>``.

    Args:
        local_path: Path of the local file to upload.
        resource_type: Its ``value`` names the subdirectory of ``queue``.
        venue_name: Name of the subdirectory below the resource-type directory.
        filename: Base name of the remote file; ``file_hash`` is appended
            after a dot.
        file_hash: Expected result of ``compute_hash_from_file(local_path)``.
        commit: When False (the default) the commands are only logged via
            ``logging.info`` and nothing is executed.

    Raises:
        Exception: If the hash of the local file differs from ``file_hash``.
        subprocess.CalledProcessError: If ssh or rsync exits with a non-zero
            status (only possible when ``commit`` is True).
    """
    # Imported locally so this function does not depend on the module's
    # top-level import block.
    import shlex

    actual_hash = compute_hash_from_file(local_path)
    if file_hash != actual_hash:
        raise Exception(
            f"Got unexpected hash, file contains incorrect data. (actual hash: {actual_hash}, expected: {file_hash})"
        )

    # Built once so the mkdir target and the rsync destination cannot drift apart.
    remote_dir = f"{ANTHOLOGY_FILE_ROOT}/queue/{resource_type.value}/{venue_name}"

    # ssh passes the command string to the remote shell, so the path must be
    # quoted; otherwise a venue name with spaces or shell metacharacters would
    # be word-split or executed. shlex.quote leaves ordinary names unchanged.
    mkdir_cmd = [
        'ssh',
        ANTHOLOGY_HOST,
        f'mkdir -p {shlex.quote(remote_dir)}',
    ]
    if commit:
        subprocess.check_call(mkdir_cmd)
    else:
        logging.info(f"Would run: {mkdir_cmd}")

    # NOTE(review): the rsync destination is deliberately left unquoted;
    # how rsync treats special characters in remote paths appears to depend
    # on the rsync version -- confirm before adding quoting here.
    upload_cmd = [
        "rsync",
        "-lptgoDve",
        "ssh",
        local_path,
        f"{ANTHOLOGY_HOST}:{remote_dir}/{filename}.{file_hash}",
    ]
    if commit:
        subprocess.check_call(upload_cmd)
    else:
        logging.info(f"Would run: {upload_cmd}")

0 commit comments

Comments
 (0)