Skip to content

Commit ce5598b

Browse files
committed
Added pdf and attachment hashes to bin/anthology
1 parent e9d54b1 commit ce5598b

File tree

2 files changed

+7
-0
lines changed

2 files changed

+7
-0
lines changed

bin/anthology/papers.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ def videos(self):
97 97
]
98 98
return []
99 99

100+
@cached_property
101+
def pdf_hash(self):
102+
return self.attrib.get("pdf_hash", None)
103+
100 104
def _parse_revision_or_errata(self, tag):
101 105
for item in self.attrib.get(tag, []):
102 106
# Expand URLs with paper ID
# Expand URLs with paper ID

bin/anthology/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
20 20
import re
21 21
import requests
22 22
import shutil
23+
import subprocess
23 24

24 25
from lxml import etree
25 26
from urllib.parse import urlparse
@@ -415,11 +416,13 @@ def parse_element(xml_element):
415 416
elif tag == "url":
416 417
tag = "xml_url"
417 418
value = element.text
419+
attrib['pdf_hash'] = element.get("hash")
418 420
elif tag == "attachment":
419 421
value = {
420 422
"filename": element.text,
421 423
"type": element.get("type", "attachment"),
422 424
"url": element.text,
425+
"hash": element.get("hash"),
423 426
}
424 427
elif tag in ("author", "editor"):
425 428
id_ = element.attrib.get("id", None)

0 commit comments

Comments
 (0)