Commit 602bf09

Merge pull request #415 from libris/feature/data-driven-compilation
Support declarative datasets in datacompiler
2 parents 2c4417e + ef59b04 commit 602bf09

File tree

5 files changed: +173 −309 lines changed

common.py

Lines changed: 13 additions & 266 deletions
@@ -1,270 +1,17 @@
 import os
-from rdflib import Graph, ConjunctiveGraph
-from lxltools.datacompiler import Compiler
-from urllib.parse import urljoin
-
-
-BASE = "https://id.kb.se/"
-
-SCRIPT_DIR = os.path.dirname(__file__) or '.'
-
-
-def aslist(o):
-    return o if isinstance(o, list) else [] if o is None else [o]
-
-
-compiler = Compiler(base_dir=SCRIPT_DIR,
-                    dataset_id=BASE + 'dataset/common',
-                    created='2013-10-17T14:07:48.000Z',
-                    tool_id=BASE + 'generator/datasetcompiler',
-                    context='sys/context/base.jsonld',
-                    record_thing_link='mainEntity',
-                    system_base_iri='',
-                    union='common.jsonld.lines')
-
-
-@compiler.dataset
-def enums():
-    graph = Graph()
-    rq = compiler.path('source/marc/construct-enums.rq').read_text('utf-8')
-    graph += Graph().query(rq).graph
-
-    return "/marc/", "2014-01-23T10:34:17.981Z", graph
-
-
-@compiler.dataset
-def rdaterms():
-    # NOTE: see also examples/mappings/rda-bf2-types.ttl for possibiliy of
-    # extending our type system (instead).
-    graph = compiler.construct(sources=[
-            {
-                'source': Graph().parse(str(compiler.path('source/rda-terms.ttl')), format='turtle'),
-                'dataset': BASE + 'dataset/rdaterms'
-            },
-
-            {'source': 'http://rdaregistry.info/termList/RDAContentType.nt'},
-            {'source': 'http://id.loc.gov/vocabulary/contentTypes'},
-
-            {'source': 'http://rdaregistry.info/termList/RDAMediaType.nt'},
-            {'source': 'http://id.loc.gov/vocabulary/mediaTypes'},
-
-            {'source': 'http://rdaregistry.info/termList/RDACarrierType.nt'},
-            {'source': 'http://id.loc.gov/vocabulary/carriers'},
-
-            #{'source': 'http://rdaregistry.info/termList/ModeIssue'},
-            #{'source': 'http://id.loc.gov/vocabulary/issuance.skos.rdf'},
-
-        ],
-        query="source/construct-rda-terms.rq")
-
-    return "/term/rda/", "2018-05-16T06:18:01.337Z", graph
-
-
-@compiler.dataset
-def materials():
-    graph = compiler.construct(sources=[
-            {
-                "source": Graph().parse(str(compiler.path('source/materials.ttl')), format='turtle'),
-                "dataset": BASE + "dataset/materials"
-            },
-            {
-                "source": "http://rdaregistry.info/termList/RDAMaterial.nt"
-            },
-            {
-                "source": "sparql/aat-materials",
-                "construct": "source/remote/construct-aat-materials.rq",
-            }
-        ],
-        query="source/construct-materials.rq")
-
-    return "/material/", "2021-12-07T21:28:01.123Z", graph
-
-@compiler.dataset
-def musnotationterms():
-    graph = compiler.construct(sources=[
-            {
-                "source": Graph().parse(str(compiler.path('source/musicnotation.ttl')), format='turtle'),
-                "dataset": BASE + "dataset/musnotationterms"
-            },
-            {
-                "source": "http://rdaregistry.info/termList/MusNotation.nt"
-            }
-        ],
-        query="source/construct-musnotationsterms.rq")
-
-    return "/term/rda/musnotation/", "2021-05-21T23:59:01.337Z", graph
-
-
-@compiler.dataset
-def tacnotationterms():
-    graph = compiler.construct(sources=[
-            {
-                "source": Graph().parse(str(compiler.path('source/tactilenotation.ttl')), format='turtle'),
-                "dataset": BASE + "dataset/tacnotationterms"
-            },
-            {
-                "source": "http://rdaregistry.info/termList/TacNotation.nt"
-            }
-        ],
-        query="source/construct-tacnotationterms.rq")
-
-    return "/term/rda/tacnotation/", "2021-05-21T23:59:10.456Z", graph
-
-
-@compiler.dataset
-def swepubterms():
-    graph = Graph()
-    for part in compiler.path('source/swepub').glob('**/*.ttl'):
-        if part.stem == 'vocab':
-            continue
-        graph.parse(str(part), format='turtle')
-
-    graph.update(compiler.path('source/swepub/update.rq').read_text('utf-8'))
-
-    return "/term/swepub/", "2018-05-29T12:36:01.337Z", graph
-
-
-@compiler.dataset
-def relators():
-    graph = compiler.construct(sources=[
-            {
-                "source": Graph().parse(str(compiler.path('source/relators.ttl')), format='turtle'),
-                "dataset": BASE + "dataset/relators",
-            },
-            {
-                "source": "http://id.loc.gov/vocabulary/relators"
-            },
-            {
-                "source": "http://finto.fi/rest/v1/mts/data",
-                "dataset": "http://urn.fi/URN:NBN:fi:au:mts:"
-            },
-            {
-                "source": "http://d-nb.info/standards/elementset/gnd"
-            },
-            {
-                "source": "sparql/bnf-roles",
-                "construct": "source/remote/construct-bnf-roles.rq"
-            }
-        ],
-        query="source/construct-relators.rq")
-
-    return "/relator/", "2014-02-01T16:29:12.378Z", graph
-
-
-@compiler.dataset
-def languages():
-    languages = Graph().parse(str(compiler.path('source/languages.ttl')), format='turtle')
-    iso639_1 = Graph().parse(str(compiler.cache_url('http://id.loc.gov/vocabulary/iso639-1.nt')), format='nt')
-    iso639_2 = Graph().parse(str(compiler.cache_url('http://id.loc.gov/vocabulary/iso639-2.nt')), format='nt')
-
-    graph = compiler.construct(sources=[
-            {
-                "source": languages,
-                "dataset": BASE + "dataset/languages"
-            },
-            {
-                "source": iso639_2,
-                "dataset": "http://id.loc.gov/vocabulary/iso639-2"
-            }
-        ],
-        query="source/construct-languages-iso639-2.rq")
-
-    graph = compiler.construct(sources=[
-            {
-                "source": graph,
-                "dataset": BASE + "dataset/languages"
-            },
-            {
-                "source": iso639_1,
-                "dataset": "http://id.loc.gov/vocabulary/iso639-1"
-            }
-        ],
-        query="source/construct-languages-iso639-1.rq")
-
-    return "/language/", "2014-08-01T07:56:51.110Z", graph
-
-
-@compiler.dataset
-def countries():
-    countries = Graph().parse(str(compiler.path('source/countries.ttl')), format='turtle')
-
-    graph = compiler.construct(sources=[
-            {
-                "source": countries,
-                "dataset": BASE + "dataset/countries"
-            },
-            {
-                "source": "http://id.loc.gov/vocabulary/countries"
-            }
-        ],
-        query="source/construct-countries.rq")
-
-    return "/country/", "2014-02-01T12:21:14.008Z", graph
-
-
-@compiler.dataset
-def enumterms():
-    graph = Graph().parse(str(compiler.path('source/kbv-enums.ttl')), format='turtle')
-
-    return "/term/enum/", "2018-05-29T12:36:01.337Z", graph
-
-@compiler.dataset
-def reprterms():
-    graph = Graph().parse(str(compiler.path('source/repr-terms.ttl')), format='turtle')
-
-    return "/term/repr/", "2021-02-22T10:32:01.337Z", graph
-
-
-@compiler.dataset
-def encodingFormatterms():
-    graph = Graph().parse(str(compiler.path('source/encodingFormat-terms.ttl')), format='turtle')
-
-    return "/encodingFormat/", "2021-03-04T10:12:09.921Z", graph
-
-
-@compiler.dataset
-def bibdbterms():
-    graph = ConjunctiveGraph()
-    for part in compiler.path('source/bibdb').glob('**/*.ttl'):
-        graph.parse(str(part), format='turtle')
-
-    return "/term/bibdb/", "2021-09-20T08:13:50.570Z", graph
-
-
-@compiler.dataset
-def policies():
-    graph = Graph().parse(str(compiler.path('source/policies.ttl')), format='turtle')
-
-    return "/policy/", "2021-11-18T11:48:51Z", graph
-
-
-@compiler.dataset
-def containers():
-    graph = Graph().parse(str(compiler.path('source/containers.ttl')), format='turtle')
-
-    return "/term/", "2019-07-11T13:04:17.964Z", graph
-
-
-@compiler.dataset
-def generators():
-    graph = Graph().parse(str(compiler.path('source/generators.ttl')), format='turtle')
-
-    return "/generator/", "2018-04-25T18:55:14.723Z", graph
-
-
-@compiler.dataset
-def schemes():
-    graph = Graph().parse(str(compiler.path('source/schemes.ttl')), format='turtle')
-
-    return "/", "2014-02-01T20:00:01.766Z", graph
-
-
-@compiler.dataset
-def nationalities():
-    graph = Graph().parse(str(compiler.path('source/nationalities.ttl')), format='turtle')
-
-    return "/nationality/", "2014-02-01T13:08:56.596Z", graph
 
+from lxltools.datacompiler import Compiler
 
-if __name__ == '__main__':
+compiler = Compiler(
+    base_dir=os.path.dirname(__file__),
+    datasets_description="source/datasets/idkbse.ttl",
+    dataset_id="https://id.kb.se/dataset/common",
+    created="2013-10-17T14:07:48.000Z",
+    tool_id="https://id.kb.se/generator/datasetcompiler",
+    context="sys/context/base.jsonld",
+    system_base_iri="",
+    union="common.jsonld.lines",
+)
+
+if __name__ == "__main__":
     compiler.main()
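Note: with this change the per-dataset build steps in common.py are replaced by a single declarative description file, passed to the Compiler as datasets_description. The following is a minimal sketch, not part of this commit, of how that file could be inspected with rdflib; it assumes only that source/datasets/idkbse.ttl is Turtle, as its extension and the diff suggest, and does not claim to show the vocabulary the datacompiler actually reads from it.

# Sketch: list the typed resources described in the declarative datasets file.
from rdflib import Graph
from rdflib.namespace import RDF

graph = Graph().parse("source/datasets/idkbse.ttl", format="turtle")

# Print each subject together with its rdf:type, giving a rough overview of
# the dataset descriptions the compiler now picks up declaratively.
for subject, rdf_type in sorted(graph.subject_objects(RDF.type)):
    print(subject, "a", rdf_type)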

Comments (0)