@@ -33,7 +33,8 @@ def __init__(self, *,
3333 context = None ,
3434 record_thing_link = 'mainEntity' ,
3535 system_base_iri = None ,
36- union = 'all.jsonld.lines' ):
36+ union = 'all.jsonld.lines' ,
37+ last_backwards_id_time = None ):
3738 self .datasets_description = datasets_description
3839 self .datasets = {}
3940 self .current_ds_resources = set ()
@@ -49,6 +50,11 @@ def __init__(self, *,
4950 self .current_ds_file = None
5051 self .no_records = False
5152
53+ self .last_backwards_id_time = (
54+ timeutil .w3c_dtz_to_ms (last_backwards_id_time )
55+ if isinstance (last_backwards_id_time , str )
56+ else None )
57+
5258 if datasets_description :
5359 self ._handlers_from_datasets_description (datasets_description )
5460
@@ -155,7 +161,8 @@ def _compile_dataset(self, name, result):
155161 data = self .to_jsonld (data )
156162
157163 ds_url = urljoin (self .dataset_id , name )
158- self ._create_dataset_description (ds_url , ds_created_ms , ds_modified_ms )
164+ self ._create_dataset_description (
165+ ds_url , ds_created_ms , ds_created_ms = ds_created_ms )
159166
160167 base_id = urljoin (self .dataset_id , base )
161168
@@ -172,10 +179,6 @@ def _compile_dataset(self, name, result):
172179 modified_ms = None
173180 fpath = urlparse (nodeid ).path [1 :]
174181
175- if self .no_records :
176- self .write (node , fpath )
177- continue
178-
179182 meta = node .pop ('meta' , None )
180183 if meta :
181184 if 'created' in meta :
@@ -189,10 +192,25 @@ def _compile_dataset(self, name, result):
189192 node ,
190193 created_ms ,
191194 modified_ms ,
192- datasets = [self .dataset_id , ds_url ])
193- self .write (desc , fpath )
195+ datasets = [self .dataset_id , ds_url ],
196+ ds_created_ms = ds_created_ms )
197+
198+ # Keep sameAs "forwards" form in meta even if no_records is used
199+ if self .no_records :
200+ meta = meta or {}
201+ sameas = meta .setdefault ('sameAs' , [])
202+ rec = desc ['@graph' ][0 ]
203+ if 'sameAs' in rec :
204+ sameas .append ({"@id" : rec ['@id' ]})
205+ for same in rec .get ('sameAs' , []):
206+ sameas .append (same )
207+ node ['meta' ] = meta
208+ self .write (node , fpath )
209+ else :
210+ self .write (desc , fpath )
194211
195- def _create_dataset_description (self , ds_url , created_ms , modified_ms = None , label = None ):
212+ def _create_dataset_description (self , ds_url , created_ms , modified_ms = None ,
213+ label = None , ds_created_ms = None ):
196214 if not label :
197215 label = ds_url .rsplit ('/' , 1 )[- 1 ]
198216 ds = {
@@ -211,7 +229,7 @@ def _create_dataset_description(self, ds_url, created_ms, modified_ms=None, labe
211229 return
212230
213231 desc = self ._to_node_description (ds , created_ms , modified_ms ,
214- datasets = {self .dataset_id , ds_url })
232+ datasets = {self .dataset_id , ds_url }, ds_created_ms = ds_created_ms )
215233
216234 record = desc ['@graph' ][0 ]
217235 if self .tool_id :
@@ -220,14 +238,16 @@ def _create_dataset_description(self, ds_url, created_ms, modified_ms=None, labe
220238 self .write (desc , ds_path )
221239
222240 def _to_node_description (self , node , created_ms ,
223- modified_ms = None , datasets = None ):
241+ modified_ms = None , datasets = None , ds_created_ms = None ):
224242 assert self .record_thing_link not in node
225243
226244 node_id = node ['@id' ]
227245
228246 record = OrderedDict ()
229247 record ['@type' ] = 'Record'
230- record ['@id' ] = self .generate_record_id (created_ms , node_id )
248+
249+ self .set_record_id (record , created_ms , node_id , ds_created_ms )
250+
231251 record [self .record_thing_link ] = {'@id' : node_id }
232252
233253 # Add provenance
@@ -241,9 +261,19 @@ def _to_node_description(self, node, created_ms,
241261
242262 return {'@graph' : items }
243263
244- def generate_record_id (self , created_ms , node_id ):
245- # FIXME: backwards_form=created_ms < 2015
246- slug = lxlslug .librisencode (created_ms , lxlslug .checksum (node_id ))
def set_record_id(self, record, created_ms, node_id, ds_created_ms=None):
    """Assign ``record['@id']`` and, for backwards-form ids, a ``sameAs`` alias.

    The backwards form is used when the dataset creation time is strictly
    earlier than ``self.last_backwards_id_time``. In that case the id
    generated without the backwards form is stored under
    ``record['sameAs']`` as a single ``{'@id': ...}`` entry.

    :param record: mapping that is mutated in place.
    :param created_ms: creation time passed on to ``generate_record_id``.
    :param node_id: id of the described node, passed on to
        ``generate_record_id``.
    :param ds_created_ms: dataset creation time compared against the
        cutoff; falls back to ``created_ms`` when ``None``.
    """
    if ds_created_ms is None:
        ds_created_ms = created_ms

    # The cutoff is None unless one was configured; ordering an int against
    # None raises TypeError on Python 3, so a missing cutoff (or a missing
    # timestamp) simply means "never use the backwards form".
    cutoff = self.last_backwards_id_time
    backwards_form = (
        cutoff is not None
        and ds_created_ms is not None
        and ds_created_ms < cutoff
    )

    # TODO: use normal form and keep backwards_form as sameAs until "GC:able"?
    record['@id'] = self.generate_record_id(created_ms, node_id, backwards_form)
    if backwards_form:
        record['sameAs'] = [{'@id': self.generate_record_id(created_ms, node_id)}]
272+
def generate_record_id(self, created_ms, node_id, backwards_form=False):
    """Return the record IRI for a node, resolved against ``system_base_iri``.

    The slug comes from ``lxlslug.librisencode``, which is given
    ``created_ms``, the ``lxlslug.checksum`` of ``node_id``, and the
    ``backwards_form`` flag.
    """
    node_checksum = lxlslug.checksum(node_id)
    record_slug = lxlslug.librisencode(
        created_ms, node_checksum, backwards_form=backwards_form
    )
    return urljoin(self.system_base_iri, record_slug)
248278
249279 def write (self , node , name ):
0 commit comments