@@ -274,9 +274,9 @@ def is_cachable(self, ref):
274274
def get_cached_path(self, url):
    """Return the cache file path under ``self.cachedir`` for *url*.

    Every occurrence of ``CACHE_SPAQRL_BASE`` in *url* is replaced by
    ``'sparql/'`` and the result is passed through ``quote`` with no safe
    characters before being joined onto the cache directory.

    This only computes the path; it does not touch the filesystem and
    does not print anything.
    """
    # NOTE(review): `quote` is presumably urllib.parse.quote; with safe=""
    # it would percent-encode "/" too, keeping the name a single path
    # component -- confirm against the file's imports.
    cache_key = url.replace(CACHE_SPAQRL_BASE, 'sparql/')
    return self.cachedir / quote(cache_key, safe="")
279278
279+ # TODO: now unused; either remove or use inside of cached_rdf (for more control)
280280 def cache_url (self , url , maxcache = MAX_CACHE ):
281281 path = self .get_cached_path (url )
282282 mtime = path .stat ().st_mtime if path .exists () else None
@@ -317,7 +317,9 @@ def cached_rdf(self, fpath, construct=None, graph=None):
317317 print ("No cache directory configured" , file = sys .stderr )
318318 elif construct :
319319 fpath = self .get_cached_path (fpath + '.ttl' )
320- if not fpath .is_file ():
320+
321+ if not (fpath .is_file () and fpath .stat ().st_size > 0 ):
322+ print (f'Caching result of { construct } as { fpath } ' , file = sys .stderr )
321323 with self .path (construct ).open () as fp :
322324 try :
323325 res = (graph or Graph ()).query (fp .read ())
@@ -327,14 +329,17 @@ def cached_rdf(self, fpath, construct=None, graph=None):
327329 except Exception as e :
328330 print (f'Failed to cache { fpath } : { e } ' , file = sys .stderr )
329331 else :
332+ print (f'Using cached { fpath } as result of { construct } ' , file = sys .stderr )
330333 source .parse (str (fpath ), format = 'turtle' )
334+
331335 return source
332336
333337 elif self .is_cachable (fpath ):
334338 remotepath = fpath
335339 fpath = self .get_cached_path (fpath + '.ttl' )
336- print (f'Using cached { fpath } for { remotepath } ' , file = sys .stderr )
337- if not fpath .is_file ():
340+
341+ if not (fpath .is_file () and fpath .stat ().st_size > 0 ):
342+ print (f'Caching { remotepath } as { fpath } ' , file = sys .stderr )
338343 fpath .parent .mkdir (parents = True , exist_ok = True )
339344 try :
340345 # At least rdaregistry is *very* picky about what is asked for,
@@ -347,6 +352,7 @@ def cached_rdf(self, fpath, construct=None, graph=None):
347352 source .serialize (str (fpath ), format = 'turtle' )
348353 return source
349354 else :
355+ print (f'Using cached { fpath } for { remotepath } ' , file = sys .stderr )
350356 return source .parse (str (fpath ), format = 'turtle' )
351357
352358 fmt = 'nt' if fpath .endswith ('.nt' ) else None
@@ -414,7 +420,7 @@ def _digest_source_data(src):
414420 source ['query' ] = src ['dataQuery' ]['uri' ]
415421 elif '@id' in src :
416422 assert 'source' not in source
417- source ['source' ] = str (src ['@id' ]) # TODO: bug in rdflib; URIRef in the json-ld
423+ source ['source' ] = str (src ['@id' ]) # TODO: bug in rdflib; URIRef in the JSON-LD
418424 elif 'uri' in src :
419425 instruct = 'result' if 'sourceData' in src else 'source'
420426 assert instruct not in source
@@ -425,7 +431,7 @@ def _digest_source_data(src):
425431 if 'representationOf' in src :
426432 instruct = 'dataset'
427433 assert instruct not in source
428- source [instruct ] = src ['representationOf' ]['@id' ]
434+ source [instruct ] = str ( src ['representationOf' ]['@id' ]) # TODO: (same as above)
429435 unhandled = False
430436
431437 for part in _aslist (src .get ('sourceData' )):
@@ -497,7 +503,7 @@ def _construct(compiler, sources, query=None):
497503 if isinstance (sourcedfn , str ):
498504 sourcedfn = {'source' : sourcedfn }
499505
500- source = sourcedfn .get ('source' , [] )
506+ source = sourcedfn .get ('source' ) or sourcedfn . get ( 'dataset' )
501507 graph = dataset .get_context (URIRef (sourcedfn .get ('dataset' ) or source ))
502508 if isinstance (source , (dict , list )):
503509 # TODO: was currently unused, and not yet supported in the data-driven form.
0 commit comments