@@ -318,171 +318,6 @@ def _save_plot(fig: Figure, plot_dir: Path, name: str) -> None:
318318 console .print (f" Saved: { plot_dir / name } .{{pdf,jpg}}" )
319319
320320
def plot_vm_comparison(tal_vm: List[dict], ostrich_raw_files: List[Path], plot_dir: Path) -> None:
    """Draw the VM lookup-time line chart and save it as ``vm_comparison``.

    The TAL series is whatever ``load_tal_vm_by_version`` returns for
    ``tal_vm`` (a mapping keyed by version). An OSTRICH series, loaded via
    ``load_ostrich_vm_by_version``, is added when at least one of
    ``ostrich_raw_files`` exists on disk.

    Args:
        tal_vm: TAL VM result records, passed to ``load_tal_vm_by_version``.
        ostrich_raw_files: Candidate OSTRICH raw output files.
        plot_dir: Directory handed to ``_save_plot``.
    """
    tal_by_version = load_tal_vm_by_version(tal_vm)
    fig, ax = plt.subplots(figsize=(8, 5))

    tal_xs = sorted(tal_by_version.keys())
    tal_ys = [tal_by_version[version] for version in tal_xs]
    ax.plot(tal_xs, tal_ys, label="TAL", marker="", linewidth=1.5)

    # The OSTRICH curve is optional: draw it only if some raw file is present.
    ostrich_available = any(raw_file.exists() for raw_file in ostrich_raw_files)
    if ostrich_available:
        ostrich_by_version = load_ostrich_vm_by_version(ostrich_raw_files)
        ostrich_xs = sorted(ostrich_by_version.keys())
        ostrich_ys = [ostrich_by_version[version] for version in ostrich_xs]
        ax.plot(ostrich_xs, ostrich_ys, label="OSTRICH", marker="", linewidth=1.5)

    _format_log_axis(ax)
    ax.set_xlabel("Version")
    ax.set_ylabel("Lookup time (ms)")
    ax.set_title("VM: median across all triple patterns")
    ax.legend()
    ax.grid(True, alpha=0.3)
    _save_plot(fig, plot_dir, "vm_comparison")
337-
338-
def plot_dm_comparison(tal_dm: List[dict], ostrich_raw_files: List[Path], plot_dir: Path) -> None:
    """Draw the DM lookup-time line chart and save it as ``dm_comparison``.

    The TAL series comes from ``load_tal_dm_by_version`` and is drawn with
    circle markers. An OSTRICH series, loaded via
    ``load_ostrich_dm_by_version``, is added when at least one of
    ``ostrich_raw_files`` exists on disk.

    Args:
        tal_dm: TAL DM result records, passed to ``load_tal_dm_by_version``.
        ostrich_raw_files: Candidate OSTRICH raw output files.
        plot_dir: Directory handed to ``_save_plot``.
    """
    tal_by_version = load_tal_dm_by_version(tal_dm)
    fig, ax = plt.subplots(figsize=(8, 5))

    tal_xs = sorted(tal_by_version.keys())
    tal_ys = [tal_by_version[version] for version in tal_xs]
    ax.plot(tal_xs, tal_ys, label="TAL", marker="o", linewidth=1.5, markersize=4)

    # The OSTRICH curve is optional: draw it only if some raw file is present.
    ostrich_available = any(raw_file.exists() for raw_file in ostrich_raw_files)
    if ostrich_available:
        ostrich_by_version = load_ostrich_dm_by_version(ostrich_raw_files)
        ostrich_xs = sorted(ostrich_by_version.keys())
        ostrich_ys = [ostrich_by_version[version] for version in ostrich_xs]
        ax.plot(ostrich_xs, ostrich_ys, label="OSTRICH", marker="", linewidth=1.5)

    _format_log_axis(ax)
    ax.set_xlabel("Version (delta from V0)")
    ax.set_ylabel("Lookup time (ms)")
    ax.set_title("DM: median across all triple patterns from V0")
    ax.legend()
    ax.grid(True, alpha=0.3)
    _save_plot(fig, plot_dir, "dm_comparison")
355-
356-
def plot_vq_comparison(tal_vq: List[dict], ostrich_raw_files: List[Path], plot_dir: Path) -> None:
    """Draw the VQ bar chart (one bar per system) and save it as ``vq_comparison``.

    The TAL bar shows the value returned by ``load_tal_vq_median``. An
    OSTRICH bar, from ``load_ostrich_vq_median``, is added when at least one
    of ``ostrich_raw_files`` exists on disk. Each bar is annotated with its
    value formatted to two decimals.

    Args:
        tal_vq: TAL VQ result records, passed to ``load_tal_vq_median``.
        ostrich_raw_files: Candidate OSTRICH raw output files.
        plot_dir: Directory handed to ``_save_plot``.
    """
    labels = ["TAL"]
    medians = [load_tal_vq_median(tal_vq)]

    ostrich_available = any(raw_file.exists() for raw_file in ostrich_raw_files)
    if ostrich_available:
        labels.append("OSTRICH")
        medians.append(load_ostrich_vq_median(ostrich_raw_files))

    fig, ax = plt.subplots(figsize=(6, 5))
    rects = ax.bar(labels, medians)

    # Write each value just above the top edge of its bar.
    for rect, median in zip(rects, medians):
        x_center = rect.get_x() + rect.get_width() / 2
        ax.text(
            x_center,
            rect.get_height(),
            f"{median:.2f}",
            ha="center",
            va="bottom",
            fontsize=9,
        )

    _format_log_axis(ax)
    ax.set_ylabel("Lookup time (ms)")
    ax.set_title("VQ: median across all triple patterns")
    ax.grid(True, alpha=0.3, axis="y")
    _save_plot(fig, plot_dir, "vq_comparison")
373-
374-
def plot_by_pattern(tal_results: List[dict], ostrich_raw_files: List[Path],
                    query_type: str, load_tal_fn, load_ost_fn, plot_dir: Path,
                    x_label: str, version_key: str) -> None:
    """Draw side-by-side per-pattern line charts and save ``<query_type>_by_pattern``.

    Two panels sharing a y axis are produced, one for pattern filter ``"p"``
    and one for ``"po"``. In each panel the TAL series is
    ``load_tal_fn(tal_results, pattern_filter=...)``; an OSTRICH series from
    ``load_ost_fn(ostrich_raw_files, pattern_filter=...)`` is added when at
    least one of ``ostrich_raw_files`` exists on disk.

    Args:
        tal_results: TAL result records for this query type.
        ostrich_raw_files: Candidate OSTRICH raw output files.
        query_type: Short query-type name; upper-cased for the figure title
            and used verbatim in the output file name.
        load_tal_fn: Loader called with ``tal_results`` and ``pattern_filter``.
        load_ost_fn: Loader called with ``ostrich_raw_files`` and
            ``pattern_filter``.
        plot_dir: Directory handed to ``_save_plot``.
        x_label: X-axis label applied to both panels.
        version_key: Accepted for the callers' benefit; not read by this
            function.
    """
    panel_titles = {"p": "P?", "po": "PO"}
    ostrich_available = any(raw_file.exists() for raw_file in ostrich_raw_files)
    fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharey=True)

    for ax, pattern in zip(axes, ["p", "po"]):
        tal_series = load_tal_fn(tal_results, pattern_filter=pattern)
        tal_xs = sorted(tal_series.keys())
        ax.plot(tal_xs, [tal_series[x] for x in tal_xs], label="TAL", linewidth=1.5)

        if ostrich_available:
            ostrich_series = load_ost_fn(ostrich_raw_files, pattern_filter=pattern)
            ostrich_xs = sorted(ostrich_series.keys())
            ax.plot(
                ostrich_xs,
                [ostrich_series[x] for x in ostrich_xs],
                label="OSTRICH",
                linewidth=1.5,
            )

        _format_log_axis(ax)
        ax.set_xlabel(x_label)
        ax.set_title(f"?{panel_titles[pattern]} patterns")
        ax.legend()
        ax.grid(True, alpha=0.3)

    # The y axis is shared, so only the left-hand panel carries the label.
    axes[0].set_ylabel("Lookup time (ms)")
    fig.suptitle(f"{query_type.upper()}: median by pattern type", fontsize=13)
    fig.tight_layout()
    _save_plot(fig, plot_dir, f"{query_type}_by_pattern")
398-
399-
def generate_plots(data: dict, ostrich_raw_files: List[Path], plot_dir: Path,
                   disk_usage: dict[str, int | None] | None = None) -> None:
    """Produce every available plot for one benchmark run.

    Reads the ``"vm"``, ``"dm"`` and ``"vq"`` lists under ``data["results"]``
    and, for each non-empty one, draws the matching comparison chart (plus
    the per-pattern chart for VM and DM). The storage chart is drawn only
    when ``disk_usage`` is truthy; the memory chart is always attempted.

    Args:
        data: Benchmark output; only its ``"results"`` entry is read here.
        ostrich_raw_files: Candidate OSTRICH raw output files, forwarded to
            the individual plot functions.
        plot_dir: Directory forwarded to the individual plot functions.
        disk_usage: Optional byte counts forwarded to
            ``plot_storage_comparison``.
    """
    results = data.get("results", {})
    vm_results = results.get("vm", [])
    dm_results = results.get("dm", [])
    vq_results = results.get("vq", [])

    # Query types that have both an overall and a per-pattern chart:
    # (records, name, overall plotter, TAL loader, OSTRICH loader, x label, version key)
    per_version_specs = (
        (vm_results, "vm", plot_vm_comparison,
         load_tal_vm_by_version, load_ostrich_vm_by_version,
         "Version", "version_index"),
        (dm_results, "dm", plot_dm_comparison,
         load_tal_dm_by_version, load_ostrich_dm_by_version,
         "Version (delta from V0)", "version_end"),
    )
    for records, name, plot_overall, tal_loader, ost_loader, x_label, key in per_version_specs:
        if not records:
            continue
        plot_overall(records, ostrich_raw_files, plot_dir)
        plot_by_pattern(records, ostrich_raw_files, name,
                        tal_loader, ost_loader,
                        plot_dir, x_label, key)

    if vq_results:
        plot_vq_comparison(vq_results, ostrich_raw_files, plot_dir)

    # Storage and memory
    if disk_usage:
        plot_storage_comparison(disk_usage, plot_dir)
    plot_memory_comparison(data, plot_dir)
424-
425-
def plot_storage_comparison(disk_usage: dict[str, int | None], plot_dir: Path) -> None:
    """Draw the storage bar chart and save it as ``storage_comparison``.

    The TAL bar is the sum of the OCDM dataset, OCDM provenance and QLever
    index sizes (missing parts count as zero) and is drawn when the dataset
    or index size is known. The OSTRICH bar is drawn when its store size is
    known. Sizes are converted from bytes to MB (1 MB = 1048576 bytes).
    Nothing is drawn when dataset, index and OSTRICH sizes are all ``None``.

    Args:
        disk_usage: Byte counts under the keys ``"ocdm_dataset_bytes"``,
            ``"ocdm_provenance_bytes"``, ``"qlever_index_bytes"`` and
            ``"ostrich_store_bytes"``; any value may be ``None``.
        plot_dir: Directory handed to ``_save_plot``.

    Raises:
        KeyError: If one of the four keys is absent from ``disk_usage``.
    """
    ocdm_ds = disk_usage["ocdm_dataset_bytes"]
    ocdm_prov = disk_usage["ocdm_provenance_bytes"]
    qlever = disk_usage["qlever_index_bytes"]
    ostrich = disk_usage["ostrich_store_bytes"]

    if ocdm_ds is None and qlever is None and ostrich is None:
        console.print(" [dim]Skipping storage_comparison (no data)[/dim]")
        return

    bytes_per_mb = 1048576

    # Each entry is (label, size in MB, colour). The colour is bound to the
    # system rather than to the bar position, so OSTRICH keeps its own colour
    # even when the TAL bar is absent.
    bar_specs = []
    if ocdm_ds is not None or qlever is not None:
        ocdm_total = (ocdm_ds or 0) + (ocdm_prov or 0)
        tal_total = ocdm_total + (qlever or 0)
        bar_specs.append(("TAL\n(OCDM + QLever)", tal_total / bytes_per_mb, "#1f77b4"))
    if ostrich is not None:
        bar_specs.append(("OSTRICH", ostrich / bytes_per_mb, "#ff7f0e"))

    systems = [label for label, _, _ in bar_specs]
    sizes_mb = [size for _, size, _ in bar_specs]
    colors = [color for _, _, color in bar_specs]

    fig, ax = plt.subplots(figsize=(6, 5))
    bars = ax.bar(systems, sizes_mb, color=colors)
    # Write each size just above the top edge of its bar.
    for bar, val in zip(bars, sizes_mb):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(), f"{val:.1f} MB",
                ha="center", va="bottom", fontsize=9)
    ax.set_ylabel("Storage (MB)")
    ax.set_title("Storage comparison")
    ax.grid(True, alpha=0.3, axis="y")
    _save_plot(fig, plot_dir, "storage_comparison")
458-
459-
def plot_memory_comparison(data: dict, plot_dir: Path) -> None:
    """Draw the per-query-type memory bar chart and save it as ``memory_comparison``.

    For each of ``"vm"``, ``"dm"`` and ``"vq"`` under ``data["results"]``,
    the non-``None`` ``"median_memory_bytes"`` values are collected and their
    median, converted to KB (divided by 1024), becomes one bar. Query types
    with no such values are left out; if none remain, the plot is skipped
    with a console message.

    Args:
        data: Benchmark output; only its ``"results"`` entry is read here.
        plot_dir: Directory handed to ``_save_plot``.
    """
    results = data.get("results", {})
    labels = []
    medians_in_kb = []

    for query_type in ["vm", "dm", "vq"]:
        samples = [
            row["median_memory_bytes"]
            for row in results.get(query_type, [])
            if row.get("median_memory_bytes") is not None
        ]
        if not samples:
            continue
        labels.append(query_type.upper())
        medians_in_kb.append(statistics.median(samples) / 1024)

    if not labels:
        console.print(" [dim]Skipping memory_comparison (no data)[/dim]")
        return

    fig, ax = plt.subplots(figsize=(6, 5))
    rects = ax.bar(labels, medians_in_kb)

    # Write each value just above the top edge of its bar.
    for rect, kb in zip(rects, medians_in_kb):
        x_center = rect.get_x() + rect.get_width() / 2
        ax.text(
            x_center,
            rect.get_height(),
            f"{kb:.0f} KB",
            ha="center",
            va="bottom",
            fontsize=9,
        )

    ax.set_ylabel("Peak memory (KB)")
    ax.set_title("TAL median peak memory by query type")
    ax.grid(True, alpha=0.3, axis="y")
    _save_plot(fig, plot_dir, "memory_comparison")
484-
485-
486321def generate_comparison_table (tal_results : dict , ocdm_timing_file : Path , qlever_timing_file : Path ) -> List [dict ]:
487322 rows = []
488323 for system_name , published in PUBLISHED_RESULTS .items ():
@@ -782,11 +617,6 @@ def main():
782617 console .print ()
783618 print_comparison_table (comparison )
784619
785- console .rule ("[bold]Generating plots" )
786- ostrich_raw_files = [DATA_DIR / f"ostrich_raw_{ pt } _{ args .granularity } .txt" for pt in ["p" , "po" ]]
787- plot_dir = output_dir / "plots"
788- generate_plots (data , ostrich_raw_files , plot_dir , disk_usage )
789-
790620
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
0 commit comments