88import logging
99import sys
1010import json
11+ from copy import deepcopy
1112from pathlib import Path
1213from typing import Any , List , Union , Dict , Optional
1314from urllib .parse import urlparse
1819from cwl_utils .parser import load_document_by_uri , InputArraySchemaTypes , \
1920 InputEnumSchemaTypes , InputRecordSchemaTypes , File , Directory , WorkflowInputParameter , InputRecordSchema , \
2021 InputEnumSchema , InputArraySchema , Workflow , CommandLineTool
21- from cwl_utils .utils import sanitise_schema_field , is_uri , to_pascal_case , get_value_from_uri , is_local_uri , \
22- load_schema_from_uri
22+ from cwl_utils .utils import sanitise_schema_field , is_uri , to_pascal_case , get_value_from_uri , is_local_uri
2323
2424_logger = logging .getLogger ("cwl-inputs-schema-gen" ) # pylint: disable=invalid-name
2525defaultStreamHandler = logging .StreamHandler () # pylint: disable=invalid-name
3535 "string" : "string" ,
3636 "int" : "integer" ,
3737 "float" : "number" ,
38+ "long" : "number" ,
39+ "double" : "number" ,
40+ "null" : "null"
3841}
3942
4043JSON_TEMPLATE_PATH = Path (__file__ ).parent .joinpath (
@@ -123,7 +126,11 @@ def generate_type_dict_from_type(self, type_item) -> Dict:
123126 return {
124127 "type" : PRIMITIVE_TYPES_MAPPING [type_item ]
125128 }
126- elif type_item in ["File" , "Directory" ]:
129+ elif type_item in ["stdin" ]:
130+ return {
131+ "$ref" : f"#/definitions/File"
132+ }
133+ elif type_item in ["File" , "Directory" , "Any" ]:
127134 return {
128135 "$ref" : f"#/definitions/{ type_item } "
129136 }
@@ -148,7 +155,7 @@ def generate_type_dict_from_type(self, type_item) -> Dict:
148155 return {
149156 "type" : "object" ,
150157 "properties" : {
151- get_value_from_uri (prop .id ): self .generate_type_dict_from_type (prop .type_ )
158+ get_value_from_uri (prop .name ): self .generate_type_dict_from_type (prop .type_ )
152159 for prop in type_item .fields
153160 }
154161 }
@@ -162,6 +169,16 @@ def generate_type_dict_from_type(self, type_item) -> Dict:
162169 }
163170 else :
164171 raise ValueError (f"Unknown type: { type_item } " )
172+ elif isinstance (type_item , List ):
173+ # Nested schema
174+ return {
175+ "oneOf" : list (
176+ map (
177+ lambda type_iter : self .generate_type_dict_from_type (type_iter ),
178+ type_item
179+ )
180+ )
181+ }
165182 else :
166183 raise ValueError (f"Unknown type: { type_item } " )
167184
@@ -222,7 +239,7 @@ def generate_json_schema_property_from_input_parameter(input_parameter: Workflow
222239 return JSONSchemaProperty (
223240 name = input_name ,
224241 type_ = input_parameter .type_ ,
225- description = doc
242+ description = doc if doc is not None else ""
226243 )
227244
228245
@@ -235,10 +252,15 @@ def generate_definition_from_schema(schema: InputRecordSchema) -> Dict:
235252
236253 # Sanitise each field of the schema
237254 sanitised_fields = {}
238- for field_key , field_value in schema .type_ .get ("fields" ).items ():
255+
256+ for field in schema .fields :
239257 sanitised_fields .update (
240258 {
241- field_key : sanitise_schema_field (field_value )
259+ get_value_from_uri (field .name ): sanitise_schema_field (
260+ {
261+ "type" : field .type_
262+ }
263+ )
242264 }
243265 )
244266
@@ -267,13 +289,13 @@ def generate_definition_from_schema(schema: InputRecordSchema) -> Dict:
267289 prop = JSONSchemaProperty (
268290 name = prop_name ,
269291 type_ = prop_obj .get ("type" ),
270- description = prop_obj .get ("doc" ),
292+ description = prop_obj .get ("doc" , "" ),
271293 required = required
272294 )
273295 property_list .append (prop )
274296
275297 return {
276- to_pascal_case (schema .type_ . get ( ' name' )): {
298+ to_pascal_case (get_value_from_uri ( schema .name )): {
277299 "type" : "object" ,
278300 "properties" : {
279301 prop .name : prop .type_dict
@@ -307,13 +329,41 @@ def cwl_to_jsonschema(cwl_obj: Union[Workflow, CommandLineTool]) -> Any:
307329 # Load in all $imports to be referred by complex input types
308330 workflow_schema_definitions_list = list (
309331 map (
310- lambda import_iter : generate_definition_from_schema (
311- load_schema_from_uri ( import_iter )
332+ lambda kv_schema_tuple_iter : generate_definition_from_schema (
333+ cwl_obj . loadingOptions . idx . get ( kv_schema_tuple_iter [ 1 ][ 0 ] )
312334 ),
313- cwl_obj .loadingOptions .imports
335+ filter (
336+ lambda idx_iter :
337+ isinstance (idx_iter [1 ][0 ], InputRecordSchemaTypes ) or
338+ isinstance (idx_iter [1 ][0 ], InputArraySchemaTypes ),
339+ cwl_obj .loadingOptions .idx
340+ )
314341 )
315342 )
316343
344+ if cwl_obj .requirements is not None :
345+ try :
346+ schema_def_requirement = next (
347+ filter (
348+ lambda requirement_iter : requirement_iter .class_ == "SchemaDefRequirement" ,
349+ cwl_obj .requirements
350+ )
351+ )
352+
353+ workflow_schema_definitions_list .extend (
354+ list (
355+ map (
356+ lambda schema_def_iter : generate_definition_from_schema (
357+ schema_def_iter
358+ ),
359+ schema_def_requirement .types
360+ )
361+ )
362+ )
363+
364+ except StopIteration :
365+ pass
366+
317367 # Convert schema definitions to dict
318368 workflow_schema_definitions_dict = {}
319369 for schema_definition in workflow_schema_definitions_list :
@@ -334,7 +384,19 @@ def cwl_to_jsonschema(cwl_obj: Union[Workflow, CommandLineTool]) -> Any:
334384 {
335385 "type" : "object" ,
336386 "properties" : {
337- prop .name : prop .type_dict
387+ prop .name : {
388+ "oneOf" : [
389+ {
390+ "type" : "null"
391+ },
392+ prop .type_dict
393+ ]
394+ }
395+ if prop .required is False
396+ else
397+ {
398+ prop .name : prop .type_dict
399+ }
338400 for prop in properties
339401 },
340402 "required" : [
@@ -350,6 +412,80 @@ def cwl_to_jsonschema(cwl_obj: Union[Workflow, CommandLineTool]) -> Any:
350412 workflow_schema_definitions_dict
351413 )
352414
415+ # Slim down the schema as required
416+ input_json_schema = slim_definitions (input_json_schema )
417+
418+ return input_json_schema
419+
420+
def slim_definitions(input_json_schema: Dict) -> Dict:
    """
    Return a copy of the schema with every unused definition removed.

    The schema template carries many definitions that a simple workflow
    never refers to.  Starting from the top-level ``properties``, every
    ``$ref`` is followed (transitively, through the definitions that are
    referenced) and only the definitions reached that way are kept.

    :param input_json_schema: JSON schema dict; ``properties`` is searched
        for ``$ref`` values and ``definitions`` is pruned.
    :return: A deep copy of the schema whose ``definitions`` contains only
        the referenced entries, in their original order.  The input object
        is never modified.  A schema without ``definitions`` is returned
        as an unmodified copy.
    :raises KeyError: If a ``$ref`` resolves to a name that is not present
        in ``definitions``.
    """

    def _collect_refs(json_data, found=None) -> List:
        # Gather the value of every "$ref" key nested anywhere inside
        # json_data.  The value of a "$ref" key itself is not descended into.
        # Adapted from https://stackoverflow.com/a/77537867/6946787
        if found is None:
            found = []

        if isinstance(json_data, dict):
            for key, value in json_data.items():
                if key == "$ref":
                    found.append(value)
                else:
                    _collect_refs(value, found)
        elif isinstance(json_data, list):
            for item in json_data:
                _collect_refs(item, found)

        return found

    # Work on a copy so the caller's schema is left untouched
    slimmed_schema = deepcopy(input_json_schema)

    definitions = slimmed_schema.get("definitions")
    if definitions is None:
        # Nothing to prune
        return slimmed_schema

    # Walk the reference graph with an explicit worklist.  Each definition
    # name is recorded once in a set, so shared or cyclic references are
    # visited a single time and the bookkeeping stays linear in size
    # (the previous recursive version extended the accumulator list with
    # itself on every hop, doubling it each time).
    required_definitions = set()
    pending = [slimmed_schema.get("properties")]
    while pending:
        current_node = pending.pop()
        for reference_attribute in _collect_refs(current_node):
            reference_value = get_value_from_uri(reference_attribute)
            if reference_value in required_definitions:
                continue
            required_definitions.add(reference_value)
            # A referenced definition may itself reference other definitions
            pending.append(definitions[reference_value])

    # Keep only the definitions that were reached, preserving their order
    slimmed_schema["definitions"] = {
        definition_key: definition_value
        for definition_key, definition_value in definitions.items()
        if definition_key in required_definitions
    }

    return slimmed_schema
354490
355491
@@ -430,7 +566,7 @@ def run(args: argparse.Namespace) -> int:
430566 except Exception as e :
431567 _logger .exception ("Failed to generate JSON Schema from CWL inputs object. Error: %s" , e )
432568 return 1
433- args .output .write (json .dumps (jsonschema , indent = 2 ))
569+ args .output .write (json .dumps (jsonschema , indent = 2 ) + " \n " )
434570
435571 return 0
436572
0 commit comments