diff --git a/genData100.sh b/genData100.sh
index e7f6ceda..c974cfcc 100755
--- a/genData100.sh
+++ b/genData100.sh
@@ -54,9 +54,9 @@ popd
 # Verify that schema files are valid
 pushd schema
-python3 check_schemas.py $pwd
+python3 check_schemas.py --schema_base $PWD
 
 # And check generated data against schemas.
-python3 check_generated_data.py ../$TEMP_DIR/testData
+python3 check_generated_data.py --schema_base ../$TEMP_DIR/testData
 popd
 
 ##########
@@ -124,7 +124,7 @@ popd
 
 # Verify that test output matches schema.
 pushd schema
-python3 check_test_output.py ../$TEMP_DIR/testOutput
+python3 check_test_output.py --schema_base ../$TEMP_DIR/testOutput
 popd
 
 # Verify everything
diff --git a/generateDataAndRun.sh b/generateDataAndRun.sh
index 672b3195..f2f27057 100755
--- a/generateDataAndRun.sh
+++ b/generateDataAndRun.sh
@@ -56,9 +56,10 @@ popd
 # Verify that schema files are valid
 pushd schema
-python3 check_schemas.py $pwd
+
+python3 check_schemas.py --schema_base $PWD
 
 # And check generated data against schemas.
-python3 check_generated_data.py ../$TEMP_DIR/testData
+python3 check_generated_data.py --schema_base ../$TEMP_DIR/testData
 popd
 
 ##########
@@ -138,7 +139,7 @@ popd
 
 # Verify that test output matches schema.
 pushd schema
-python3 check_test_output.py ../$TEMP_DIR/testOutput
+python3 check_test_output.py --schema_base ../$TEMP_DIR/testOutput
 popd
 
 # Verify everything
diff --git a/schema/check_generated_data.py b/schema/check_generated_data.py
index df433de8..0d7779bd 100644
--- a/schema/check_generated_data.py
+++ b/schema/check_generated_data.py
@@ -7,6 +7,7 @@
 
 from jsonschema import Draft7Validator, ValidationError
 
+import argparse
 import logging
 import logging.config
 import os.path
@@ -19,11 +20,20 @@ def main(args):
     logging.config.fileConfig("../logging.conf")
 
+    arg_parser = argparse.ArgumentParser(description='Schema check arguments')
+    # A named option (not a positional) so the shell scripts can pass it as a flag.
+    arg_parser.add_argument('--schema_base', required=True,
+                            help='Where to find the files to validate')
+    arg_parser.add_argument(
+        '--run_serial', action='store_true',
+        help='Set to process serially. Parallel is the default.')
+
+    schema_options = arg_parser.parse_args(args[1:])
+
     if len(args) <= 1:
         logging.error('Please specify the path to test data directory')
         return
     else:
-        test_data_path = args[1]
+        test_data_path = schema_options.schema_base
 
     logging.debug('TEST DATA PATH = %s', test_data_path)
 
@@ -44,12 +53,15 @@ def main(args):
 
     validator = schema_validator.ConformanceSchemaValidator()
 
+    validator.run_serial = schema_options.run_serial
+
     # Todo: use setters to initialize validator
     validator.schema_base = '.'
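+    # test_data_base is the generated testData directory passed via --schema_base.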
     validator.test_data_base = test_data_path
     validator.icu_versions = sorted(icu_versions)
     validator.test_types = ALL_TEST_TYPES
-    validator.debug = 1
+    validator.debug = None
 
     all_results = validator.validate_test_data_with_schema()
     logging.info(' %d results for generated test data', len(all_results))
@@ -101,6 +112,5 @@ def main(args):
     logging.info("All %d generated test data files match with schema", schema_count)
 
-
 if __name__ == "__main__":
     main(sys.argv)
diff --git a/schema/check_schemas.py b/schema/check_schemas.py
index c16865cc..211d2b06 100644
--- a/schema/check_schemas.py
+++ b/schema/check_schemas.py
@@ -1,6 +1,7 @@
 # Schema checker for the schemas in Conformance Testing
 # For ICU Conformance project, Data Driven Testing
 
+import argparse
 from datetime import datetime
 import glob
 import json
@@ -15,7 +16,6 @@
 import schema_validator
 from schema_files import ALL_TEST_TYPES
 
-
 class ValidateSchema:
     def __init__(self, schema_base='.'):
         self.schema_base = schema_base
@@ -60,18 +60,25 @@ def save_schema_validation_summary(self, validation_status):
         return output_filename
 
 
-def parallel_validate_schema(validator, file_names):
-    num_processors = multiprocessing.cpu_count()
-    logging.info('Schema validation: %s processors for %s schema validations', num_processors, len(file_names))
+def validate_all_schema(validator, file_names):
+    if validator.options.run_serial:
+        logging.info('Schema serial validation of %s files',
+                     len(file_names))
+        return [validator.validate_schema_file(file) for file in file_names]
+    else:
+        num_processors = multiprocessing.cpu_count()
+        logging.info('Schema parallel validation: %s processors for %s schema validations',
+                     num_processors, len(file_names))
+
+        processor_pool = multiprocessing.Pool(num_processors)
+        # Collect all the results from the pool; None signals failure.
+        result = None
+        try:
+            result = processor_pool.map(validator.validate_schema_file, file_names)
+        except multiprocessing.pool.MaybeEncodingError as error:
+            logging.error('Cannot return results of schema validation: %s', error)
+        return result
 
-    processor_pool = multiprocessing.Pool(num_processors)
-    # How to get all the results
-    result = None
-    try:
-        result = processor_pool.map(validator.validate_schema_file, file_names)
-    except multiprocessing.pool.MaybeEncodingError as error:
-        pass
-    return result
 
 
 def main(args):
@@ -78,20 +85,25 @@ def main(args):
     logger = logging.getLogger()
     logger.setLevel(logging.INFO)
     logger.info('+++ Test JSON Schema files')
 
+    arg_parser = argparse.ArgumentParser(description='Schema check arguments')
+    arg_parser.add_argument('--schema_base', default='.',
+                            help='Where to find the files to validate')
+    arg_parser.add_argument(
+        '--run_serial', action='store_true',
+        help='Set to process serially. Parallel is the default.')
+
     validator = schema_validator.ConformanceSchemaValidator()
+
     # Todo: use setters to initialize validator
-    validator.schema_base = '.'
+    validator.options = arg_parser.parse_args(args[1:])
 
-    if len(args) > 1:
-        schema_base = args[1]
-    else:
-        schema_base = '.'
+    schema_base = validator.options.schema_base
 
     schema_errors = []
     schema_count = 0
 
-    val_schema = ValidateSchema(schema_base)
+    val_schema = ValidateSchema(validator.options.schema_base)
 
     # An array of information to be reported on the main DDT page
     validation_status = []
@@ -101,7 +113,7 @@
         schema_file_names = glob.glob(schema_test_json_files)
         schema_file_paths.extend(schema_file_names)
 
-    results = parallel_validate_schema(validator, schema_file_paths)
+    results = validate_all_schema(validator, schema_file_paths)
 
     if not results:
         # This should stop the whole thing!
         exit(1)
diff --git a/schema/check_test_output.py b/schema/check_test_output.py
index 5d45d676..e026d146 100644
--- a/schema/check_test_output.py
+++ b/schema/check_test_output.py
@@ -5,6 +5,7 @@
 
 import glob
 import json
+import argparse
 import logging
 import logging.config
 import os.path
@@ -18,12 +19,18 @@ def main(args):
     logging.config.fileConfig("../logging.conf")
 
-    if len(args) <= 1:
-        logging.error('Please specify the path to the test output directory')
-        sys.exit(1)
-    else:
-        test_output_path = args[1]
+    arg_parser = argparse.ArgumentParser(description='Schema check arguments')
+    arg_parser.add_argument('--schema_base', required=True,
+                            help='Where to find the files to validate')
+    arg_parser.add_argument(
+        '--run_serial', action='store_true',
+        help='Set to process serially. Parallel is the default.')
+
+    schema_options = arg_parser.parse_args(args[1:])
+
+    # The test output tree to be checked against the schemas.
+    test_output_path = schema_options.schema_base
 
     logging.debug('TEST OUTPUT PATH = %s', test_output_path)
 
     logger = logging.Logger("Checking Test Data vs. Schemas LOGGER")
@@ -43,6 +48,7 @@
         executor_set.add(os.path.basename(path))
 
     icu_path = os.path.join(test_output_path, '*', 'icu*')
+    icu_dirs = glob.glob(icu_path)
 
     test_output_json_path = os.path.join(test_output_path, '*', 'icu*', '*.json')
@@ -63,8 +69,11 @@
     logging.debug('ICU directories = %s', icu_versions)
     logging.debug('test types = %s', ALL_TEST_TYPES)
+
     validator = schema_validator.ConformanceSchemaValidator()
 
-    # Todo: use setters to initialize validator
+
+    # TODO: use setters to initialize validator
+    validator.run_serial = schema_options.run_serial
     validator.schema_base = '.'
     validator.test_output_base = test_output_path
     validator.test_data_base = None
diff --git a/schema/schema_validator.py b/schema/schema_validator.py
index 7c0e1fa6..325875eb 100644
--- a/schema/schema_validator.py
+++ b/schema/schema_validator.py
@@ -17,6 +17,7 @@
 import schema_files
 from schema_files import SCHEMA_FILE_MAP
 
+
 # ?? Move to the initialization
 ch = logging.StreamHandler()
 ch.setLevel(logging.INFO)
 
@@ -24,18 +25,6 @@
 
 # Given a directory, validate JSON files against expected schema
 
-
-def parallel_validate_schema(validator, file_names):
-    num_processors = mp.cpu_count()
-    logging.info('JSON validation: %s processors for %s plans', num_processors, len(file_names))
-
-    # How to get all the results
-    processor_pool = mp.Pool(num_processors)
-    with processor_pool as p:
-        result = p.map(validator.validate_schema_file, file_names)
-    return result
-
-
 class ConformanceSchemaValidator:
     def __init__(self):
         # Where to find these files
@@ -46,6 +35,9 @@ def __init__(self):
         self.executors = []
         self.icu_versions = []
         self.debug_leve = 0
+        self.schema_type = None
+
+        self.run_serial = False
 
         logging.config.fileConfig("../logging.conf")
 
@@ -67,7 +59,8 @@ def validate_json_file(self, schema_and_data_paths):
         try:
             schema_file = open(schema_file_path, encoding='utf-8', mode='r')
         except FileNotFoundError as err:
-            logging.fatal(' Cannot open schema file %s.\n Err = %s', schema_file_path, err)
+            logging.fatal('%s: Cannot open schema file %s.\n Err = %s',
+                          self.schema_type, schema_file_path, err)
             result_data['result'] = False
             result_data['error'] = err
             exit(1)
 
@@ -75,7 +68,8 @@
         try:
             data_file = open(data_file_path, encoding='utf-8', mode='r')
         except FileNotFoundError as err:
-            logging.fatal(' Cannot open data file %s.\n Err = %s', data_file_path, err)
+            logging.fatal('%s: Cannot open data file %s.\n Err = %s',
+                          self.schema_type, data_file_path, err)
             result_data['result'] = False
             result_data['error'] = err
             exit(1)
 
@@ -86,7 +80,8 @@
         except json.decoder.JSONDecodeError as err:
             result_data['result'] = False
             result_data['error'] = err
-            logging.error('Bad JSON schema: %s', schema_file_path)
+            logging.error('%s: Bad JSON schema: %s',
+                          self.schema_type, schema_file_path)
             logging.fatal(' Error is %s', err)
             exit(1)
 
@@ -96,7 +91,8 @@
             # Cannot get the file
             result_data['result'] = False
             result_data['error'] = err
-            logging.error('Bad JSON data: %s', data_file_path)
+            logging.error('%s: Bad JSON data: %s',
+                          self.schema_type, data_file_path)
             logging.fatal(' Error is %s', err)
             exit(1)
 
@@ -108,15 +104,17 @@
         except exceptions.ValidationError as err:
             result_data['result'] = False
             result_data['error'] = err
-            logging.error('ValidationError for test output %s and schema %s',
-                          data_file_path, schema_file_path)
+            logging.error('%s: ValidationError for test output %s and schema %s',
+                          self.schema_type, data_file_path, schema_file_path)
             logging.fatal(' Error = %s', err)
             exit(1)
 
         except exceptions.SchemaError as err:
             result_data['result'] = False
             result_data['error'] = err
-            logging.error('SchemaError: Cannot validate with test output %s and schema %s. ',
-                          data_file_path, schema_file_path)
+            logging.error(
+                '%s: SchemaError: Cannot validate with test output %s and schema %s.',
+                self.schema_type,
+                data_file_path, schema_file_path)
             logging.fatal('Another failure: %s', err)
             exit(1)
 
@@ -129,8 +127,13 @@ def validate_test_data_with_schema(self):
 
         # Check for all the possible files
         json_file_pattern = os.path.join(self.test_data_base, '*', '*.json')
+        logging.debug('JSON FILE_PATTERN: %s', json_file_pattern)
+
         verify_pattern = os.path.join(self.test_data_base, '*', '*verify.json')
+        logging.debug('VERIFY_PATTERN: %s', verify_pattern)
+
         json_verify_files_list = glob.glob(verify_pattern)
+
         json_files_list = glob.glob(json_file_pattern)
         json_test_list = []
         for file in json_files_list:
@@ -141,17 +144,20 @@
         for test_type in self.test_types:
             for icu_version in self.icu_versions:
                 file_path_pair = self.get_schema_data_info(icu_version, test_type)
+                logging.debug(' FILE PATH PAIR: %s', file_path_pair)
                 if file_path_pair:
                     schema_test_info.append(file_path_pair)
                 else:
                     test_data_files_not_found.append([icu_version, test_type])
-                    logging.debug('No data test file %s for %s, %s', file_path_pair, test_type, icu_version)
+                    logging.debug('%s: No data test file %s for %s, %s',
+                                  self.schema_type,
+                                  file_path_pair, test_type, icu_version)
                     pass
 
         if test_data_files_not_found:
-            logging.info('Note: %d potential test data sets were not found.', len(test_data_files_not_found))
+            logging.debug('Note: %d potential test data sets were not found.', len(test_data_files_not_found))
 
-        results = self.parallel_check_test_data_schema(schema_test_info)
+        results = self.check_all_test_data_schema(schema_test_info)
 
         for result_data in results:
             if not result_data['data_file_name']:
@@ -165,17 +171,22 @@
             all_results.append(result_data)
         return all_results
 
-    def parallel_check_test_data_schema(self, schema_test_data):
-        num_processors = mp.cpu_count()
-        logging.info('Schema validation: %s processors for %s schema/test data pairs',
-                     num_processors,
-                     len(schema_test_data))
+    def check_all_test_data_schema(self, schema_test_data):
+        if self.run_serial:
+            logging.info('Test data serial validation of %s files', len(schema_test_data))
+            return [self.check_test_data_against_schema(test_data) for test_data in schema_test_data]
+        else:
+            num_processors = mp.cpu_count()
+            logging.info('Test data parallel validation: %s processors for %s schema/test data pairs',
+                         num_processors,
+                         len(schema_test_data))
+
+            # Collect all the results from the pool
+            processor_pool = mp.Pool(num_processors)
+            with processor_pool as p:
+                result = p.map(self.check_test_data_against_schema, schema_test_data)
+            return result
 
-        # Returns all the results
-        processor_pool = mp.Pool(num_processors)
-        with processor_pool as p:
-            result = p.map(self.check_test_data_against_schema, schema_test_data)
-        return result
 
     def get_schema_data_info(self, icu_version, test_type):
         # Gets pairs of schema and file names for test_type
@@ -217,7 +228,8 @@ def check_test_data_against_schema(self, schema_info):
 
     def check_test_data_schema(self, icu_version, test_type):
         # Check the generated test data for structure against the schema
-        logging.debug('Validating %s with %s', test_type, icu_version)
+        logging.debug('Validating %s %s with %s',
+                      self.schema_type, test_type, icu_version)
 
         # Check test output vs. the test data schema
         schema_verify_file = os.path.join(self.schema_base, test_type, 'test_schema.json')
@@ -330,16 +342,17 @@ def validate_schema_file(self, schema_file_path):
             test_type = test_type_property['const']
         except KeyError as err:
             test_type = None
-            logging.fatal('%s for %s. Cannot get test_type value', err, schema_file_path, test_type)
+            logging.fatal('%s for %s. Cannot get test_type value',
+                          err, schema_file_path)
             return [False, err, schema_file_path, test_type]
 
-        logging.info('Checking schema %s', schema_file_path)
         try:
             # With just a schema, it validates the schema.
             # However Validator.check_schema doesn't fail as expected.
             validate(None, schema)
         except jsonschema.exceptions.SchemaError as err:
-            logging.fatal('Cannot validate schema %s', schema_file_path)
+            logging.fatal('%s: Cannot validate schema %s',
+                          self.schema_type, schema_file_path)
             return [False, err, schema_file_path, test_type]
         except jsonschema.exceptions.ValidationError:
             # This is not an error because this is just validating a schema.
@@ -369,18 +382,23 @@ def check_schema_files(self):
 
         return schema_errors
 
-    def validate_test_output_parallel(self):
+    def validate_all_test_output(self):
         test_validation_plans = self.get_test_validation_plans()
-        num_processors = mp.cpu_count()
-        logging.info('JSON test output validation: %s processors for %s plans', num_processors,
-                     len(test_validation_plans))
+        if self.run_serial:
+            logging.info('JSON test output serial validation of %s files', len(test_validation_plans))
+            return [self.validate_json_file(test_data) for test_data in test_validation_plans], \
+                test_validation_plans
+        else:
+            num_processors = mp.cpu_count()
+            logging.info('JSON test output parallel validation: %s processors for %s plans', num_processors,
+                         len(test_validation_plans))
 
-        # How to get all the results
-        processor_pool = mp.Pool(num_processors)
-        with processor_pool as p:
-            results = p.map(self.validate_json_file, test_validation_plans)
+            # Collect all the results from the pool
+            processor_pool = mp.Pool(num_processors)
+            with processor_pool as p:
+                results = p.map(self.validate_json_file, test_validation_plans)
 
-        return results, test_validation_plans
+            return results, test_validation_plans
 
     def get_test_validation_plans(self):
         test_validation_plans = []
@@ -393,7 +411,7 @@ def get_test_validation_plans(self):
         return test_validation_plans
 
     def validate_test_output_with_schema(self):
-        return self.validate_test_output_parallel()
+        return self.validate_all_test_output()
 
 
 def process_args(args):
diff --git a/testdriver/ddtargs.py b/testdriver/ddtargs.py
index 0a91a5b0..8759d73d 100644
--- a/testdriver/ddtargs.py
+++ b/testdriver/ddtargs.py
@@ -35,15 +35,16 @@ def __init__(self):
 type_options = ['collation', 'datetime_fmt', 'decimal_fmt',
                 'display_names', 'number_fmt', 'lang_names',
                 'likely_subtags', 'list_fmt',
-                'message_fmt2', 'rdt_fmt', 'plural_rules',
-                'segmenter',
+                'message_fmt2', 'rdt_fmt', 'plural_rules', 'segmenter',
                 'ALL']
 
+# Executor ids used for ordering platforms in the dashboard.
 # Note that spaces in an executor name are replaced by '_' here.
 # component_count is an option to sort by number of test types present in test output, with largest cout at the left
 # TODO: when a new platform is added, put it in this option list.
 platform_order_options = ['alphabetic', 'component_count',
                           'ICU4C', 'ICU4J', 'ICU4X', 'NodeJS', 'Dart_Web', 'Dart_Native']
 
+
 class DdtArgs():
     def __init__(self, args):
         self.options = None  # A simple namespace with each field
@@ -66,10 +67,6 @@ def __init__(self, args):
         self.parser.add_argument('--noverify', default=None)  #
         self.parser.add_argument('--custom_verifier', default=None)  #
 
-        self.parser.add_argument(
-            '--run_serial', default=None,
-            help='Set if execution should be done serially. Parallel is the default.')
-
         self.options = self.parser.parse_args(args)
 
     def parse(self):
@@ -100,19 +97,7 @@ def __init__(self, args):
         self.parser.add_argument('--test_verifier',
                                  help='Flag to run in test mode',
                                  default=None)
 
-        self.parser.add_argument('--run_serial', default=None,
-                                 help='Set if execution should be done serially. Parallel is the default.')
-
-        # Order the output columns
-        self.parser.add_argument(
-            '--platform_order',
-            action='extend', nargs='*',
-            choices=platform_order_options,
-            help='The order of the platforms in the Summary dashboard, e.g., NodeJS ICU4X Dart_Web',
-            default=None)
-
         self.options = self.parser.parse_args(args)
-        return
 
     def getOptions(self):
@@ -169,6 +154,19 @@ def setCommonArgs(parser):
     parser.add_argument('--ignore', default=None)
 
+    parser.add_argument(
+        '--run_serial', action='store_true',
+        help='Set to process serially. Default is parallel.')
+
+    # Order the output columns
+    parser.add_argument(
+        '--platform_order',
+        action='extend', nargs='*',
+        choices=platform_order_options,
+        help='The order of the platforms in the Summary dashboard, e.g., NodeJS ICU4X Dart_Web',
+        default=None)
+
+
 def argsTestData():
     tests = [
         ['--test_type', 'collation'],
 
@@ -176,8 +174,7 @@
         ['--test_type', 'collation',
          '--test_type', 'decimal_fmt', 'number_fmt',
          'display_names', 'lang_names', 'likely_subtags',
-         'plural_rules',
-         'segmenter'],
+         'plural_rules'],
 
         ['--test', 'collation', 'ALL', 'decimal_fmt'],
         ['--test_type', 'datetime_fmt'],
@@ -195,6 +192,7 @@
     ]
     return tests
 
+
 def main(args):
     argparse = DdtArgs()
diff --git a/testgen/generators/base.py b/testgen/generators/base.py
index 66b99b1c..b39077f7 100644
--- a/testgen/generators/base.py
+++ b/testgen/generators/base.py
@@ -136,7 +136,7 @@ def readFile(self, filename, version="", filetype="txt"):
         try:
             with codecs.open(path, "r", encoding="utf-8") as testdata:
                 return json.load(testdata) if filetype == "json" else testdata.read()
-        except BaseException as err:
+        except Exception as err:
             logging.warning("** readFile: %s", err)
             return None
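
Usage sketch (not part of the patch): with these changes the three schema
checkers take their input directory as the --schema_base flag instead of a bare
positional argument, and --run_serial opts out of the multiprocessing pool.
The invocations below mirror genData100.sh; adding --run_serial to them is an
assumed example, not a line taken from the scripts.

    pushd schema
    python3 check_schemas.py --schema_base $PWD
    python3 check_generated_data.py --schema_base ../$TEMP_DIR/testData --run_serial
    python3 check_test_output.py --schema_base ../$TEMP_DIR/testOutput --run_serial
    popd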