diff --git a/config_example.yml b/config_example.yml index 681daca3..5f6aae40 100644 --- a/config_example.yml +++ b/config_example.yml @@ -7,7 +7,9 @@ General: Regions: - Name: "Signal_region" Variable: "jet_pt" - Filter: "lep_charge > 0" + Filters: + - Name: "lepton_charge" + Filter: "lep_charge > 0" Binning: [200, 300, 400, 500, 600] Samples: diff --git a/docs/config.rst b/docs/config.rst index c0259011..cc4659a0 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -32,12 +32,16 @@ Details about the setting blocks: Common options: --------------- -.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/template_setting +.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/filters_setting -.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/samples_setting +.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/filter_setting .. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/samplepath_setting .. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/regions_setting +.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/template_setting + +.. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/samples_setting + .. jsonschema:: ../src/cabinetry/schemas/config.json#/definitions/smoothing_setting diff --git a/src/cabinetry/configuration.py b/src/cabinetry/configuration.py index 4984396f..d0e635a8 100644 --- a/src/cabinetry/configuration.py +++ b/src/cabinetry/configuration.py @@ -4,7 +4,7 @@ import logging import pathlib import pkgutil -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, TypeVar, Union import jsonschema import yaml @@ -15,6 +15,10 @@ log = logging.getLogger(__name__) +# used in _setting_to_list below +T = TypeVar("T", str, Dict[str, Any]) + + def load(file_path_string: Union[str, pathlib.Path]) -> Dict[str, Any]: """Loads, validates, and returns a config file from the provided path. @@ -99,7 +103,7 @@ def print_overview(config: Dict[str, Any]) -> None: log.info(f" {len(config['Systematics'])} Systematic(s)") -def _setting_to_list(setting: Union[str, List[str]]) -> List[str]: +def _setting_to_list(setting: Union[T, List[T]]) -> List[T]: """Converts a configuration setting to a list. The config allows for two ways of specifying some settings, for example samples. A @@ -108,10 +112,12 @@ def _setting_to_list(setting: Union[str, List[str]]) -> List[str]: converted to a list. Args: - setting (Union[str, List[str]]): name of single setting value or list of values + setting (Union[Union[str, Dict[str, Any]], List[Union[str, Dict[str, Any]]]]): + single setting value (string or dictionary) or list of values (each being + strings or dictionaries) Returns: - list: name(s) of sample(s) + list: values (strings or dictionaries) in list form """ if not isinstance(setting, list): setting = [setting] diff --git a/src/cabinetry/route.py b/src/cabinetry/route.py index b3e66f80..dcf6b4ad 100644 --- a/src/cabinetry/route.py +++ b/src/cabinetry/route.py @@ -13,15 +13,15 @@ log = logging.getLogger(__name__) -# type of a function processing templates, takes sample-region-systematic-template, -# returns None +# type of a function processing templates, takes general-region-sample-systematic- +# template, returns None # template can be "Up" / "Down" for variations, or None for nominal ProcessorFunc = Callable[ [Dict[str, Any], Dict[str, Any], Dict[str, Any], Optional[Literal["Up", "Down"]]], None, ] -# type of a user-defined function for template processing, takes sample-region- +# type of a user-defined function for template processing, takes general-region-sample- # systematic-template, returns a boost_histogram.Histogram # template can be any string (to match "Up" / "Down"), or None / "*" to match nominal UserTemplateFunc = Callable[ diff --git a/src/cabinetry/schemas/config.json b/src/cabinetry/schemas/config.json index 62704b75..45bbf035 100644 --- a/src/cabinetry/schemas/config.json +++ b/src/cabinetry/schemas/config.json @@ -80,6 +80,11 @@ "description": "folder to save histograms to and read histograms from", "type": "string" }, + "Filters": { + "description": "selection criteria to apply", + "$$description": "only affects ntuple inputs", + "$ref": "#/definitions/filters_setting" + }, "Fixed": { "description": "list of parameters to treat as constant in fits", "type": "array", @@ -145,9 +150,10 @@ }, "uniqueItems": true }, - "Filter": { - "description": "selection criteria to apply", - "type": "string" + "Filters": { + "description": "selection criteria to apply (can override filters at general level)", + "$$description": "only affects ntuple inputs", + "$ref": "#/definitions/filters_setting" }, "RegionPath": { "description": "(part of) path to file containing region", @@ -182,9 +188,10 @@ "description": "name of tree", "type": "string" }, - "Filter": { - "description": "selection criteria to apply (override for region setting)", - "type": "string" + "Filters": { + "description": "selection criteria to apply (can override filters at general and region level)", + "$$description": "only affects ntuple inputs", + "$ref": "#/definitions/filters_setting" }, "Weight": { "description": "weight to apply to events", @@ -307,9 +314,10 @@ "description": "variable to bin in (override for nominal setting)", "type": "string" }, - "Filter": { - "description": "selection criteria to apply (override for region / sample setting)", - "type": "string" + "Filters": { + "description": "selection criteria to apply (can override filters at general, region and sample level)", + "$$description": "only affects ntuple inputs", + "$ref": "#/definitions/filters_setting" }, "RegionPath": { "description": "(part of) path to file containing region (override for nominal setting)", @@ -420,6 +428,45 @@ } }, "additionalProperties": false + }, + "filters_setting": { + "title": "Filters setting", + "$$target": "#/definitions/filters_setting", + "description": "filter(s) to apply for histogram creation", + "oneOf": [ + { + "description": "single filter", + "$ref": "#/definitions/filter_setting" + }, + { + "description": "list of filters", + "type": "array", + "minItems": 1, + "items": { + "description": "single filter", + "$ref": "#/definitions/filter_setting" + }, + "uniqueItems": true + } + ] + }, + "filter_setting": { + "title": "Filter setting", + "$$target": "#/definitions/filter_setting", + "description": "filter setting, filters with the same name override each other", + "type": "object", + "required": ["Name", "Filter"], + "properties": { + "Name": { + "description": "name of filter (filters can be overridden by other filters with the same name)", + "type": "string" + }, + "Filter": { + "description": "filter to apply", + "type": "string" + } + }, + "additionalProperties": false } }, "additionalProperties": false diff --git a/src/cabinetry/templates/__init__.py b/src/cabinetry/templates/__init__.py index 034fc327..a5bba3a5 100644 --- a/src/cabinetry/templates/__init__.py +++ b/src/cabinetry/templates/__init__.py @@ -35,7 +35,10 @@ def build( # create an instance of the class doing the template building histogram_folder = pathlib.Path(config["General"]["HistogramFolder"]) general_path = config["General"]["InputPath"] - template_builder = builder._Builder(histogram_folder, general_path, method) + general_filters = config["General"].get("Filters", {}) + template_builder = builder._Builder( + histogram_folder, general_path, general_filters, method + ) match_func: Optional[route.MatchFunc] = None if router is not None: diff --git a/src/cabinetry/templates/builder.py b/src/cabinetry/templates/builder.py index 3c848e7d..61abbf03 100644 --- a/src/cabinetry/templates/builder.py +++ b/src/cabinetry/templates/builder.py @@ -135,6 +135,7 @@ def _variable( def _filter( + general: Dict[str, Any], region: Dict[str, Any], sample: Dict[str, Any], systematic: Dict[str, Any], @@ -148,6 +149,7 @@ def _filter( the sample-specific filter if both are provided. Args: + general (Dict[str, Any]): containing general configuration information region (Dict[str, Any]): containing all region information sample (Dict[str, Any]): containing all sample information systematic (Dict[str, Any]): containing all systematic information @@ -157,21 +159,32 @@ def _filter( Returns: Optional[str]: expression for the filter to be used, or None for no filtering """ - selection_filter = region.get("Filter", None) + selection_filters = {} - # check for sample-specific overrides - selection_filter_override = sample.get("Filter", None) - if selection_filter_override is not None: - selection_filter = selection_filter_override + # general options can set standard values for filter + for filter in configuration._setting_to_list(general.get("Filters", [])): + selection_filters.update({filter["Name"]: filter["Filter"]}) + + # regions can set default filters (general level not implemented yet) + for filter in configuration._setting_to_list(region.get("Filters", [])): + selection_filters.update({filter["Name"]: filter["Filter"]}) + + # samples can append to and override filters + for filter in configuration._setting_to_list(sample.get("Filters", [])): + selection_filters.update({filter["Name"]: filter["Filter"]}) # check whether a systematic is being processed if template is not None: - # determine whether the template has an override specified - selection_filter_override = utils._check_for_override( - systematic, template, "Filter" - ) - if selection_filter_override is not None: - selection_filter = selection_filter_override + # templates can append to and override filters + template_filters = systematic.get(template, {}).get("Filters", []) + for filter in configuration._setting_to_list(template_filters): + selection_filters.update({filter["Name"]: filter["Filter"]}) + + if selection_filters == {}: + return None + + # combine all filters + selection_filter = " & ".join([f"({f})" for f in selection_filters.values()]) return selection_filter @@ -261,17 +274,23 @@ class _Builder: """Handles the instructions for backends to create histograms.""" def __init__( - self, histogram_folder: pathlib.Path, general_path: str, method: str + self, + histogram_folder: pathlib.Path, + general_path: str, + general_filters: Dict[str, Any], + method: str, ) -> None: """Creates an instance, sets histogram folder, path template and method. Args: histogram_folder (pathlib.Path): folder to save the histograms to general_path (str): template for paths to input files for histogram building + general_filters (Dict[str, Any]): dictionary with general filters to apply method (str): backend to use for histogram production """ self.histogram_folder = histogram_folder self.general_path = general_path + self.general_filters = general_filters self.method = method def _create_histogram( @@ -303,7 +322,9 @@ def _create_histogram( variable = _variable(region, sample, systematic, template) bins = _binning(region) weight = _weight(region, sample, systematic, template) - selection_filter = _filter(region, sample, systematic, template) + selection_filter = _filter( + self.general_filters, region, sample, systematic, template + ) # obtain the histogram if self.method == "uproot": diff --git a/tests/templates/test_templates.py b/tests/templates/test_templates.py index f7043fdb..7d4da7b6 100644 --- a/tests/templates/test_templates.py +++ b/tests/templates/test_templates.py @@ -11,10 +11,10 @@ def test_build(mock_builder, mock_apply): config = {"General": {"HistogramFolder": "path/", "InputPath": "file.root"}} method = "uproot" - # no router + # no router or filter templates.build(config, method=method) assert mock_builder.call_args_list == [ - ((pathlib.Path("path/"), "file.root", method), {}) + ((pathlib.Path("path/"), "file.root", {}, method), {}) ] assert mock_apply.call_count == 1 config_call, func_call = mock_apply.call_args[0] @@ -22,9 +22,15 @@ def test_build(mock_builder, mock_apply): assert func_call._extract_mock_name() == "_Builder()._create_histogram" assert mock_apply.call_args[1] == {"match_func": None} - # including a router + # including a router and general filter + filter_dict = {"Name": "f", "Filter": "c"} + config["General"].update({"Filters": filter_dict}) mock_router = mock.MagicMock() templates.build(config, method=method, router=mock_router) + assert mock_builder.call_args == ( + (pathlib.Path("path/"), "file.root", filter_dict, method), + {}, + ) # verify wrapper was set assert ( diff --git a/tests/templates/test_templates_builder.py b/tests/templates/test_templates_builder.py index 2c703590..beadf6b5 100644 --- a/tests/templates/test_templates_builder.py +++ b/tests/templates/test_templates_builder.py @@ -1,5 +1,6 @@ import logging import pathlib +from typing import Optional from unittest import mock import boost_histogram as bh @@ -104,44 +105,93 @@ def test__variable(): def test__filter(): - # no override - assert builder._filter({"Filter": "jet_pt > 0"}, {}, {}, None) == "jet_pt > 0" + # no override, dict provided + assert ( + builder._filter( + {"Filters": {"Name": "f", "Filter": "jet_pt > 0"}}, {}, {}, {}, None + ) + == "(jet_pt > 0)" + ) + + # list of dictionaries without overrides + assert ( + builder._filter( + {"Filters": [{"Name": "a", "Filter": "c1"}, {"Name": "b", "Filter": "c2"}]}, + {}, + {}, + {}, + None, + ) + == "(c1) & (c2)" + ) # no filter - assert builder._filter({}, {}, {}, None) is None + assert builder._filter({}, {}, {}, {}, None) is None # systematic with override assert ( builder._filter( - {"Filter": "jet_pt > 0"}, + {"Filters": {"Name": "f", "Filter": "jet_pt > 0"}}, {}, - {"Name": "variation", "Up": {"Filter": "jet_pt > 100"}}, + {}, + { + "Name": "variation", + "Up": {"Filters": {"Name": "f", "Filter": "jet_pt > 100"}}, + }, "Up", ) - == "jet_pt > 100" + == "(jet_pt > 100)" ) # systematic without override assert ( - builder._filter({"Filter": "jet_pt > 0"}, {}, {"Name": "variation"}, "Up") - == "jet_pt > 0" + builder._filter( + {"Filters": {"Name": "f", "Filter": "jet_pt > 0"}}, + {}, + {}, + {"Name": "variation"}, + "Up", + ) + == "(jet_pt > 0)" + ) + + # sample-specific override for part of a region filter + assert ( + builder._filter( + {}, + {"Filters": [{"Name": "a", "Filter": "c1"}, {"Name": "b", "Filter": "c2"}]}, + {"Filters": {"Name": "b", "Filter": "c3"}}, + {}, + None, + ) + == "(c1) & (c3)" ) - # sample-specific override + # filter with region-, sample-, and systematic override assert ( - builder._filter({"Filter": "jet_pt > 0"}, {"Filter": "jet_pt > 100"}, {}, None) - == "jet_pt > 100" + builder._filter( + {"Filters": {"Name": "f", "Filter": "jet_pt > 0"}}, + {"Filters": {"Name": "f", "Filter": "jet_pt > 100"}}, + {"Filters": {"Name": "f", "Filter": "jet_pt > 200"}}, + { + "Name": "variation", + "Up": {"Filters": {"Name": "f", "Filter": "jet_pt > 300"}}, + }, + "Up", + ) + == "(jet_pt > 300)" ) - # sample-specific override, again overridden by systematic + # filters at all levels combining assert ( builder._filter( - {"Filter": "jet_pt > 0"}, - {"Filter": "jet_pt > 100"}, - {"Name": "variation", "Up": {"Filter": "jet_pt > 200"}}, + {"Filters": {"Name": "a", "Filter": "c1"}}, + {"Filters": {"Name": "b", "Filter": "c2"}}, + {"Filters": {"Name": "c", "Filter": "c3"}}, + {"Name": "variation", "Up": {"Filters": {"Name": "d", "Filter": "c4"}}}, "Up", ) - == "jet_pt > 200" + == "(c1) & (c2) & (c3) & (c4)" ) @@ -196,26 +246,31 @@ def test__binning(): def test__Builder(): - builder_instance = builder._Builder(pathlib.Path("path"), "file.root", "uproot") + builder_instance = builder._Builder( + pathlib.Path("path"), "file.root", {"Name": "f", "Filter": "c"}, "uproot" + ) assert builder_instance.histogram_folder == pathlib.Path("path") assert builder_instance.general_path == "file.root" + assert builder_instance.general_filters == {"Name": "f", "Filter": "c"} assert builder_instance.method == "uproot" @mock.patch("cabinetry.templates.utils._name_and_save") @mock.patch("cabinetry.histo.Histogram", return_value="cabinetry_histogram") @mock.patch("cabinetry.contrib.histogram_creator.with_uproot", return_value="histogram") +@mock.patch("cabinetry.templates.builder._filter", return_value="(x>3)") @mock.patch( "cabinetry.templates.builder._ntuple_paths", return_value=[pathlib.Path("path_to_ntuple")], ) def test__Builder_create_histogram( - mock_path, mock_uproot_builder, mock_histo, mock_save + mock_path, mock_filter, mock_uproot_builder, mock_histo, mock_save ): histogram_folder = pathlib.Path("path") general_path = "{SamplePath}" + general_filters = {"Name": "f", "Filter": "c"} # the binning [0] is not a proper binning, but simplifies the comparison - region = {"Name": "test_region", "Variable": "x", "Binning": [0], "Filter": "x>3"} + region = {"Name": "test_region", "Variable": "x", "Binning": [0]} sample = { "Name": "sample", "Tree": "tree", @@ -225,7 +280,9 @@ def test__Builder_create_histogram( systematic = {} template = "Up" - builder_instance = builder._Builder(histogram_folder, general_path, "uproot") + builder_instance = builder._Builder( + histogram_folder, general_path, general_filters, "uproot" + ) builder_instance._create_histogram(region, sample, systematic, template) # call to path creation @@ -233,11 +290,16 @@ def test__Builder_create_histogram( ((general_path, region, sample, systematic, template), {}) ] + # filter creation + assert mock_filter.call_args_list == [ + ((general_filters, region, sample, systematic, template), {}) + ] + # verify the backend call happened properly assert mock_uproot_builder.call_args_list == [ ( ([pathlib.Path("path_to_ntuple")], "tree", "x", [0]), - {"weight": "weight_mc", "selection_filter": "x>3"}, + {"weight": "weight_mc", "selection_filter": "(x>3)"}, ) ] @@ -260,7 +322,7 @@ def test__Builder_create_histogram( ] # other backends - builder_unknown = builder._Builder(histogram_folder, "{SamplePath}", "unknown") + builder_unknown = builder._Builder(histogram_folder, "{SamplePath}", {}, "unknown") with pytest.raises(NotImplementedError, match="unknown backend unknown"): builder_unknown._create_histogram(region, sample, systematic, template) @@ -273,10 +335,10 @@ def test__Builder__wrap_custom_template_builder(mock_save): systematic = {} histogram_folder = pathlib.Path("path") - def test_func(reg, sam, sys, tem): + def test_func(reg: dict, sam: dict, sys: dict, tem: Optional[str]): return histogram - builder_instance = builder._Builder(histogram_folder, "file.root", "uproot") + builder_instance = builder._Builder(histogram_folder, "file.root", {}, "uproot") wrapped_func = builder_instance._wrap_custom_template_builder(test_func) # check the behavior of the wrapped function @@ -288,7 +350,7 @@ def test_func(reg, sam, sys, tem): ] # wrapped function returns wrong type - def test_func_wrong_return(reg, sam, sys, tem): + def test_func_wrong_return(reg: dict, sam: dict, sys: dict, tem: Optional[str]): return None wrapped_func_wrong_return = builder_instance._wrap_custom_template_builder( diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 98cb88a0..7bf6284e 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -21,7 +21,7 @@ def test_validate(): "HistogramFolder": "", "InputPath": "", }, - "Regions": [{"Name": "", "Filter": "", "Variable": "", "Binning": [0, 1]}], + "Regions": [{"Name": "", "Variable": "", "Binning": [0, 1]}], "Samples": [{"Name": "", "Tree": "", "Data": True}], "NormFactors": [{"Name": ""}], } @@ -34,7 +34,7 @@ def test_validate(): "HistogramFolder": "", "InputPath": "", }, - "Regions": [{"Name": "", "Filter": "", "Variable": "", "Binning": [0, 1]}], + "Regions": [{"Name": "", "Variable": "", "Binning": [0, 1]}], "Samples": [{"Name": "", "Tree": ""}], "NormFactors": [{"Name": ""}], } diff --git a/tests/test_route.py b/tests/test_route.py index b1820953..7493ddf0 100644 --- a/tests/test_route.py +++ b/tests/test_route.py @@ -1,5 +1,6 @@ import functools import logging +from typing import Optional from unittest import mock import boost_histogram as bh @@ -13,7 +14,7 @@ class ProcessorExamples: @staticmethod def get_example_template_builder(): def example_template_builder( - reg: dict, sam: dict, sys: dict, tem: str + reg: dict, sam: dict, sys: dict, tem: Optional[str] ) -> bh.Histogram: hist = bh.Histogram(bh.axis.Variable([0, 1]), storage=bh.storage.Weight()) yields = np.asarray([2]) @@ -224,7 +225,7 @@ def wrapper(reg, sam, sys, tem): # assert wrapped_builder == expected_wrap # compare instead the behavior for an example call - assert wrapped_builder({}, {}, {}, {}) == expected_wrap({}, {}, {}, {}) + assert wrapped_builder({}, {}, {}, "") == expected_wrap({}, {}, {}, "") def test_apply_to_all_templates(): @@ -232,8 +233,8 @@ def test_apply_to_all_templates(): # define a custom override function that logs its arguments when called override_call_args = [] - def match_func(reg: str, sam: str, sys: str, tem: str): - def f(reg: dict, sam: dict, sys: dict, tem: str): + def match_func(reg: str, sam: str, sys: str, tem: Optional[str]): + def f(reg: dict, sam: dict, sys: dict, tem: Optional[str]): override_call_args.append((reg, sam, sys, tem)) return f @@ -255,23 +256,23 @@ def f(reg: dict, sam: dict, sys: dict, tem: str): # check that the default function was called for all templates assert default_func.call_count == 3 assert default_func.call_args_list[0] == ( - ({"Name": "test_region"}, {"Name": "sample"}, {}, None), + (example_config["Regions"][0], example_config["Samples"][0], {}, None), {}, ) assert default_func.call_args_list[1] == ( ( - {"Name": "test_region"}, - {"Name": "sample"}, - {"Name": "var", "Type": "NormPlusShape"}, + example_config["Regions"][0], + example_config["Samples"][0], + example_config["Systematics"][1], "Up", ), {}, ) assert default_func.call_args_list[2] == ( ( - {"Name": "test_region"}, - {"Name": "sample"}, - {"Name": "var", "Type": "NormPlusShape"}, + example_config["Regions"][0], + example_config["Samples"][0], + example_config["Systematics"][1], "Down", ), {}, @@ -281,21 +282,21 @@ def f(reg: dict, sam: dict, sys: dict, tem: str): route.apply_to_all_templates(example_config, default_func, match_func=match_func) assert len(override_call_args) == 3 assert override_call_args[0] == ( - {"Name": "test_region"}, - {"Name": "sample"}, + example_config["Regions"][0], + example_config["Samples"][0], {}, None, ) assert override_call_args[1] == ( - {"Name": "test_region"}, - {"Name": "sample"}, - {"Name": "var", "Type": "NormPlusShape"}, + example_config["Regions"][0], + example_config["Samples"][0], + example_config["Systematics"][1], "Up", ) assert override_call_args[2] == ( - {"Name": "test_region"}, - {"Name": "sample"}, - {"Name": "var", "Type": "NormPlusShape"}, + example_config["Regions"][0], + example_config["Samples"][0], + example_config["Systematics"][1], "Down", ) @@ -307,4 +308,4 @@ def f(reg: dict, sam: dict, sys: dict, tem: str): route.apply_to_all_templates(example_config, default_func) # previously 3 calls of default_func, now one more for nominal template assert default_func.call_count == 4 - assert default_func.call_args_list[3][0][3] is None + assert default_func.call_args_list[3][0][3] is None # template is None