Skip to content

Commit df6bd68

Browse files
feat: ✨ add RequiredCheck (#122)
# Description This PR handles the special case of requiring a field to be non-null by subclassing `CustomCheck` to create `RequiredCheck`. I thought this was a nice way of splitting out the logic for required rules and that required rules were special enough to warrant their own class. I also thought it made sense to limit what kind of JSON paths the required rule can be applied to. It makes sense to mark a dict property (e.g. `$.resources[*].title`) as required because it is very obvious what it means for this field to be missing. - But marking an array item itself (e.g. `$.resources[*]` or `$.resources[2]`) as required makes less sense to me. "I want the second resource to exist in particular" is a weird constraint. If someone cares about the number of resources, they should be checking the length of the array. - Same for "vague" JSON paths like `$..title`: selecting all `title` fields under the root node and then checking if they exist feels circular and unnecessary. :arrow_right: So I restricted `RequiredCheck` to sensible paths only, which also made the code simpler. Closes #120 Needs an in-depth review. ## Checklist - [x] Formatted Markdown - [x] Ran `just run-all` --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 81031bf commit df6bd68

File tree

7 files changed

+173
-48
lines changed

7 files changed

+173
-48
lines changed

_quarto.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ quartodoc:
7474
contents:
7575
- name: Config
7676
- name: CustomCheck
77+
- name: RequiredCheck
7778
- name: Exclusion
7879

7980
- title: Output

src/check_datapackage/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from .check import check
44
from .config import Config
5-
from .custom_check import CustomCheck
5+
from .custom_check import CustomCheck, RequiredCheck
66
from .examples import example_package_properties, example_resource_properties
77
from .exclusion import Exclusion
88
from .issue import Issue
@@ -13,6 +13,7 @@
1313
"Exclusion",
1414
"Issue",
1515
"CustomCheck",
16+
"RequiredCheck",
1617
"example_package_properties",
1718
"example_resource_properties",
1819
"check",

src/check_datapackage/check.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from check_datapackage.config import Config
99
from check_datapackage.constants import DATA_PACKAGE_SCHEMA_PATH, GROUP_ERRORS
10-
from check_datapackage.custom_check import apply_custom_checks
10+
from check_datapackage.custom_check import apply_extensions
1111
from check_datapackage.exclusion import exclude
1212
from check_datapackage.internals import (
1313
_filter,
@@ -44,7 +44,7 @@ class for more details, especially about the default values.
4444
_set_should_fields_to_required(schema)
4545

4646
issues = _check_object_against_json_schema(properties, schema)
47-
issues += apply_custom_checks(config.custom_checks, properties)
47+
issues += apply_extensions(properties, config.custom_checks)
4848
issues = exclude(issues, config.exclusions, properties)
4949

5050
return sorted(set(issues))

src/check_datapackage/config.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from dataclasses import dataclass, field
22
from typing import Literal
33

4-
from check_datapackage.custom_check import CustomCheck
4+
from check_datapackage.custom_check import CustomCheck, RequiredCheck
55
from check_datapackage.exclusion import Exclusion
66

77

@@ -12,8 +12,8 @@ class Config:
1212
Attributes:
1313
exclusions (list[Exclusion]): Any issues matching any of Exclusion objects will
1414
be excluded (i.e., removed from the output of the check function).
15-
custom_checks (list[CustomCheck]): Custom checks listed here will be done in
16-
addition to checks defined in the Data Package standard.
15+
custom_checks (list[CustomCheck | RequiredCheck]): Custom checks listed here
16+
will be done in addition to checks defined in the Data Package standard.
1717
strict (bool): Whether to include "SHOULD" checks in addition to "MUST" checks
1818
from the Data Package standard. If True, "SHOULD" checks will also be
1919
included. Defaults to False.
@@ -31,14 +31,18 @@ class Config:
3131
message="Data Packages may only be licensed under MIT.",
3232
check=lambda license_name: license_name == "mit",
3333
)
34+
required_title_check = cdp.RequiredCheck(
35+
jsonpath="$.title",
36+
message="A title is required.",
37+
)
3438
config = cdp.Config(
3539
exclusions=[exclusion_required],
36-
custom_checks=[license_check],
40+
custom_checks=[license_check, required_title_check],
3741
)
3842
```
3943
"""
4044

4145
exclusions: list[Exclusion] = field(default_factory=list)
42-
custom_checks: list[CustomCheck] = field(default_factory=list)
46+
custom_checks: list[CustomCheck | RequiredCheck] = field(default_factory=list)
4347
strict: bool = False
4448
version: Literal["v1", "v2"] = "v2"
Lines changed: 89 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
1+
import re
12
from dataclasses import dataclass
23
from typing import Any, Callable
34

45
from check_datapackage.internals import (
6+
DescriptorField,
57
_filter,
68
_flat_map,
9+
_get_direct_jsonpaths,
710
_get_fields_at_jsonpath,
811
_map,
912
)
1013
from check_datapackage.issue import Issue
1114

1215

13-
@dataclass
16+
@dataclass(frozen=True)
1417
class CustomCheck:
15-
"""A custom check to be done on a Data Package descriptor.
18+
"""A custom check to be done on Data Package metadata.
1619
1720
Attributes:
1821
jsonpath (str): The location of the field or fields the custom check applies to,
@@ -45,49 +48,101 @@ class CustomCheck:
4548
check: Callable[[Any], bool]
4649
type: str = "custom"
4750

51+
def apply(self, properties: dict[str, Any]) -> list[Issue]:
52+
"""Applies the custom check to the properties.
4853
49-
def apply_custom_checks(
50-
custom_checks: list[CustomCheck], descriptor: dict[str, Any]
51-
) -> list[Issue]:
52-
"""Checks the descriptor for all custom checks and creates issues if any fail.
54+
Args:
55+
properties: The properties to check.
5356
54-
Args:
55-
custom_checks: The custom checks to apply to the descriptor.
56-
descriptor: The descriptor to check.
57+
Returns:
58+
A list of `Issue`s.
59+
"""
60+
fields: list[DescriptorField] = _get_fields_at_jsonpath(
61+
self.jsonpath,
62+
properties,
63+
)
64+
matches: list[DescriptorField] = _filter(
65+
fields,
66+
lambda field: not self.check(field.value),
67+
)
68+
return _map(
69+
matches,
70+
lambda field: Issue(
71+
jsonpath=field.jsonpath, type=self.type, message=self.message
72+
),
73+
)
5774

58-
Returns:
59-
A list of `Issue`s.
75+
76+
@dataclass(frozen=True)
77+
class RequiredCheck:
78+
"""Set a specific property as required.
79+
80+
Attributes:
81+
jsonpath (str): The location of the field or fields, expressed in [JSON
82+
path](https://jg-rp.github.io/python-jsonpath/syntax/) notation, to which
83+
the check applies (e.g., `$.resources[*].name`).
84+
message (str): The message that is shown when the check fails.
85+
86+
Examples:
87+
```{python}
88+
import check_datapackage as cdp
89+
required_title_check = cdp.RequiredCheck(
90+
jsonpath="$.title",
91+
message="A title is required.",
92+
)
93+
```
6094
"""
61-
return _flat_map(
62-
custom_checks,
63-
lambda custom_check: _apply_custom_check(custom_check, descriptor),
64-
)
6595

96+
jsonpath: str
97+
message: str
98+
99+
def apply(self, properties: dict[str, Any]) -> list[Issue]:
100+
"""Applies the required check to the properties.
101+
102+
Args:
103+
properties: The properties to check.
104+
105+
Returns:
106+
A list of `Issue`s.
107+
"""
108+
# TODO: check jsonpath when checking other user input
109+
field_name_match = re.search(r"(?<!\.)(\.\w+)$", self.jsonpath)
110+
if not field_name_match:
111+
return []
112+
field_name = field_name_match.group(1)
113+
114+
matching_paths = _get_direct_jsonpaths(self.jsonpath, properties)
115+
indirect_parent_path = self.jsonpath.removesuffix(field_name)
116+
direct_parent_paths = _get_direct_jsonpaths(indirect_parent_path, properties)
117+
missing_paths = _filter(
118+
direct_parent_paths,
119+
lambda path: f"{path}{field_name}" not in matching_paths,
120+
)
121+
return _map(
122+
missing_paths,
123+
lambda path: Issue(
124+
jsonpath=path + field_name,
125+
type="required",
126+
message=self.message,
127+
),
128+
)
66129

67-
def _apply_custom_check(
68-
custom_check: CustomCheck, descriptor: dict[str, Any]
69-
) -> list[Issue]:
70-
"""Applies the custom check to the descriptor.
71130

72-
If any fields fail the custom check, this function creates a list of issues
73-
for those fields.
131+
def apply_extensions(
132+
properties: dict[str, Any],
133+
# TODO: extensions: Extensions once Extensions implemented
134+
extensions: list[CustomCheck | RequiredCheck],
135+
) -> list[Issue]:
136+
"""Applies the extension checks to the properties.
74137
75138
Args:
76-
custom_check: The custom check to apply to the descriptor.
77-
descriptor: The descriptor to check.
139+
properties: The properties to check.
140+
extensions: The user-defined extensions to apply to the properties.
78141
79142
Returns:
80143
A list of `Issue`s.
81144
"""
82-
matching_fields = _get_fields_at_jsonpath(custom_check.jsonpath, descriptor)
83-
failed_fields = _filter(
84-
matching_fields, lambda field: not custom_check.check(field.value)
85-
)
86-
return _map(
87-
failed_fields,
88-
lambda field: Issue(
89-
jsonpath=field.jsonpath,
90-
type=custom_check.type,
91-
message=custom_check.message,
92-
),
145+
return _flat_map(
146+
extensions,
147+
lambda extension: extension.apply(properties),
93148
)

src/check_datapackage/internals.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ def _get_fields_at_jsonpath(
2626
return _map(matches, _create_descriptor_field)
2727

2828

29+
def _get_direct_jsonpaths(jsonpath: str, descriptor: dict[str, Any]) -> list[str]:
30+
"""Returns all direct JSON paths that match a direct or indirect JSON path."""
31+
fields = _get_fields_at_jsonpath(jsonpath, descriptor)
32+
return _map(fields, lambda field: field.jsonpath)
33+
34+
2935
def _create_descriptor_field(match: JSONPathMatch) -> DescriptorField:
3036
return DescriptorField(
3137
jsonpath=match.path.replace("['", ".").replace("']", ""),

tests/test_custom_check.py

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from check_datapackage.check import check
22
from check_datapackage.config import Config
3-
from check_datapackage.custom_check import CustomCheck
3+
from check_datapackage.custom_check import CustomCheck, RequiredCheck
44
from check_datapackage.examples import (
55
example_package_properties,
66
example_resource_properties,
@@ -54,12 +54,24 @@ def test_indirect_jsonpath():
5454

5555

5656
def test_multiple_custom_checks():
57-
properties = example_package_properties()
58-
properties["name"] = "ALLCAPS"
59-
properties["resources"][0]["name"] = "not starting with woolly"
57+
descriptor = example_package_properties()
58+
descriptor["name"] = "ALLCAPS"
59+
descriptor["resources"][0]["name"] = "not starting with woolly"
60+
del descriptor["version"]
61+
62+
version_check = RequiredCheck(
63+
jsonpath="$.version",
64+
message="Version is required.",
65+
)
6066

61-
config = Config(custom_checks=[lowercase_check, resource_name_check])
62-
issues = check(properties, config=config)
67+
config = Config(
68+
custom_checks=[
69+
lowercase_check,
70+
resource_name_check,
71+
version_check,
72+
]
73+
)
74+
issues = check(descriptor, config=config)
6375

6476
assert issues == [
6577
Issue(
@@ -72,6 +84,11 @@ def test_multiple_custom_checks():
7284
type=resource_name_check.type,
7385
message=resource_name_check.message,
7486
),
87+
Issue(
88+
jsonpath=version_check.jsonpath,
89+
type="required",
90+
message=version_check.message,
91+
),
7592
]
7693

7794

@@ -97,3 +114,44 @@ def test_no_matching_jsonpath():
97114
issues = check(properties, config=config)
98115

99116
assert issues == []
117+
118+
119+
def test_required_check_wildcard():
120+
descriptor = example_package_properties()
121+
id_check = RequiredCheck(
122+
jsonpath="$.*.id",
123+
message="All fields must have an id.",
124+
)
125+
config = Config(custom_checks=[id_check])
126+
127+
issues = check(descriptor, config=config)
128+
129+
assert len(issues) == 8
130+
131+
132+
def test_required_check_array_wildcard():
133+
descriptor = example_package_properties()
134+
descriptor["contributors"] = [
135+
{"path": "a/path"},
136+
{"path": "a/path"},
137+
{"path": "a/path", "name": "a name"},
138+
]
139+
name_check = RequiredCheck(
140+
jsonpath="$.contributors[*].name",
141+
message="Contributor name is required.",
142+
)
143+
config = Config(custom_checks=[name_check])
144+
issues = check(descriptor, config=config)
145+
146+
assert issues == [
147+
Issue(
148+
jsonpath="$.contributors[0].name",
149+
type="required",
150+
message=name_check.message,
151+
),
152+
Issue(
153+
jsonpath="$.contributors[1].name",
154+
type="required",
155+
message=name_check.message,
156+
),
157+
]

0 commit comments

Comments
 (0)