1- import pandas as pd
2- from typing import List , Union
3-
4- from metis .utils .result import DQResult
5- from metis .metric .metric import Metric
6-
class Consistency(Metric):
    """Rule-based consistency metric: share of rows that satisfy every configured rule."""

    def assess(
        self,
        data: pd.DataFrame,
        reference: Union[pd.DataFrame, None] = None,
        metric_config: Union[dict, str, None] = None,
    ) -> List[DQResult]:
        """
        Assess the consistency of the data by checking data values against a set of rules.
        Consistency metrics implemented according to the definition of Hinrichs, H. (2002). Doctoral dissertation, Universität Oldenburg.

        Every rule is called with one row of ``data`` (the Series yielded by
        ``DataFrame.iterrows``); a row counts as consistent only when all rules
        return a truthy value for it. Each consistent row contributes its weight
        (1 when no weight is available for that row) and the sum is divided by
        the number of rows. An empty DataFrame yields a value of 0.

        :param data: DataFrame to assess.
        :param reference: Not used by this metric.
        :param metric_config: Mandatory configuration for the metric. Must be a mapping with a
            'rules' entry (iterable of callables taking a row) and may contain a 'weights'
            entry (sequence indexed by row position). Missing weights default to 1.
        :return: List containing a single DQResult with the consistency value.
        :raises ValueError: If metric_config is None, is a plain string, or has no 'rules' key.
        """
        # A plain string can never carry callable rules. Reject it up front so a
        # string that merely contains the text "rules" does not slip past the
        # membership check and fail later with an unrelated TypeError.
        if (
            metric_config is None
            or isinstance(metric_config, str)
            or 'rules' not in metric_config
        ):
            raise ValueError("metric_config must contain 'rules' key with a list of rules.")

        # Materialise once: the rules are re-evaluated for every row, which
        # would silently exhaust a generator after the first row.
        rules = list(metric_config['rules'])

        # Weights are optional; an empty sequence makes every row fall back to 1.
        weights = metric_config['weights'] if 'weights' in metric_config else None
        if weights is None:
            weights = []

        valid_count = 0
        # Use the row *position* (not the index label) to look up the weight so
        # that non-integer or non-contiguous indexes work as well.
        for position, (_, row) in enumerate(data.iterrows()):
            if all(rule(row) for rule in rules):
                valid_count += weights[position] if position < len(weights) else 1

        total_rows = len(data)
        # NOTE(review): the denominator is the row count, not the total weight,
        # so weights other than 1 can push the value above 1 - confirm intent.
        consistency = valid_count / total_rows if total_rows > 0 else 0

        result = DQResult(
            mesTime=pd.Timestamp.now(),
            DQvalue=consistency,
            DQdimension="Consistency",
            DQmetric="Consistency",
            # Rules receive whole rows, so the result refers to all columns.
            columnNames=list(data.columns),
        )
        return [result]
0 commit comments