diff --git a/.gitignore b/.gitignore index 49c61fc..ee1f4fa 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ var/ *.egg-info/ .installed.cfg *.egg +.idea/ # PyInstaller # Usually these files are written by a python script from a template diff --git a/fairml/orthogonal_projection.py b/fairml/orthogonal_projection.py index 3e735fa..d7fef8c 100644 --- a/fairml/orthogonal_projection.py +++ b/fairml/orthogonal_projection.py @@ -11,6 +11,7 @@ # import a few utility functions from .utils import mse from .utils import accuracy +from .utils import gini from .utils import replace_column_of_matrix from .utils import detect_feature_sign @@ -126,7 +127,7 @@ def audit_model(predict_function, input_dataframe, distance_metric="mse", input_dataframe -> dataframe with shape (n_samples, n_features) - distance_metric -> one of ["mse", "accuracy"], this + distance_metric -> one of ["mse", "accuracy", "gini"], this variable defaults to regression. direct_input_pertubation_strategy -> This is referring to how to zero out a @@ -155,8 +156,8 @@ def audit_model(predict_function, input_dataframe, distance_metric="mse", """ assert isinstance(input_dataframe, pd.DataFrame), ("Data must be a pandas " "dataframe") - assert distance_metric in ["mse", "accuracy"], ("Distance metric must be " - "'mse' or 'accuracy'") + assert distance_metric in ["mse", "accuracy", "gini"], ("Distance metric must be " + "'mse', 'accuracy' or 'gini'") assert direct_input_pertubation_strategy in ["constant-zero", "constant-median", "random-sample" @@ -219,6 +220,9 @@ def audit_model(predict_function, input_dataframe, distance_metric="mse", if distance_metric == "accuracy": output_difference_col = accuracy( output_constant_col, normal_black_box_output) + elif distance_metric == "gini": + output_difference_col = gini( + output_constant_col, normal_black_box_output) else: output_difference_col = mse( output_constant_col, normal_black_box_output) @@ -241,6 +245,9 @@ def audit_model(predict_function, 
input_dataframe, distance_metric="mse", if distance_metric == "accuracy": total_difference = accuracy( total_transformed_output, normal_black_box_output) + elif distance_metric == "gini": + total_difference = gini( + total_transformed_output, normal_black_box_output) + else: total_difference = mse( total_transformed_output, normal_black_box_output) diff --git a/fairml/tests/test_orthogonal_projection.py b/fairml/tests/test_orthogonal_projection.py index f72f7e5..efee4fc 100644 --- a/fairml/tests/test_orthogonal_projection.py +++ b/fairml/tests/test_orthogonal_projection.py @@ -10,6 +10,7 @@ from fairml.utils import mse from fairml.utils import accuracy +from fairml.utils import gini from fairml.utils import detect_feature_sign from fairml.perturbation_strategies import constant_zero @@ -60,3 +61,11 @@ def test_accuracy(): test_acc = accuracy(y_pred, y_true) print(test_acc) assert test_acc == 0.5 + +def test_gini(): + y_pred = [0, 0, 0, 1] + y_true = [0, 0, 1, 1] + + test_gini = gini(y_true, y_pred) + print(test_gini) + assert test_gini == 0.5 diff --git a/fairml/utils.py b/fairml/utils.py index 0808d42..03a6511 100644 --- a/fairml/utils.py +++ b/fairml/utils.py @@ -3,6 +3,7 @@ from __future__ import print_function import numpy as np +from sklearn.metrics import roc_auc_score # import dictionary with perturbation strategies. from .perturbation_strategies import perturbation_strategy_dictionary @@ -40,6 +41,22 @@ def accuracy(y, y_hat): return accuracy +def gini(y, y_hat): + """Calculate the Gini coefficient (2 * AUC - 1) of y_hat given labels y.""" + y = np.array(y) + y_hat = np.array(y_hat) + + y = y.astype(int) + y_hat = y_hat.astype(float) + + y_hat = np.reshape(y_hat, (y_hat.shape[0],)) + y = np.reshape(y, (y.shape[0],)) + + ras = roc_auc_score(y, y_hat) + g = ras * 2 - 1 + + return g + def replace_column_of_matrix(X, col_num, random_sample, ptb_strategy):