From 7d048ed46b8b3b9bafe792ac79f7ec8e92cf924a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 26 Feb 2025 17:17:32 +0000 Subject: [PATCH 01/42] Try adding second sorting for mki --- assesspy/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assesspy/metrics.py b/assesspy/metrics.py index 0635796..5cd8283 100644 --- a/assesspy/metrics.py +++ b/assesspy/metrics.py @@ -212,7 +212,7 @@ def _calculate_gini( ) df = pd.concat([estimate, sale_price], axis=1) # Mergesort is required for stable sort results - df.sort_values(by="sale_price", kind="mergesort", inplace=True) + df.sort_values(by=["sale_price", "estimate"], kind="mergesort", inplace=True) df.reset_index(drop=True, inplace=True) a_sorted, sp_sorted = df["estimate"], df["sale_price"] n: int = a_sorted.size From bf585fd8073ad267556343f2ac3d196e442c7289 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 27 Feb 2025 19:50:47 +0000 Subject: [PATCH 02/42] Push test file --- assesspy/test.py | 172 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 assesspy/test.py diff --git a/assesspy/test.py b/assesspy/test.py new file mode 100644 index 0000000..c8b6ec6 --- /dev/null +++ b/assesspy/test.py @@ -0,0 +1,172 @@ +import numpy as np +import pandas as pd +import random +from pandas.api.types import is_numeric_dtype +import math +from typing import Union + +import statsmodels.api as sm + +CCAO_LOWER_QUANTILE = .05 +CCAO_UPPER_QUANTILE = .95 + +# Pulled from data architecture master +def ccao_drop_outliers( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], +) -> tuple[pd.Series, pd.Series, float]: + """ + Helper function to drop the top and bottom N% (usually 5%) of the input + ratios, per CCAO SOPs and IAAO recommendation. + """ + ratio: pd.Series = estimate / sale_price + ratio_not_outlier = ratio.between( + ratio.quantile(CCAO_LOWER_QUANTILE), + ratio.quantile(CCAO_UPPER_QUANTILE), + inclusive="neither", + ).reset_index(drop=True) + + estimate_no_outliers = estimate[ratio_not_outlier] + sale_price_no_outliers = sale_price[ratio_not_outlier] + n: float = float(estimate_no_outliers.size) + + return estimate_no_outliers, sale_price_no_outliers, n + + + +# Copied from master + +def check_inputs(*args, check_gt_zero: bool = True) -> None: + out_msg = [""] + for x in args: + check = pd.Series(x) + + if not is_numeric_dtype(check): + out_msg.append("All input values must be numeric.") + if check.isnull().any(): + out_msg.append("All input values cannot be null.") + if len(check) <= 1: + out_msg.append("All input values must have length greater than 1.") + if not all(np.isfinite(check) | check.isnull()): + out_msg.append("All input values cannot be infinite.") + if any(check <= 0) and check_gt_zero: + out_msg.append("All input values must be greater than 0.") + + lengths = [len(pd.Series(x)) for x in args] + if len(set(lengths)) > 1: + out_msg.append("All input values must have the same length.") + + out_msg_set = set(out_msg) + if len(out_msg_set) > 1: + raise Exception("\n".join(out_msg_set)) + +def _calculate_gini( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], +) -> tuple[float, float]: + """ + Helper function to calculate the Gini coefficients of sales and estimated + values. Note that the estimated value Gini is based on the sale price order. + """ + check_inputs(estimate, sale_price) + + estimate = ( + pd.Series(estimate, dtype=float) + .rename("estimate") + .reset_index(drop=True) + ) + sale_price = ( + pd.Series(sale_price, dtype=float) + .rename("sale_price") + .reset_index(drop=True) + ) + df = pd.concat([estimate, sale_price], axis=1) + # Mergesort is required for stable sort results + # I think it's better here to add a second sort for fmv too + df.sort_values(by=["sale_price"], kind="mergesort", inplace=True) + df.reset_index(drop=True, inplace=True) + a_sorted, sp_sorted = df["estimate"], df["sale_price"] + n: int = a_sorted.size + + assessed_sum: float = sum(a_sorted[i] * (i + 1) for i in range(n)) + g_assessed: float = 2 * assessed_sum / a_sorted.sum() - (n + 1) + gini_assessed: float = g_assessed / float(n) + + sale_price_sum: float = sum(sp_sorted[i] * (i + 1) for i in range(n)) + g_sale_price: float = 2 * sale_price_sum / sp_sorted.sum() - (n + 1) + gini_sale_price: float = g_sale_price / float(n) + + return gini_assessed, gini_sale_price + +def mki( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], +) -> float: + r""" + The Modified Kakwani Index (MKI) is a Gini-based measure to test for + vertical equity in assessment. It first orders properties by sale price + (ascending), then calculates the Gini coefficient for sale values + and estimated values (while remaining ordered by sale price). The + Modified Kakwani Index is the ratio between the coefficients: + $Gini of Estimated Values / Gini of Sale Prices$. + + For the Modified Kakwani Index: + + MKI < 1 is regressive + MKI = 1 is vertical equity + MKI > 1 is progressive + + .. Quintos, C. (2020). A Gini measure for vertical equity in property + assessments. https://researchexchange.iaao.org/jptaa/vol17/iss2/2 + + .. Quintos, C. (2021). A Gini decomposition of the sources of inequality in + property assessments. https://researchexchange.iaao.org/jptaa/vol18/iss2/6 + + :param estimate: + A list or ``pd.Series`` of estimated values. + Must be the same length as ``sale_price``. + :param sale_price: + A list or ``pd.Series`` of sale prices. + Must be the same length as ``estimate``. + :type estimate: Array-like numeric values + :type sale_price: Array-like numeric values + + :return: A single float value containing the MKI of the inputs. + :rtype: float + + :Example: + + .. code-block:: python + + # Calculate MKI: + import assesspy as ap + + ap.mki(ap.ccao_sample().estimate, ap.ccao_sample().sale_price) + """ + check_inputs(estimate, sale_price) + estimate = pd.Series(estimate, dtype=float) + sale_price = pd.Series(sale_price, dtype=float) + + gini_assessed, gini_sale_price = _calculate_gini(estimate, sale_price) + mki = float(gini_assessed / gini_sale_price) + + return mki + +df = pd.read_csv("assesspy/data/test_data.csv") + +# Exported two datasets here with different random seeds and they were identical +# There were no differences in the non-outliers +df_1 = ccao_drop_outliers(df.fmv, df.sale_price) +df_1 = pd.DataFrame({ + 'fmv': df_1[0], + 'sale_price': df_1[1] +}) + +output = mki(df_1.fmv, df_1.sale_price) + +random.seed(4356) + +output_2 = mki(df_1.fmv, df_1.sale_price) + +output +output_2 \ No newline at end of file From c60b5fb84dccbe22208e37b8201d86fb107f574a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 27 Feb 2025 19:51:52 +0000 Subject: [PATCH 03/42] remove second mergesort --- assesspy/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assesspy/metrics.py b/assesspy/metrics.py index 5cd8283..d022dc4 100644 --- a/assesspy/metrics.py +++ b/assesspy/metrics.py @@ -212,7 +212,7 @@ def _calculate_gini( ) df = pd.concat([estimate, sale_price], axis=1) # Mergesort is required for stable sort results - df.sort_values(by=["sale_price", "estimate"], kind="mergesort", inplace=True) + df.sort_values(by=["sale_price"], kind="mergesort", inplace=True) df.reset_index(drop=True, inplace=True) a_sorted, sp_sorted = df["estimate"], df["sale_price"] n: int = a_sorted.size From 8278bac7e2bb9cce73bc14dfbc347c23be77089c Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 29 Aug 2025 18:52:43 +0000 Subject: [PATCH 04/42] update sort --- assesspy/metrics.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/assesspy/metrics.py b/assesspy/metrics.py index d022dc4..c781e12 100644 --- a/assesspy/metrics.py +++ b/assesspy/metrics.py @@ -212,7 +212,12 @@ def _calculate_gini( ) df = pd.concat([estimate, sale_price], axis=1) # Mergesort is required for stable sort results - df.sort_values(by=["sale_price"], kind="mergesort", inplace=True) + df.sort_values( + by=["sale_price", "estimate"], + ascending=[True, False], + kind="mergesort", + inplace=True + ) df.reset_index(drop=True, inplace=True) a_sorted, sp_sorted = df["estimate"], df["sale_price"] n: int = a_sorted.size From 9de2cb9df1275616fe85f71ef690ef42e59113ec Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 2 Sep 2025 15:23:56 +0000 Subject: [PATCH 05/42] update quintos sample --- assesspy/data/quintos_sample.csv | 62 ++++++++++++++++---------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/assesspy/data/quintos_sample.csv b/assesspy/data/quintos_sample.csv index 6c370c4..11e86ac 100644 --- a/assesspy/data/quintos_sample.csv +++ b/assesspy/data/quintos_sample.csv @@ -1,31 +1,31 @@ -estimate,sale_price -37299,32900 -40166,36000 -56317,54000 -66184,64500 -69487,68000 -71515,70000 -75338,74000 -81036,80000 -85673,84900 -85021,89000 -90046,94250 -94089,99000 -100227,105900 -103157,109000 -108290,115000 -117099,124500 -115347,129900 -119678,135000 -131631,149000 -137321,155800 -143974,163500 -153572,175000 -148457,179000 -153488,185600 -165040,199900 -176940,215000 -192959,235000 -180046,250000 -200240,279000 -211445,295000 + price assessment1 assessment2 assessment3 +1 32900 37299.37125 37299.37125 37299.37125 +2 36000 40165.89269 40165.89269 40165.89269 +3 54000 56317.4201 56317.4201 56317.4201 +4 64500 66183.77244 66183.77244 66183.77244 +5 68000 69486.97316 69486.97316 69486.97316 +6 70000 71514.52586 71514.52586 71514.52586 +7 74000 75338.28603 75338.28603 75338.28603 +8 80000 81035.95111 81035.95111 81035.95111 +9 84900 85672.85577 85672.85577 85672.85577 +10 89000 85021.0865 94088.93683 90046.33945 +11 89000 90046.33945 85021.0865 94088.93683 +12 89000 94088.93683 90046.33945 85021.0865 +13 105900 100227.0936 100227.0936 100227.0936 +14 109000 103156.7516 103156.7516 103156.7516 +15 115000 108290.1277 108290.1277 108290.1277 +16 124500 117098.7563 117098.7563 117098.7563 +17 129900 115346.9796 115346.9796 115346.9796 +18 135000 119678.4223 119678.4223 119678.4223 +19 149000 131630.9478 131630.9478 131630.9478 +20 155800 137321.2061 137321.2061 137321.2061 +21 163500 143973.5639 143973.5639 143973.5639 +22 175000 153571.8563 153571.8563 153571.8563 +23 179000 148456.8866 148456.8866 148456.8866 +24 185600 153488.3876 153488.3876 153488.3876 +25 199900 165039.8271 165039.8271 165039.8271 +26 215000 176939.5763 176939.5763 176939.5763 +27 235000 192959.3127 192959.3127 192959.3127 +28 250000 180046.1193 180046.1193 180046.1193 +29 279000 200240.2442 200240.2442 200240.2442 +30 295000 211445.4891 211445.4891 211445.4891 \ No newline at end of file From 20451f23ea041f32161bd0179bcefabfccf8cfb2 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 2 Sep 2025 18:23:08 +0000 Subject: [PATCH 06/42] Add test for mki ki matching --- assesspy/data/quintos_sample.csv | 2 +- assesspy/tests/conftest.py | 2 +- assesspy/tests/test_metrics.py | 19 +++++++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/assesspy/data/quintos_sample.csv b/assesspy/data/quintos_sample.csv index 11e86ac..8174116 100644 --- a/assesspy/data/quintos_sample.csv +++ b/assesspy/data/quintos_sample.csv @@ -1,4 +1,4 @@ - price assessment1 assessment2 assessment3 + sale_price estimate1 estimate2 estimate3 1 32900 37299.37125 37299.37125 37299.37125 2 36000 40165.89269 40165.89269 40165.89269 3 54000 56317.4201 56317.4201 56317.4201 diff --git a/assesspy/tests/conftest.py b/assesspy/tests/conftest.py index 725e6d4..08c69bd 100644 --- a/assesspy/tests/conftest.py +++ b/assesspy/tests/conftest.py @@ -24,7 +24,7 @@ def ccao_data() -> tuple: @pt.fixture(scope="session") def quintos_data() -> tuple: sample = ap.quintos_sample() - return sample.estimate, sample.sale_price + return sample.estimate1, sample.sale_price @pt.fixture(scope="session", params=["1_1", "1_4", "d_1", "d_2"]) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 50603c6..f5a867b 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -79,3 +79,22 @@ def test_metric_met_function_thresholds(self, metric, metric_val): "mki": False, } assert getattr(ap, f"{metric}_met")(metric_val) == expected[metric] + +def test_quintos_mki_ki_match(): + """MKI and KI results should be identical regardless of estimate1/2/3.""" + sample = ap.quintos_sample() + estimates = [sample["estimate1"], sample["estimate2"], sample["estimate3"]] + sales = sample["sale_price"] + + # Compute MKI + KI for each estimate column + results = [] + for est in estimates: + mki_val = ap.mki(est, sales) + ki_val = ap.ki(est, sales) + results.append((mki_val, ki_val)) + + # Use the first as reference and check all others match + ref_mki, ref_ki = results[0] + for i, (mki_val, ki_val) in enumerate(results[1:], start=2): + assert mki_val == ref_mki, f"MKI differs for estimate{i}" + assert ki_val == ref_ki, f"KI differs for estimate{i}" From e97c4132ed74a8fb136fadc637a4d514234d0980 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 2 Sep 2025 18:41:44 +0000 Subject: [PATCH 07/42] comma based seed --- assesspy/data/quintos_sample.csv | 62 ++++++++++++++++---------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/assesspy/data/quintos_sample.csv b/assesspy/data/quintos_sample.csv index 8174116..c0951ef 100644 --- a/assesspy/data/quintos_sample.csv +++ b/assesspy/data/quintos_sample.csv @@ -1,31 +1,31 @@ - sale_price estimate1 estimate2 estimate3 -1 32900 37299.37125 37299.37125 37299.37125 -2 36000 40165.89269 40165.89269 40165.89269 -3 54000 56317.4201 56317.4201 56317.4201 -4 64500 66183.77244 66183.77244 66183.77244 -5 68000 69486.97316 69486.97316 69486.97316 -6 70000 71514.52586 71514.52586 71514.52586 -7 74000 75338.28603 75338.28603 75338.28603 -8 80000 81035.95111 81035.95111 81035.95111 -9 84900 85672.85577 85672.85577 85672.85577 -10 89000 85021.0865 94088.93683 90046.33945 -11 89000 90046.33945 85021.0865 94088.93683 -12 89000 94088.93683 90046.33945 85021.0865 -13 105900 100227.0936 100227.0936 100227.0936 -14 109000 103156.7516 103156.7516 103156.7516 -15 115000 108290.1277 108290.1277 108290.1277 -16 124500 117098.7563 117098.7563 117098.7563 -17 129900 115346.9796 115346.9796 115346.9796 -18 135000 119678.4223 119678.4223 119678.4223 -19 149000 131630.9478 131630.9478 131630.9478 -20 155800 137321.2061 137321.2061 137321.2061 -21 163500 143973.5639 143973.5639 143973.5639 -22 175000 153571.8563 153571.8563 153571.8563 -23 179000 148456.8866 148456.8866 148456.8866 -24 185600 153488.3876 153488.3876 153488.3876 -25 199900 165039.8271 165039.8271 165039.8271 -26 215000 176939.5763 176939.5763 176939.5763 -27 235000 192959.3127 192959.3127 192959.3127 -28 250000 180046.1193 180046.1193 180046.1193 -29 279000 200240.2442 200240.2442 200240.2442 -30 295000 211445.4891 211445.4891 211445.4891 \ No newline at end of file +sale_price,estimate1 estimate2 estimate3 +32900,37299.37125,37299.37125,37299.37125 +36000,40165.89269,40165.89269,40165.89269 +54000,56317.4201,56317.4201,56317.4201 +64500,66183.77244,66183.77244,66183.77244 +68000,69486.97316,69486.97316,69486.97316 +70000,71514.52586,71514.52586,71514.52586 +74000,75338.28603,75338.28603,75338.28603 +80000,81035.95111,81035.95111,81035.95111 +84900,85672.85577,85672.85577,85672.85577 +89000,85021.0865,94088.93683,90046.33945 +89000,90046.33945,85021.0865,94088.93683 +89000,94088.93683,90046.33945,85021.0865 +105900,100227.0936,100227.0936,100227.0936 +109000,103156.7516,103156.7516,103156.7516 +115000,108290.1277,108290.1277,108290.1277 +124500,117098.7563,117098.7563,117098.7563 +129900,115346.9796,115346.9796,115346.9796 +135000,119678.4223,119678.4223,119678.4223 +149000,131630.9478,131630.9478,131630.9478 +155800,137321.2061,137321.2061,137321.2061 +163500,143973.5639,143973.5639,143973.5639 +175000,153571.8563,153571.8563,153571.8563 +179000,148456.8866,148456.8866,148456.8866 +185600,153488.3876,153488.3876,153488.3876 +199900,165039.8271,165039.8271,165039.8271 +215000,176939.5763,176939.5763,176939.5763 +235000,192959.3127,192959.3127,192959.3127 +250000,180046.1193,180046.1193,180046.1193 +279000,200240.2442,200240.2442,200240.2442 +295000,211445.4891,211445.4891,211445.4891 \ No newline at end of file From d83c5416fd77be49d8ed2a933530d67966f33f47 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 2 Sep 2025 19:18:10 +0000 Subject: [PATCH 08/42] change to comma separated --- assesspy/data/quintos_sample.csv | 2 +- assesspy/tests/conftest.py | 2 +- assesspy/tests/test_metrics.py | 23 ++--------------------- 3 files changed, 4 insertions(+), 23 deletions(-) diff --git a/assesspy/data/quintos_sample.csv b/assesspy/data/quintos_sample.csv index c0951ef..9dea485 100644 --- a/assesspy/data/quintos_sample.csv +++ b/assesspy/data/quintos_sample.csv @@ -1,4 +1,4 @@ -sale_price,estimate1 estimate2 estimate3 +sale_price,estimate,estimate1,estimate2 32900,37299.37125,37299.37125,37299.37125 36000,40165.89269,40165.89269,40165.89269 54000,56317.4201,56317.4201,56317.4201 diff --git a/assesspy/tests/conftest.py b/assesspy/tests/conftest.py index 08c69bd..725e6d4 100644 --- a/assesspy/tests/conftest.py +++ b/assesspy/tests/conftest.py @@ -24,7 +24,7 @@ def ccao_data() -> tuple: @pt.fixture(scope="session") def quintos_data() -> tuple: sample = ap.quintos_sample() - return sample.estimate1, sample.sale_price + return sample.estimate, sample.sale_price @pt.fixture(scope="session", params=["1_1", "1_4", "d_1", "d_2"]) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index f5a867b..a467eaf 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -20,7 +20,7 @@ def test_metric_value_is_correct_ccao(self, metric, metric_val): "prd": 1.0484192615223522, "prb": 0.0024757, "mki": 0.794, - "ki": -0.06, + "ki": -0.062, } assert pt.approx(metric_val, rel=0.01) == expected[metric] @@ -78,23 +78,4 @@ def test_metric_met_function_thresholds(self, metric, metric_val): "prb": True, "mki": False, } - assert getattr(ap, f"{metric}_met")(metric_val) == expected[metric] - -def test_quintos_mki_ki_match(): - """MKI and KI results should be identical regardless of estimate1/2/3.""" - sample = ap.quintos_sample() - estimates = [sample["estimate1"], sample["estimate2"], sample["estimate3"]] - sales = sample["sale_price"] - - # Compute MKI + KI for each estimate column - results = [] - for est in estimates: - mki_val = ap.mki(est, sales) - ki_val = ap.ki(est, sales) - results.append((mki_val, ki_val)) - - # Use the first as reference and check all others match - ref_mki, ref_ki = results[0] - for i, (mki_val, ki_val) in enumerate(results[1:], start=2): - assert mki_val == ref_mki, f"MKI differs for estimate{i}" - assert ki_val == ref_ki, f"KI differs for estimate{i}" + assert getattr(ap, f"{metric}_met")(metric_val) == expected[metric] \ No newline at end of file From d8bd9cf867fe900a723b18e8a4473600878b114f Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 2 Sep 2025 19:22:18 +0000 Subject: [PATCH 09/42] precommit --- assesspy/metrics.py | 2 +- assesspy/test.py | 25 ++++++++++++------------- assesspy/tests/test_metrics.py | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/assesspy/metrics.py b/assesspy/metrics.py index c781e12..9313e70 100644 --- a/assesspy/metrics.py +++ b/assesspy/metrics.py @@ -216,7 +216,7 @@ def _calculate_gini( by=["sale_price", "estimate"], ascending=[True, False], kind="mergesort", - inplace=True + inplace=True, ) df.reset_index(drop=True, inplace=True) a_sorted, sp_sorted = df["estimate"], df["sale_price"] diff --git a/assesspy/test.py b/assesspy/test.py index c8b6ec6..554450d 100644 --- a/assesspy/test.py +++ b/assesspy/test.py @@ -1,14 +1,13 @@ +import random +from typing import Union + import numpy as np import pandas as pd -import random from pandas.api.types import is_numeric_dtype -import math -from typing import Union -import statsmodels.api as sm +CCAO_LOWER_QUANTILE = 0.05 +CCAO_UPPER_QUANTILE = 0.95 -CCAO_LOWER_QUANTILE = .05 -CCAO_UPPER_QUANTILE = .95 # Pulled from data architecture master def ccao_drop_outliers( @@ -33,9 +32,9 @@ def ccao_drop_outliers( return estimate_no_outliers, sale_price_no_outliers, n - # Copied from master + def check_inputs(*args, check_gt_zero: bool = True) -> None: out_msg = [""] for x in args: @@ -59,7 +58,8 @@ def check_inputs(*args, check_gt_zero: bool = True) -> None: out_msg_set = set(out_msg) if len(out_msg_set) > 1: raise Exception("\n".join(out_msg_set)) - + + def _calculate_gini( estimate: Union[list[int], list[float], pd.Series], sale_price: Union[list[int], list[float], pd.Series], @@ -98,6 +98,7 @@ def _calculate_gini( return gini_assessed, gini_sale_price + def mki( estimate: Union[list[int], list[float], pd.Series], sale_price: Union[list[int], list[float], pd.Series], @@ -152,15 +153,13 @@ def mki( return mki + df = pd.read_csv("assesspy/data/test_data.csv") # Exported two datasets here with different random seeds and they were identical # There were no differences in the non-outliers df_1 = ccao_drop_outliers(df.fmv, df.sale_price) -df_1 = pd.DataFrame({ - 'fmv': df_1[0], - 'sale_price': df_1[1] -}) +df_1 = pd.DataFrame({"fmv": df_1[0], "sale_price": df_1[1]}) output = mki(df_1.fmv, df_1.sale_price) @@ -169,4 +168,4 @@ def mki( output_2 = mki(df_1.fmv, df_1.sale_price) output -output_2 \ No newline at end of file +output_2 diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index a467eaf..00dd644 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -78,4 +78,4 @@ def test_metric_met_function_thresholds(self, metric, metric_val): "prb": True, "mki": False, } - assert getattr(ap, f"{metric}_met")(metric_val) == expected[metric] \ No newline at end of file + assert getattr(ap, f"{metric}_met")(metric_val) == expected[metric] From 7e518e0b260c1318f6463fd03042c358d0307681 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 2 Sep 2025 19:51:02 +0000 Subject: [PATCH 10/42] add matching test --- assesspy/tests/test_metrics.py | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 00dd644..8168055 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -1,3 +1,4 @@ +import numpy as np import pytest as pt import assesspy as ap @@ -79,3 +80,37 @@ def test_metric_met_function_thresholds(self, metric, metric_val): "mki": False, } assert getattr(ap, f"{metric}_met")(metric_val) == expected[metric] + + +@pt.mark.parametrize("metric", ["mki", "ki"]) +def test_quintos_metric_matches_across_estimates(metric): + """ + For the quintos dataset, MKI/KI should be identical based + on the ordering of estimates. + """ + sample = ap.quintos_sample() + # Clean header whitespace/tabs just in case + sample.columns = sample.columns.astype(str).str.strip() + + assert ( + "sale_price" in sample.columns + ), "sale_price missing from quintos_sample" + + estimate_cols = [ + c + for c in ["estimate", "estimate1", "estimate2"] + if c in sample.columns + ] + + sales = sample["sale_price"] + + # Use the first present estimate column as the reference + ref_col = estimate_cols[0] + ref_val = getattr(ap, metric)(sample[ref_col], sales) + + for col in estimate_cols[1:]: + val = getattr(ap, metric)(sample[col], sales) + assert np.isclose(val, ref_val, rtol=1e-9, atol=1e-12), ( + f"{metric.upper()} differs between {ref_col} and {col}: " + f"{ref_val} vs {val}" + ) From 3432b95121fb0d06b3f162b741f45b92f9923cee Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 2 Sep 2025 19:57:59 +0000 Subject: [PATCH 11/42] precommit --- assesspy/tests/test_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 8168055..cf7ca9a 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -101,7 +101,7 @@ def test_quintos_metric_matches_across_estimates(metric): for c in ["estimate", "estimate1", "estimate2"] if c in sample.columns ] - + sales = sample["sale_price"] # Use the first present estimate column as the reference From 0fe2336d4d2e38ce0d28c514012406e28511d924 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 2 Sep 2025 20:53:17 +0000 Subject: [PATCH 12/42] Remove excess code --- assesspy/tests/test_metrics.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index cf7ca9a..da0257d 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -89,13 +89,6 @@ def test_quintos_metric_matches_across_estimates(metric): on the ordering of estimates. """ sample = ap.quintos_sample() - # Clean header whitespace/tabs just in case - sample.columns = sample.columns.astype(str).str.strip() - - assert ( - "sale_price" in sample.columns - ), "sale_price missing from quintos_sample" - estimate_cols = [ c for c in ["estimate", "estimate1", "estimate2"] From 6908bb2d901c310b7194ee062b0b83d95291dc06 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Tue, 2 Sep 2025 16:10:44 -0500 Subject: [PATCH 13/42] Delete assesspy/test.py --- assesspy/test.py | 171 ----------------------------------------------- 1 file changed, 171 deletions(-) delete mode 100644 assesspy/test.py diff --git a/assesspy/test.py b/assesspy/test.py deleted file mode 100644 index 554450d..0000000 --- a/assesspy/test.py +++ /dev/null @@ -1,171 +0,0 @@ -import random -from typing import Union - -import numpy as np -import pandas as pd -from pandas.api.types import is_numeric_dtype - -CCAO_LOWER_QUANTILE = 0.05 -CCAO_UPPER_QUANTILE = 0.95 - - -# Pulled from data architecture master -def ccao_drop_outliers( - estimate: Union[list[int], list[float], pd.Series], - sale_price: Union[list[int], list[float], pd.Series], -) -> tuple[pd.Series, pd.Series, float]: - """ - Helper function to drop the top and bottom N% (usually 5%) of the input - ratios, per CCAO SOPs and IAAO recommendation. - """ - ratio: pd.Series = estimate / sale_price - ratio_not_outlier = ratio.between( - ratio.quantile(CCAO_LOWER_QUANTILE), - ratio.quantile(CCAO_UPPER_QUANTILE), - inclusive="neither", - ).reset_index(drop=True) - - estimate_no_outliers = estimate[ratio_not_outlier] - sale_price_no_outliers = sale_price[ratio_not_outlier] - n: float = float(estimate_no_outliers.size) - - return estimate_no_outliers, sale_price_no_outliers, n - - -# Copied from master - - -def check_inputs(*args, check_gt_zero: bool = True) -> None: - out_msg = [""] - for x in args: - check = pd.Series(x) - - if not is_numeric_dtype(check): - out_msg.append("All input values must be numeric.") - if check.isnull().any(): - out_msg.append("All input values cannot be null.") - if len(check) <= 1: - out_msg.append("All input values must have length greater than 1.") - if not all(np.isfinite(check) | check.isnull()): - out_msg.append("All input values cannot be infinite.") - if any(check <= 0) and check_gt_zero: - out_msg.append("All input values must be greater than 0.") - - lengths = [len(pd.Series(x)) for x in args] - if len(set(lengths)) > 1: - out_msg.append("All input values must have the same length.") - - out_msg_set = set(out_msg) - if len(out_msg_set) > 1: - raise Exception("\n".join(out_msg_set)) - - -def _calculate_gini( - estimate: Union[list[int], list[float], pd.Series], - sale_price: Union[list[int], list[float], pd.Series], -) -> tuple[float, float]: - """ - Helper function to calculate the Gini coefficients of sales and estimated - values. Note that the estimated value Gini is based on the sale price order. - """ - check_inputs(estimate, sale_price) - - estimate = ( - pd.Series(estimate, dtype=float) - .rename("estimate") - .reset_index(drop=True) - ) - sale_price = ( - pd.Series(sale_price, dtype=float) - .rename("sale_price") - .reset_index(drop=True) - ) - df = pd.concat([estimate, sale_price], axis=1) - # Mergesort is required for stable sort results - # I think it's better here to add a second sort for fmv too - df.sort_values(by=["sale_price"], kind="mergesort", inplace=True) - df.reset_index(drop=True, inplace=True) - a_sorted, sp_sorted = df["estimate"], df["sale_price"] - n: int = a_sorted.size - - assessed_sum: float = sum(a_sorted[i] * (i + 1) for i in range(n)) - g_assessed: float = 2 * assessed_sum / a_sorted.sum() - (n + 1) - gini_assessed: float = g_assessed / float(n) - - sale_price_sum: float = sum(sp_sorted[i] * (i + 1) for i in range(n)) - g_sale_price: float = 2 * sale_price_sum / sp_sorted.sum() - (n + 1) - gini_sale_price: float = g_sale_price / float(n) - - return gini_assessed, gini_sale_price - - -def mki( - estimate: Union[list[int], list[float], pd.Series], - sale_price: Union[list[int], list[float], pd.Series], -) -> float: - r""" - The Modified Kakwani Index (MKI) is a Gini-based measure to test for - vertical equity in assessment. It first orders properties by sale price - (ascending), then calculates the Gini coefficient for sale values - and estimated values (while remaining ordered by sale price). The - Modified Kakwani Index is the ratio between the coefficients: - $Gini of Estimated Values / Gini of Sale Prices$. - - For the Modified Kakwani Index: - - MKI < 1 is regressive - MKI = 1 is vertical equity - MKI > 1 is progressive - - .. Quintos, C. (2020). A Gini measure for vertical equity in property - assessments. https://researchexchange.iaao.org/jptaa/vol17/iss2/2 - - .. Quintos, C. (2021). A Gini decomposition of the sources of inequality in - property assessments. https://researchexchange.iaao.org/jptaa/vol18/iss2/6 - - :param estimate: - A list or ``pd.Series`` of estimated values. - Must be the same length as ``sale_price``. - :param sale_price: - A list or ``pd.Series`` of sale prices. - Must be the same length as ``estimate``. - :type estimate: Array-like numeric values - :type sale_price: Array-like numeric values - - :return: A single float value containing the MKI of the inputs. - :rtype: float - - :Example: - - .. code-block:: python - - # Calculate MKI: - import assesspy as ap - - ap.mki(ap.ccao_sample().estimate, ap.ccao_sample().sale_price) - """ - check_inputs(estimate, sale_price) - estimate = pd.Series(estimate, dtype=float) - sale_price = pd.Series(sale_price, dtype=float) - - gini_assessed, gini_sale_price = _calculate_gini(estimate, sale_price) - mki = float(gini_assessed / gini_sale_price) - - return mki - - -df = pd.read_csv("assesspy/data/test_data.csv") - -# Exported two datasets here with different random seeds and they were identical -# There were no differences in the non-outliers -df_1 = ccao_drop_outliers(df.fmv, df.sale_price) -df_1 = pd.DataFrame({"fmv": df_1[0], "sale_price": df_1[1]}) - -output = mki(df_1.fmv, df_1.sale_price) - -random.seed(4356) - -output_2 = mki(df_1.fmv, df_1.sale_price) - -output -output_2 From 50082a96cdbd011c9ecf5b0979f24cc3ba5098eb Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Wed, 3 Sep 2025 10:19:16 -0500 Subject: [PATCH 14/42] rename test more accurately --- assesspy/tests/test_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index da0257d..3f7245d 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -83,7 +83,7 @@ def test_metric_met_function_thresholds(self, metric, metric_val): @pt.mark.parametrize("metric", ["mki", "ki"]) -def test_quintos_metric_matches_across_estimates(metric): +def test_mki_matches_based_on_tied_estimates(metric): """ For the quintos dataset, MKI/KI should be identical based on the ordering of estimates. From d49193318c2de4674d2ca82a102cfbec29c4c1e8 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Wed, 3 Sep 2025 10:20:18 -0500 Subject: [PATCH 15/42] update name again --- assesspy/tests/test_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 3f7245d..a7f406b 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -83,7 +83,7 @@ def test_metric_met_function_thresholds(self, metric, metric_val): @pt.mark.parametrize("metric", ["mki", "ki"]) -def test_mki_matches_based_on_tied_estimates(metric): +def test_mki_matches_based_on_tied_sales(metric): """ For the quintos dataset, MKI/KI should be identical based on the ordering of estimates. From 8477716b0b9b0595664262112bce0f9de44b782d Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Wed, 3 Sep 2025 11:09:29 -0500 Subject: [PATCH 16/42] Update assesspy/tests/test_metrics.py Co-authored-by: Jean Cochrane --- assesspy/tests/test_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index a7f406b..498533d 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -103,7 +103,7 @@ def test_mki_matches_based_on_tied_sales(metric): for col in estimate_cols[1:]: val = getattr(ap, metric)(sample[col], sales) - assert np.isclose(val, ref_val, rtol=1e-9, atol=1e-12), ( + assert val == ref_val, ( f"{metric.upper()} differs between {ref_col} and {col}: " f"{ref_val} vs {val}" ) From e49fb163c73a32e4e6e4039f5ef80664309e40bb Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Wed, 3 Sep 2025 11:09:37 -0500 Subject: [PATCH 17/42] Update assesspy/data/quintos_sample.csv Co-authored-by: Jean Cochrane --- assesspy/data/quintos_sample.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assesspy/data/quintos_sample.csv b/assesspy/data/quintos_sample.csv index 9dea485..607fc46 100644 --- a/assesspy/data/quintos_sample.csv +++ b/assesspy/data/quintos_sample.csv @@ -1,4 +1,4 @@ -sale_price,estimate,estimate1,estimate2 +sale_price,estimate,estimate_alt_sort_1,estimate_alt_sort_2 32900,37299.37125,37299.37125,37299.37125 36000,40165.89269,40165.89269,40165.89269 54000,56317.4201,56317.4201,56317.4201 From 4fb55e96c137f643f981a442d44cd66ef757a6c5 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 3 Sep 2025 20:36:22 +0000 Subject: [PATCH 18/42] use both csv files --- assesspy/data/quintos_sample.csv | 62 +++++++++---------- .../data/quintos_sample_with_tiebreaks.csv | 31 ++++++++++ 2 files changed, 62 insertions(+), 31 deletions(-) create mode 100644 assesspy/data/quintos_sample_with_tiebreaks.csv diff --git a/assesspy/data/quintos_sample.csv b/assesspy/data/quintos_sample.csv index 607fc46..0db90c9 100644 --- a/assesspy/data/quintos_sample.csv +++ b/assesspy/data/quintos_sample.csv @@ -1,31 +1,31 @@ -sale_price,estimate,estimate_alt_sort_1,estimate_alt_sort_2 -32900,37299.37125,37299.37125,37299.37125 -36000,40165.89269,40165.89269,40165.89269 -54000,56317.4201,56317.4201,56317.4201 -64500,66183.77244,66183.77244,66183.77244 -68000,69486.97316,69486.97316,69486.97316 -70000,71514.52586,71514.52586,71514.52586 -74000,75338.28603,75338.28603,75338.28603 -80000,81035.95111,81035.95111,81035.95111 -84900,85672.85577,85672.85577,85672.85577 -89000,85021.0865,94088.93683,90046.33945 -89000,90046.33945,85021.0865,94088.93683 -89000,94088.93683,90046.33945,85021.0865 -105900,100227.0936,100227.0936,100227.0936 -109000,103156.7516,103156.7516,103156.7516 -115000,108290.1277,108290.1277,108290.1277 -124500,117098.7563,117098.7563,117098.7563 -129900,115346.9796,115346.9796,115346.9796 -135000,119678.4223,119678.4223,119678.4223 -149000,131630.9478,131630.9478,131630.9478 -155800,137321.2061,137321.2061,137321.2061 -163500,143973.5639,143973.5639,143973.5639 -175000,153571.8563,153571.8563,153571.8563 -179000,148456.8866,148456.8866,148456.8866 -185600,153488.3876,153488.3876,153488.3876 -199900,165039.8271,165039.8271,165039.8271 -215000,176939.5763,176939.5763,176939.5763 -235000,192959.3127,192959.3127,192959.3127 -250000,180046.1193,180046.1193,180046.1193 -279000,200240.2442,200240.2442,200240.2442 -295000,211445.4891,211445.4891,211445.4891 \ No newline at end of file +estimate,sale_price +37299,32900 +40166,36000 +56317,54000 +66184,64500 +69487,68000 +71515,70000 +75338,74000 +81036,80000 +85673,84900 +85021,89000 +90046,94250 +94089,99000 +100227,105900 +103157,109000 +108290,115000 +117099,124500 +115347,129900 +119678,135000 +131631,149000 +137321,155800 +143974,163500 +153572,175000 +148457,179000 +153488,185600 +165040,199900 +176940,215000 +192959,235000 +180046,250000 +200240,279000 +211445,295000 \ No newline at end of file diff --git a/assesspy/data/quintos_sample_with_tiebreaks.csv b/assesspy/data/quintos_sample_with_tiebreaks.csv new file mode 100644 index 0000000..607fc46 --- /dev/null +++ b/assesspy/data/quintos_sample_with_tiebreaks.csv @@ -0,0 +1,31 @@ +sale_price,estimate,estimate_alt_sort_1,estimate_alt_sort_2 +32900,37299.37125,37299.37125,37299.37125 +36000,40165.89269,40165.89269,40165.89269 +54000,56317.4201,56317.4201,56317.4201 +64500,66183.77244,66183.77244,66183.77244 +68000,69486.97316,69486.97316,69486.97316 +70000,71514.52586,71514.52586,71514.52586 +74000,75338.28603,75338.28603,75338.28603 +80000,81035.95111,81035.95111,81035.95111 +84900,85672.85577,85672.85577,85672.85577 +89000,85021.0865,94088.93683,90046.33945 +89000,90046.33945,85021.0865,94088.93683 +89000,94088.93683,90046.33945,85021.0865 +105900,100227.0936,100227.0936,100227.0936 +109000,103156.7516,103156.7516,103156.7516 +115000,108290.1277,108290.1277,108290.1277 +124500,117098.7563,117098.7563,117098.7563 +129900,115346.9796,115346.9796,115346.9796 +135000,119678.4223,119678.4223,119678.4223 +149000,131630.9478,131630.9478,131630.9478 +155800,137321.2061,137321.2061,137321.2061 +163500,143973.5639,143973.5639,143973.5639 +175000,153571.8563,153571.8563,153571.8563 +179000,148456.8866,148456.8866,148456.8866 +185600,153488.3876,153488.3876,153488.3876 +199900,165039.8271,165039.8271,165039.8271 +215000,176939.5763,176939.5763,176939.5763 +235000,192959.3127,192959.3127,192959.3127 +250000,180046.1193,180046.1193,180046.1193 +279000,200240.2442,200240.2442,200240.2442 +295000,211445.4891,211445.4891,211445.4891 \ No newline at end of file From f5be991f56a81484094275207822a52ad9aab284 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 3 Sep 2025 21:19:34 +0000 Subject: [PATCH 19/42] update test_metrics --- assesspy/tests/test_metrics.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 498533d..23f6e23 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -1,4 +1,3 @@ -import numpy as np import pytest as pt import assesspy as ap @@ -21,7 +20,7 @@ def test_metric_value_is_correct_ccao(self, metric, metric_val): "prd": 1.0484192615223522, "prb": 0.0024757, "mki": 0.794, - "ki": -0.062, + "ki": -0.06, } assert pt.approx(metric_val, rel=0.01) == expected[metric] @@ -88,10 +87,10 @@ def test_mki_matches_based_on_tied_sales(metric): For the quintos dataset, MKI/KI should be identical based on the ordering of estimates. """ - sample = ap.quintos_sample() + sample = ap.quintos_sample_with_tiebreaks() estimate_cols = [ c - for c in ["estimate", "estimate1", "estimate2"] + for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] if c in sample.columns ] From fe5b4454849800ef97e23cf5df04a3a98e899354 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 15:47:50 +0000 Subject: [PATCH 20/42] Include documentation --- assesspy/__init__.py | 2 ++ assesspy/load_data.py | 22 +++++++++++++++++++ docs/source/quintos_sample_with_tiebreaks.rst | 5 +++++ 3 files changed, 29 insertions(+) create mode 100644 docs/source/quintos_sample_with_tiebreaks.rst diff --git a/assesspy/__init__.py b/assesspy/__init__.py index 8d8652a..f12ab6e 100644 --- a/assesspy/__init__.py +++ b/assesspy/__init__.py @@ -7,6 +7,7 @@ from .load_data import ( ccao_sample, quintos_sample, + quintos_sample_with_tiebreaks, ) from .metrics import ( cod, @@ -22,3 +23,4 @@ ) from .outliers import is_outlier from .sales_chasing import is_sales_chased +import pandas as pd diff --git a/assesspy/load_data.py b/assesspy/load_data.py index dcca6c7..24c0c2b 100644 --- a/assesspy/load_data.py +++ b/assesspy/load_data.py @@ -50,3 +50,25 @@ def quintos_sample() -> pd.DataFrame: source = files("assesspy").joinpath("data/quintos_sample.csv") with as_file(source) as file: return pd.read_csv(file) + + +def quintos_sample_with_tiebreaks() -> pd.DataFrame: + """ + Sample of sales and estimated market values modified to include tiebreak situations + :return: + A Pandas DataFrame with 30 observation and 4 variables: + + ======================== ===================================================== + **estimate** (`float`) Assessed fair market value + **estimate_alt_sort_1** (`float`) Alternative sort 1 for tiebreaks + **estimate_alt_sort_2** (`float`) Alternative sort 2 for tiebreaks + **sale_price** (`float`) Recorded sale price of this property + ======================== ===================================================== + + :rtype: pd.DataFrame + """ + source = files("assesspy").joinpath( + "data/quintos_sample_with_tiebreaks.csv" + ) + with as_file(source) as file: + return pd.read_csv(file) diff --git a/docs/source/quintos_sample_with_tiebreaks.rst b/docs/source/quintos_sample_with_tiebreaks.rst new file mode 100644 index 0000000..69a1b91 --- /dev/null +++ b/docs/source/quintos_sample_with_tiebreaks.rst @@ -0,0 +1,5 @@ +================================ +Sample data from Quintos studies Modified with Tiebreaks +================================ + +.. autofunction:: assesspy.quintos_sample_with_tiebreaks From e67f91527a77a7bcbd71887c5e0127f595eb3d9f Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 15:59:37 +0000 Subject: [PATCH 21/42] lintr --- assesspy/__init__.py | 3 ++- assesspy/tests/test_metrics.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/assesspy/__init__.py b/assesspy/__init__.py index f12ab6e..4d354df 100644 --- a/assesspy/__init__.py +++ b/assesspy/__init__.py @@ -1,3 +1,5 @@ +import pandas as pd + from .ci import ( boot_ci, cod_ci, @@ -23,4 +25,3 @@ ) from .outliers import is_outlier from .sales_chasing import is_sales_chased -import pandas as pd diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 23f6e23..7bf03d9 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -96,7 +96,6 @@ def test_mki_matches_based_on_tied_sales(metric): sales = sample["sale_price"] - # Use the first present estimate column as the reference ref_col = estimate_cols[0] ref_val = getattr(ap, metric)(sample[ref_col], sales) From 34e0b14160c792b4ee735c3878c70633129c12f1 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 16:23:57 +0000 Subject: [PATCH 22/42] set as a fixture --- assesspy/tests/test_metrics.py | 55 ++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 7bf03d9..96fae0d 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -80,28 +80,37 @@ def test_metric_met_function_thresholds(self, metric, metric_val): } assert getattr(ap, f"{metric}_met")(metric_val) == expected[metric] - -@pt.mark.parametrize("metric", ["mki", "ki"]) -def test_mki_matches_based_on_tied_sales(metric): +@pt.fixture +def compute_quintos_tie_equal(): """ - For the quintos dataset, MKI/KI should be identical based - on the ordering of estimates. + Compute MKI/KI for the quintos tiebreak sample and assert equality + across all estimate variants. Returns the common value. """ - sample = ap.quintos_sample_with_tiebreaks() - estimate_cols = [ - c - for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] - if c in sample.columns - ] - - sales = sample["sale_price"] - - ref_col = estimate_cols[0] - ref_val = getattr(ap, metric)(sample[ref_col], sales) - - for col in estimate_cols[1:]: - val = getattr(ap, metric)(sample[col], sales) - assert val == ref_val, ( - f"{metric.upper()} differs between {ref_col} and {col}: " - f"{ref_val} vs {val}" - ) + def _compute(metric_name: str) -> float: + + sample = ap.quintos_sample_with_tiebreaks() + estimate_cols = [ + c for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] + if c in sample.columns + ] + sales = sample["sale_price"] + + ref_col = estimate_cols[0] + ref_val = getattr(ap, metric_name)(sample[ref_col], sales) + + for col in estimate_cols[1:]: + val = getattr(ap, metric_name)(sample[col], sales) + assert val == ref_val, ( + f"{metric_name.upper()} differs between {ref_col} and {col}: " + f"{ref_val} vs {val}" + ) + + return ref_val + + return _compute + + +@pt.mark.parametrize("metric_name", ["mki", "ki"]) +def test_quintos_tie_equal(compute_quintos_tie_equal, metric_name): + val = compute_quintos_tie_equal(metric_name) + assert isinstance(val, float) \ No newline at end of file From e7d25ac02e5c17194b9161daa0db049c190b22bf Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 16:24:18 +0000 Subject: [PATCH 23/42] lintr --- assesspy/tests/test_metrics.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 96fae0d..40cd68a 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -80,17 +80,19 @@ def test_metric_met_function_thresholds(self, metric, metric_val): } assert getattr(ap, f"{metric}_met")(metric_val) == expected[metric] + @pt.fixture def compute_quintos_tie_equal(): """ Compute MKI/KI for the quintos tiebreak sample and assert equality across all estimate variants. Returns the common value. """ + def _compute(metric_name: str) -> float: - sample = ap.quintos_sample_with_tiebreaks() estimate_cols = [ - c for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] + c + for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] if c in sample.columns ] sales = sample["sale_price"] @@ -113,4 +115,4 @@ def _compute(metric_name: str) -> float: @pt.mark.parametrize("metric_name", ["mki", "ki"]) def test_quintos_tie_equal(compute_quintos_tie_equal, metric_name): val = compute_quintos_tie_equal(metric_name) - assert isinstance(val, float) \ No newline at end of file + assert isinstance(val, float) From 1c8918c6b2304c9534bab8da788c82426518ce85 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 17:23:20 +0000 Subject: [PATCH 24/42] make one fixture --- assesspy/tests/test_metrics.py | 67 +++++++++++++++------------------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 40cd68a..92f7138 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -14,6 +14,36 @@ def metric_val(self, metric, ccao_data, quintos_data): return getattr(ap, metric)(*quintos_data) return getattr(ap, metric)(*ccao_data) + @pt.fixture + def quintos_tie(self, metric): + if metric not in ("mki", "ki"): + return None + + sample = ap.quintos_sample_with_tiebreaks() + estimate_cols = [ + c for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] + if c in sample.columns + ] + sales = sample["sale_price"] + + ref_col = estimate_cols[0] + ref_val = getattr(ap, metric)(sample[ref_col], sales) + + for col in estimate_cols[1:]: + val = getattr(ap, metric)(sample[col], sales) + assert val == ref_val, ( + f"{metric.upper()} differs between {ref_col} and {col}: " + f"{ref_val} vs {val}" + ) + + return ref_val + + def test_quintos_tie(self, metric, quintos_tie): + if metric in ("mki", "ki"): + assert isinstance(quintos_tie, float) + else: + assert quintos_tie is None + def test_metric_value_is_correct_ccao(self, metric, metric_val): expected = { "cod": 17.81456901196891, @@ -79,40 +109,3 @@ def test_metric_met_function_thresholds(self, metric, metric_val): "mki": False, } assert getattr(ap, f"{metric}_met")(metric_val) == expected[metric] - - -@pt.fixture -def compute_quintos_tie_equal(): - """ - Compute MKI/KI for the quintos tiebreak sample and assert equality - across all estimate variants. Returns the common value. - """ - - def _compute(metric_name: str) -> float: - sample = ap.quintos_sample_with_tiebreaks() - estimate_cols = [ - c - for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] - if c in sample.columns - ] - sales = sample["sale_price"] - - ref_col = estimate_cols[0] - ref_val = getattr(ap, metric_name)(sample[ref_col], sales) - - for col in estimate_cols[1:]: - val = getattr(ap, metric_name)(sample[col], sales) - assert val == ref_val, ( - f"{metric_name.upper()} differs between {ref_col} and {col}: " - f"{ref_val} vs {val}" - ) - - return ref_val - - return _compute - - -@pt.mark.parametrize("metric_name", ["mki", "ki"]) -def test_quintos_tie_equal(compute_quintos_tie_equal, metric_name): - val = compute_quintos_tie_equal(metric_name) - assert isinstance(val, float) From 0c5692bba8d6a737c1cd83cf376c8eed8d45a426 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 17:25:02 +0000 Subject: [PATCH 25/42] make one test --- assesspy/tests/test_metrics.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 92f7138..bab6c3b 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -21,7 +21,8 @@ def quintos_tie(self, metric): sample = ap.quintos_sample_with_tiebreaks() estimate_cols = [ - c for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] + c + for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] if c in sample.columns ] sales = sample["sale_price"] @@ -35,25 +36,8 @@ def quintos_tie(self, metric): f"{metric.upper()} differs between {ref_col} and {col}: " f"{ref_val} vs {val}" ) - return ref_val - def test_quintos_tie(self, metric, quintos_tie): - if metric in ("mki", "ki"): - assert isinstance(quintos_tie, float) - else: - assert quintos_tie is None - - def test_metric_value_is_correct_ccao(self, metric, metric_val): - expected = { - "cod": 17.81456901196891, - "prd": 1.0484192615223522, - "prb": 0.0024757, - "mki": 0.794, - "ki": -0.06, - } - assert pt.approx(metric_val, rel=0.01) == expected[metric] - def test_metric_value_is_correct_iaao( self, metric, iaao_data_name, iaao_data ): From 3fcc03bbe5f466bf67b2996061861a91f72158c8 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 17:31:24 +0000 Subject: [PATCH 26/42] re-add stray delete --- assesspy/tests/test_metrics.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index bab6c3b..ecd5898 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -14,6 +14,16 @@ def metric_val(self, metric, ccao_data, quintos_data): return getattr(ap, metric)(*quintos_data) return getattr(ap, metric)(*ccao_data) + def test_metric_value_is_correct_ccao(self, metric, metric_val): + expected = { + "cod": 17.81456901196891, + "prd": 1.0484192615223522, + "prb": 0.0024757, + "mki": 0.794, + "ki": -0.06, + } + assert pt.approx(metric_val, rel=0.01) == expected[metric] + @pt.fixture def quintos_tie(self, metric): if metric not in ("mki", "ki"): From 455085d3b2c5e2b6ff517598ab080b3d925f041c Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 17:36:43 +0000 Subject: [PATCH 27/42] remove unneeded pandas --- assesspy/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/assesspy/__init__.py b/assesspy/__init__.py index 4d354df..a6245ba 100644 --- a/assesspy/__init__.py +++ b/assesspy/__init__.py @@ -1,5 +1,3 @@ -import pandas as pd - from .ci import ( boot_ci, cod_ci, From aa7dd08989f77a1a6376e03602b6547c56c93311 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 18:17:16 +0000 Subject: [PATCH 28/42] Add parametized test --- assesspy/tests/test_metrics.py | 52 ++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index ecd5898..795e5f4 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -24,30 +24,6 @@ def test_metric_value_is_correct_ccao(self, metric, metric_val): } assert pt.approx(metric_val, rel=0.01) == expected[metric] - @pt.fixture - def quintos_tie(self, metric): - if metric not in ("mki", "ki"): - return None - - sample = ap.quintos_sample_with_tiebreaks() - estimate_cols = [ - c - for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] - if c in sample.columns - ] - sales = sample["sale_price"] - - ref_col = estimate_cols[0] - ref_val = getattr(ap, metric)(sample[ref_col], sales) - - for col in estimate_cols[1:]: - val = getattr(ap, metric)(sample[col], sales) - assert val == ref_val, ( - f"{metric.upper()} differs between {ref_col} and {col}: " - f"{ref_val} vs {val}" - ) - return ref_val - def test_metric_value_is_correct_iaao( self, metric, iaao_data_name, iaao_data ): @@ -81,6 +57,34 @@ def test_metric_value_is_correct_iaao( pt.approx(result, rel=0.02) == expected[iaao_data_name][metric] ) + @pt.fixture + def quintos_tie(self, metric): + if metric not in ("mki", "ki"): + return None + + sample = ap.quintos_sample_with_tiebreaks() + estimate_cols = [ + c + for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] + if c in sample.columns + ] + sales = sample["sale_price"] + + ref_col = estimate_cols[0] + ref_val = getattr(ap, metric)(sample[ref_col], sales) + + for col in estimate_cols[1:]: + val = getattr(ap, metric)(sample[col], sales) + assert val == ref_val, ( + f"{metric.upper()} differs between {ref_col} and {col}: " + f"{ref_val} vs {val}" + ) + return ref_val + + @pt.mark.parametrize("metric", ["mki", "ki"]) + def test_quintos_tiebreaks_consistent(metric, quintos_tie): + assert True + def test_metric_has_numeric_output(self, metric_val): assert type(metric_val) is float From c18b8ec5f95383194437b37322d0718e752f4001 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 18:24:53 +0000 Subject: [PATCH 29/42] rename --- assesspy/tests/test_metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 795e5f4..883718d 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -58,7 +58,7 @@ def test_metric_value_is_correct_iaao( ) @pt.fixture - def quintos_tie(self, metric): + def mki_tie(self, metric): if metric not in ("mki", "ki"): return None @@ -82,7 +82,7 @@ def quintos_tie(self, metric): return ref_val @pt.mark.parametrize("metric", ["mki", "ki"]) - def test_quintos_tiebreaks_consistent(metric, quintos_tie): + def test_mki_tiebreaks_consistent(metric, mki_tie): assert True def test_metric_has_numeric_output(self, metric_val): From a3b96a21a213c4447e88ed61b8e653a659535ca3 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 4 Sep 2025 21:26:35 +0000 Subject: [PATCH 30/42] Add to conftest --- assesspy/tests/conftest.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/assesspy/tests/conftest.py b/assesspy/tests/conftest.py index 725e6d4..e137f99 100644 --- a/assesspy/tests/conftest.py +++ b/assesspy/tests/conftest.py @@ -26,6 +26,16 @@ def quintos_data() -> tuple: sample = ap.quintos_sample() return sample.estimate, sample.sale_price +@pt.fixture(scope="session") +def quintos_data_with_tiebreaks() -> tuple: + sample = ap.quintos_sample() + return ( + sample.estimate, + sample.estimate_alt_sort_1, + sample.estimate_alt_sort_2, + sample.sale_price, + ) + @pt.fixture(scope="session", params=["1_1", "1_4", "d_1", "d_2"]) def iaao_data_name(request): From 31dc6ebc36288187b4fe93f3ff990a241e1e49f5 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 10 Sep 2025 18:17:23 +0000 Subject: [PATCH 31/42] update conftest --- assesspy/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assesspy/tests/conftest.py b/assesspy/tests/conftest.py index e137f99..41c6bd9 100644 --- a/assesspy/tests/conftest.py +++ b/assesspy/tests/conftest.py @@ -28,7 +28,7 @@ def quintos_data() -> tuple: @pt.fixture(scope="session") def quintos_data_with_tiebreaks() -> tuple: - sample = ap.quintos_sample() + sample = ap.quintos_sample_with_tiebreaks() return ( sample.estimate, sample.estimate_alt_sort_1, From 6574b2a596ff76d588df1b6244ca590d62322616 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 10 Sep 2025 18:31:44 +0000 Subject: [PATCH 32/42] lintr --- assesspy/tests/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/assesspy/tests/conftest.py b/assesspy/tests/conftest.py index 41c6bd9..87f172e 100644 --- a/assesspy/tests/conftest.py +++ b/assesspy/tests/conftest.py @@ -26,6 +26,7 @@ def quintos_data() -> tuple: sample = ap.quintos_sample() return sample.estimate, sample.sale_price + @pt.fixture(scope="session") def quintos_data_with_tiebreaks() -> tuple: sample = ap.quintos_sample_with_tiebreaks() From db9823d48e2864588f3d753f016fedaba10060d3 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 11 Sep 2025 20:57:05 +0000 Subject: [PATCH 33/42] record commenting --- assesspy/tests/test_metrics.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 883718d..8165b92 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -61,7 +61,7 @@ def test_metric_value_is_correct_iaao( def mki_tie(self, metric): if metric not in ("mki", "ki"): return None - +# This block is just reformatting the data sample = ap.quintos_sample_with_tiebreaks() estimate_cols = [ c @@ -69,7 +69,7 @@ def mki_tie(self, metric): if c in sample.columns ] sales = sample["sale_price"] - +# This block here is testing the values ref_col = estimate_cols[0] ref_val = getattr(ap, metric)(sample[ref_col], sales) @@ -80,9 +80,14 @@ def mki_tie(self, metric): f"{ref_val} vs {val}" ) return ref_val + + # We need to adapt the code above so that it's using the shape of the data in the quintos data with tiebreaks. + # Fixture just returns the data and we should just reference the column indexes rather than column names + # The parmetize should test the process and test the data @pt.mark.parametrize("metric", ["mki", "ki"]) - def test_mki_tiebreaks_consistent(metric, mki_tie): + def test_mki_tiebreaks_consistent(metric, quintos_data_with_tiebreaks): + estimate_cols = [quintos_data_with_tiebreaks] assert True def test_metric_has_numeric_output(self, metric_val): From 53f52a0f112b636cc72c6e398070a648420e488e Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 11 Sep 2025 20:59:20 +0000 Subject: [PATCH 34/42] more commenting --- assesspy/tests/test_metrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 8165b92..3827ec8 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -72,7 +72,7 @@ def mki_tie(self, metric): # This block here is testing the values ref_col = estimate_cols[0] ref_val = getattr(ap, metric)(sample[ref_col], sales) - +# Assert calls should always be in the test functions for col in estimate_cols[1:]: val = getattr(ap, metric)(sample[col], sales) assert val == ref_val, ( @@ -84,6 +84,7 @@ def mki_tie(self, metric): # We need to adapt the code above so that it's using the shape of the data in the quintos data with tiebreaks. # Fixture just returns the data and we should just reference the column indexes rather than column names # The parmetize should test the process and test the data + # In this case we don't need the fixture in this file since its in conftest @pt.mark.parametrize("metric", ["mki", "ki"]) def test_mki_tiebreaks_consistent(metric, quintos_data_with_tiebreaks): From 24a07dece8d8e16d25f466a4eb5f2d8193eea01e Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 11 Sep 2025 21:21:39 +0000 Subject: [PATCH 35/42] move everything to test --- assesspy/tests/conftest.py | 2 +- assesspy/tests/test_metrics.py | 43 ++++++++++++++-------------------- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/assesspy/tests/conftest.py b/assesspy/tests/conftest.py index 87f172e..61f335a 100644 --- a/assesspy/tests/conftest.py +++ b/assesspy/tests/conftest.py @@ -31,10 +31,10 @@ def quintos_data() -> tuple: def quintos_data_with_tiebreaks() -> tuple: sample = ap.quintos_sample_with_tiebreaks() return ( + sample.sale_price, sample.estimate, sample.estimate_alt_sort_1, sample.estimate_alt_sort_2, - sample.sale_price, ) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 3827ec8..6e7a454 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -61,35 +61,26 @@ def test_metric_value_is_correct_iaao( def mki_tie(self, metric): if metric not in ("mki", "ki"): return None -# This block is just reformatting the data - sample = ap.quintos_sample_with_tiebreaks() - estimate_cols = [ - c - for c in ["estimate", "estimate_alt_sort_1", "estimate_alt_sort_2"] - if c in sample.columns - ] - sales = sample["sale_price"] -# This block here is testing the values - ref_col = estimate_cols[0] - ref_val = getattr(ap, metric)(sample[ref_col], sales) -# Assert calls should always be in the test functions - for col in estimate_cols[1:]: - val = getattr(ap, metric)(sample[col], sales) + + @pt.mark.parametrize("metric", ["mki", "ki"]) + def test_mki_tiebreaks_consistent( + self, metric, quintos_data_with_tiebreaks + ): + sale_price, estimate, estimate_alt_sort_1, estimate_alt_sort_2 = ( + quintos_data_with_tiebreaks + ) + fn = getattr(ap, metric) + + ref_val = fn(estimate, sale_price) + + for idx, est in enumerate( + (estimate_alt_sort_1, estimate_alt_sort_2), start=1 + ): + val = fn(est, sale_price) assert val == ref_val, ( - f"{metric.upper()} differs between {ref_col} and {col}: " + f"{metric.upper()} differs between estimate[0] and estimate_alt_sort_{idx}: " f"{ref_val} vs {val}" ) - return ref_val - - # We need to adapt the code above so that it's using the shape of the data in the quintos data with tiebreaks. - # Fixture just returns the data and we should just reference the column indexes rather than column names - # The parmetize should test the process and test the data - # In this case we don't need the fixture in this file since its in conftest - - @pt.mark.parametrize("metric", ["mki", "ki"]) - def test_mki_tiebreaks_consistent(metric, quintos_data_with_tiebreaks): - estimate_cols = [quintos_data_with_tiebreaks] - assert True def test_metric_has_numeric_output(self, metric_val): assert type(metric_val) is float From bde3a7755dd994bfbf061eff1ff333374b6c2468 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Thu, 11 Sep 2025 16:26:52 -0500 Subject: [PATCH 36/42] Update load_data.py --- assesspy/load_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assesspy/load_data.py b/assesspy/load_data.py index 24c0c2b..417b23f 100644 --- a/assesspy/load_data.py +++ b/assesspy/load_data.py @@ -59,10 +59,10 @@ def quintos_sample_with_tiebreaks() -> pd.DataFrame: A Pandas DataFrame with 30 observation and 4 variables: ======================== ===================================================== + **sale_price** (`float`) Recorded sale price of this property **estimate** (`float`) Assessed fair market value **estimate_alt_sort_1** (`float`) Alternative sort 1 for tiebreaks **estimate_alt_sort_2** (`float`) Alternative sort 2 for tiebreaks - **sale_price** (`float`) Recorded sale price of this property ======================== ===================================================== :rtype: pd.DataFrame From 132d80349d78326b41b77de9a2322a1194d2e52b Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Thu, 11 Sep 2025 16:27:42 -0500 Subject: [PATCH 37/42] Update test_metrics.py --- assesspy/tests/test_metrics.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py index 6e7a454..1fde9b8 100644 --- a/assesspy/tests/test_metrics.py +++ b/assesspy/tests/test_metrics.py @@ -57,11 +57,6 @@ def test_metric_value_is_correct_iaao( pt.approx(result, rel=0.02) == expected[iaao_data_name][metric] ) - @pt.fixture - def mki_tie(self, metric): - if metric not in ("mki", "ki"): - return None - @pt.mark.parametrize("metric", ["mki", "ki"]) def test_mki_tiebreaks_consistent( self, metric, quintos_data_with_tiebreaks From 5861e4b34fe7cf2a8d810ee35c311f1c4314ceb9 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 12 Sep 2025 14:34:01 -0500 Subject: [PATCH 38/42] Update assesspy/load_data.py Co-authored-by: Jean Cochrane --- assesspy/load_data.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/assesspy/load_data.py b/assesspy/load_data.py index 417b23f..2c48c61 100644 --- a/assesspy/load_data.py +++ b/assesspy/load_data.py @@ -54,7 +54,10 @@ def quintos_sample() -> pd.DataFrame: def quintos_sample_with_tiebreaks() -> pd.DataFrame: """ - Sample of sales and estimated market values modified to include tiebreak situations + Modified version of the Quintos sample of sales and estimated market values + that can be used to ensure that MKI/KI implementations are consistent when + some sales have the same sale price but different estimates. + :return: A Pandas DataFrame with 30 observation and 4 variables: From 7ab91cbac5994e6ab09f8e64699843828f2a3a40 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 12 Sep 2025 14:34:09 -0500 Subject: [PATCH 39/42] Update assesspy/load_data.py Co-authored-by: Jean Cochrane --- assesspy/load_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assesspy/load_data.py b/assesspy/load_data.py index 2c48c61..ea8450f 100644 --- a/assesspy/load_data.py +++ b/assesspy/load_data.py @@ -64,8 +64,8 @@ def quintos_sample_with_tiebreaks() -> pd.DataFrame: ======================== ===================================================== **sale_price** (`float`) Recorded sale price of this property **estimate** (`float`) Assessed fair market value - **estimate_alt_sort_1** (`float`) Alternative sort 1 for tiebreaks - **estimate_alt_sort_2** (`float`) Alternative sort 2 for tiebreaks + **estimate_alt_sort_1** (`float`) Alternative FMV sort 1 for testing tiebreaks + **estimate_alt_sort_2** (`float`) Alternative FMV sort 2 for testing tiebreaks ======================== ===================================================== :rtype: pd.DataFrame From cb51fc4fdfc86030ec93a5322cf75452b25b18df Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 12 Sep 2025 14:34:17 -0500 Subject: [PATCH 40/42] Update assesspy/metrics.py Co-authored-by: Jean Cochrane --- assesspy/metrics.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/assesspy/metrics.py b/assesspy/metrics.py index 9313e70..43d21db 100644 --- a/assesspy/metrics.py +++ b/assesspy/metrics.py @@ -211,7 +211,15 @@ def _calculate_gini( .reset_index(drop=True) ) df = pd.concat([estimate, sale_price], axis=1) - # Mergesort is required for stable sort results + # This Gini coefficient algorithm is sensitive to the order of the input + # observations: If multiple observations share the same sale price but have + # different estimates, the output coefficients will be different depending + # on which of the sales with identical prices gets ordered first in the + # input dataframe. To ensure a stable sort order, Quintos recommends + # sorting by ascending sale price and then by descending estimate to break + # any ties. This produces "worst case" MKI/KI statistics, but ensures those + # statistics are deterministic. See this issue for more discussion: + # https://github.com/ccao-data/assesspy/issues/33#issuecomment-3180632954 df.sort_values( by=["sale_price", "estimate"], ascending=[True, False], From 8bcdf4157f72cd1956de4f30f4ed6f022901b1ba Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Fri, 12 Sep 2025 14:34:23 -0500 Subject: [PATCH 41/42] Update docs/source/quintos_sample_with_tiebreaks.rst Co-authored-by: Jean Cochrane --- docs/source/quintos_sample_with_tiebreaks.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/quintos_sample_with_tiebreaks.rst b/docs/source/quintos_sample_with_tiebreaks.rst index 69a1b91..dac9530 100644 --- a/docs/source/quintos_sample_with_tiebreaks.rst +++ b/docs/source/quintos_sample_with_tiebreaks.rst @@ -1,5 +1,5 @@ ================================ -Sample data from Quintos studies Modified with Tiebreaks +Sample data from Quintos studies, modified to test sort order tiebreaks ================================ .. autofunction:: assesspy.quintos_sample_with_tiebreaks From 8d4d9950835b3c7e440b034749f5fd63cb993425 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 12 Sep 2025 19:44:37 +0000 Subject: [PATCH 42/42] lintr --- assesspy/load_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assesspy/load_data.py b/assesspy/load_data.py index ea8450f..9a569be 100644 --- a/assesspy/load_data.py +++ b/assesspy/load_data.py @@ -57,7 +57,7 @@ def quintos_sample_with_tiebreaks() -> pd.DataFrame: Modified version of the Quintos sample of sales and estimated market values that can be used to ensure that MKI/KI implementations are consistent when some sales have the same sale price but different estimates. - + :return: A Pandas DataFrame with 30 observation and 4 variables: