From 89a62d1d0f10b9dc037bf2fb324269c06cf03045 Mon Sep 17 00:00:00 2001 From: Herald-TUOS Date: Wed, 6 May 2026 15:34:07 +0100 Subject: [PATCH 1/2] deduplicate sjoin results for points on shared region boundaries while reverse geocoding --- geocode/utilities.py | 1 + 1 file changed, 1 insertion(+) diff --git a/geocode/utilities.py b/geocode/utilities.py index ce69746..ec84f10 100644 --- a/geocode/utilities.py +++ b/geocode/utilities.py @@ -265,6 +265,7 @@ def reverse_geocode( ).to_crs(regions.crs) regions.set_index("region_id", inplace=True) joined = regions.sjoin(coords, how="right").to_crs(regions.crs) + joined = joined[~joined.index.duplicated(keep="first")] # Perform sjoin nearest on coords that wasn't reverse-geocoded to a region na_geolocations = joined[joined["region_id"].isna()].copy() if not na_geolocations.empty and max_distance is not None: From 36962718c18703beca56b53064b130e845260558 Mon Sep 17 00:00:00 2001 From: Herald-TUOS Date: Wed, 6 May 2026 16:10:20 +0100 Subject: [PATCH 2/2] add a comment to explain deduplication of reverse geocoded results --- geocode/utilities.py | 1 + 1 file changed, 1 insertion(+) diff --git a/geocode/utilities.py b/geocode/utilities.py index ec84f10..73e78ac 100644 --- a/geocode/utilities.py +++ b/geocode/utilities.py @@ -265,6 +265,7 @@ def reverse_geocode( ).to_crs(regions.crs) regions.set_index("region_id", inplace=True) joined = regions.sjoin(coords, how="right").to_crs(regions.crs) + # A coord on a boundary shared by multiple regions matches each of them; keep only its first match joined = joined[~joined.index.duplicated(keep="first")] # Perform sjoin nearest on coords that wasn't reverse-geocoded to a region na_geolocations = joined[joined["region_id"].isna()].copy()