From 15fc24fe9f6ffeb807b7cc3e50ad9569853f918f Mon Sep 17 00:00:00 2001 From: Mvin8 Date: Mon, 22 Sep 2025 14:45:35 +0300 Subject: [PATCH 1/4] =?UTF-8?q?tyle:=20=D0=BF=D1=80=D0=B8=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D0=B8=D1=82=D1=8C=20black=20=D0=BA=D0=BE=20=D0=B2=D1=81?= =?UTF-8?q?=D0=B5=D0=BC=20Python-=D1=84=D0=B0=D0=B9=D0=BB=D0=B0=D0=BC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- popframe/method/agglomeration.py | 142 ++++++------- popframe/method/anchor_settlement.py | 198 +++++++++--------- popframe/method/city_evaluation.py | 161 ++++++-------- popframe/method/engineer.py | 33 +-- popframe/method/landuse_assessment.py | 152 +++++++++----- popframe/method/popuation_frame.py | 21 +- popframe/method/spatial_inequality.py | 51 ++--- popframe/method/territory_evaluation.py | 113 +++++----- popframe/models/geodataframe.py | 15 +- popframe/models/region.py | 74 +++---- popframe/models/town.py | 16 +- popframe/preprocessing/__init__.py | 2 +- .../preprocessing/adjacency_calculator.py | 14 +- popframe/preprocessing/level_filler.py | 10 +- popframe/preprocessing/population_filler.py | 39 ++-- popframe/utils/const.py | 90 ++++---- 16 files changed, 579 insertions(+), 552 deletions(-) diff --git a/popframe/method/agglomeration.py b/popframe/method/agglomeration.py index 5bfcd13..0a5e0b0 100644 --- a/popframe/method/agglomeration.py +++ b/popframe/method/agglomeration.py @@ -11,13 +11,13 @@ ) CITY_LEVELS = [ - "Малый город", - "Средний город", - "Большой город", - "Крупный город", - "Крупнейший город", - "Сверхкрупный город" - ] + "Малый город", + "Средний город", + "Большой город", + "Крупный город", + "Крупнейший город", + "Сверхкрупный город", +] class AgglomerationBuilder(BaseMethod): @@ -31,22 +31,22 @@ class AgglomerationBuilder(BaseMethod): def _build_agglomeration(self, towns: gpd.GeoDataFrame, time: int) -> gpd.GeoDataFrame: """ Builds agglomerations for cities based on the accessibility matrix and travel time. - + Parameters: - time_threshold: Maximum travel time to consider cities in the same agglomeration (default is 80 minutes). - + Returns: - A GeoDataFrame of the agglomerations. """ - node_population = towns.set_index('id')['population'] - node_names = towns.set_index('id')['name'] + node_population = towns.set_index("id")["population"] + node_names = towns.set_index("id")["name"] agglomerations = [] for level_index, level in enumerate(reversed(CITY_LEVELS)): max_time = time - 10 * level_index - level_nodes = towns[towns['level'] == level].sort_values(by='population', ascending=False) + level_nodes = towns[towns["level"] == level].sort_values(by="population", ascending=False) - for node, population in level_nodes[['id', 'population']].itertuples(index=False): + for node, population in level_nodes[["id", "population"]].itertuples(index=False): if node in self._in_agglomeration or population < self.min_population: continue @@ -61,9 +61,7 @@ def _build_agglomeration(self, towns: gpd.GeoDataFrame, time: int) -> gpd.GeoDat self._in_agglomeration[member_node] = True if agglomerations: - agglomeration_gdf = gpd.GeoDataFrame( - agglomerations, columns=["name", "geometry"] - ).set_geometry('geometry') + agglomeration_gdf = gpd.GeoDataFrame(agglomerations, columns=["name", "geometry"]).set_geometry("geometry") agglomeration_gdf.set_crs(self.region.crs, inplace=True) else: agglomeration_gdf = gpd.GeoDataFrame(columns=["name", "geometry"]) @@ -73,11 +71,11 @@ def _build_agglomeration(self, towns: gpd.GeoDataFrame, time: int) -> gpd.GeoDat def _get_agglomeration_around_node(self, start_node: int, max_time: int, towns: gpd.GeoDataFrame) -> Optional[dict]: """ Finds the agglomeration around a given city node within the specified time limit using the accessibility matrix. - + Parameters: - start_node: The node to start agglomeration search from. - max_time: The maximum time to travel from the start node. - + Returns: - A dictionary containing the geometry of the agglomeration and the nodes within it. """ @@ -90,28 +88,25 @@ def _get_agglomeration_around_node(self, start_node: int, max_time: int, towns: if within_time_nodes.empty: return None - nodes_data = towns.set_index('id').loc[within_time_nodes] - nodes_data['geometry'] = nodes_data.apply(lambda row: Point(row['geometry'].x, row['geometry'].y), axis=1) - nodes_gdf = gpd.GeoDataFrame(nodes_data, geometry='geometry', crs=self.region.crs) + nodes_data = towns.set_index("id").loc[within_time_nodes] + nodes_data["geometry"] = nodes_data.apply(lambda row: Point(row["geometry"].x, row["geometry"].y), axis=1) + nodes_gdf = gpd.GeoDataFrame(nodes_data, geometry="geometry", crs=self.region.crs) # Calculate the remaining distance buffer distance = {node: (max_time - distances_from_start[node]) * self.radius_m_per_min for node in within_time_nodes} nodes_gdf["left_distance"] = nodes_gdf.index.map(distance) agglomeration_geom = nodes_gdf.buffer(nodes_gdf["left_distance"]).unary_union - return { - "geometry": agglomeration_geom, - "nodes_in_agglomeration": list(within_time_nodes) - } + return {"geometry": agglomeration_geom, "nodes_in_agglomeration": list(within_time_nodes)} def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ Merges intersecting agglomerations into larger polygons and sums up the population. - + Parameters: - gdf: GeoDataFrame of the agglomerations. - towns: GeoDataFrame of the towns. - + Returns: - A GeoDataFrame of the merged agglomerations with updated population data and agglomeration level. """ @@ -124,16 +119,16 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G continue overlapping_agglomerations = [row_i] - geometry = row_i['geometry'] - merged_names = {row_i['name']} + geometry = row_i["geometry"] + merged_names = {row_i["name"]} # Первый цикл: проверка пересечений и объединение агломераций for j, row_j in gdf.iterrows(): if i != j and j not in processed_indices: - if geometry.intersects(row_j['geometry']): + if geometry.intersects(row_j["geometry"]): overlapping_agglomerations.append(row_j) - geometry = unary_union([geometry, row_j['geometry']]) # Используем unary_union - merged_names.add(row_j['name']) + geometry = unary_union([geometry, row_j["geometry"]]) # Используем unary_union + merged_names.add(row_j["name"]) processed_indices.add(j) # Второй цикл: дополнительная проверка для объединения с новыми полигонами @@ -142,10 +137,10 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G still_intersecting = False for j, row_j in gdf.iterrows(): if j not in processed_indices: - if geometry.intersects(row_j['geometry']): + if geometry.intersects(row_j["geometry"]): overlapping_agglomerations.append(row_j) - geometry = unary_union([geometry, row_j['geometry']]) # Используем unary_union - merged_names.add(row_j['name']) + geometry = unary_union([geometry, row_j["geometry"]]) # Используем unary_union + merged_names.add(row_j["name"]) processed_indices.add(j) still_intersecting = True @@ -154,7 +149,7 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G geometry = geometry.buffer(0) towns_in_agglomeration = towns[towns.intersects(geometry)] - population_from_towns = towns_in_agglomeration['population'].sum() + population_from_towns = towns_in_agglomeration["population"].sum() # Определение уровня агломерации на основе населения if population_from_towns <= 250000: @@ -169,11 +164,11 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G agglomeration_level = 5 merged_agglomeration = { - 'geometry': geometry, - 'type': 'Polycentric' if len(merged_names) > 1 else 'Monocentric', - 'core_cities': ', '.join(merged_names), - 'population': population_from_towns, - 'agglomeration_level': agglomeration_level + "geometry": geometry, + "type": "Polycentric" if len(merged_names) > 1 else "Monocentric", + "core_cities": ", ".join(merged_names), + "population": population_from_towns, + "agglomeration_level": agglomeration_level, } merged_geometries.append(merged_agglomeration) @@ -181,35 +176,35 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G return gpd.GeoDataFrame(merged_geometries, crs=gdf.crs) - def _simplify_multipolygons(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ Simplifies multipolygons to keep only the largest polygon for each agglomeration. - + Parameters: - gdf: GeoDataFrame containing the agglomeration geometries. - + Returns: - A GeoDataFrame with simplified geometries. """ - gdf['geometry'] = gdf['geometry'].apply( + gdf["geometry"] = gdf["geometry"].apply( lambda geom: max(geom.geoms, key=lambda g: g.area) if isinstance(geom, MultiPolygon) else geom ) return gdf - - def evaluate_city_agglomeration_status(self, towns: gpd.GeoDataFrame, agglomeration_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + def evaluate_city_agglomeration_status( + self, towns: gpd.GeoDataFrame, agglomeration_gdf: gpd.GeoDataFrame + ) -> gpd.GeoDataFrame: """ Evaluates cities according to their position in the agglomeration. - + Adds the 'agglomeration_status' and 'agglomeration_level' attributes to towns: - 'agglomeration_status': 'Outside the agglomeration', 'Agglomeration Center', 'In the agglomeration'. - 'agglomeration_level': 0 for cities outside agglomerations, and agglomeration level (1 to 5) for cities within agglomerations. - + Parameters: - towns: GeoDataFrame with cities. - agglomeration_gdf: GeoDataFrame with agglomerations. - + Returns: - Updated GeoDataFrame with added 'agglomeration_status' (str) and 'agglomeration_level' (int) attributes. """ @@ -217,77 +212,76 @@ def evaluate_city_agglomeration_status(self, towns: gpd.GeoDataFrame, agglomerat agglomeration_level = [] for idx, town in towns.iterrows(): - town_point = town['geometry'] - town_name = town['name'] - + town_point = town["geometry"] + town_name = town["name"] + in_agglomeration = False is_core_city = False current_agglomeration_level = 0 # Default level for towns outside agglomerations for agg_idx, agg in agglomeration_gdf.iterrows(): - if town_point.intersects(agg['geometry']): + if town_point.intersects(agg["geometry"]): in_agglomeration = True - current_agglomeration_level = agg['agglomeration_level'] + current_agglomeration_level = agg["agglomeration_level"] # Проверяем, является ли город основным в агломерации - core_cities = agg['core_cities'].split(', ') + core_cities = agg["core_cities"].split(", ") if town_name in core_cities: is_core_city = True - current_status = 'Центр агломерации' + current_status = "Центр агломерации" break if is_core_city: - agglomeration_status.append('Центр агломерации') + agglomeration_status.append("Центр агломерации") agglomeration_level.append(current_agglomeration_level) elif not in_agglomeration: - agglomeration_status.append('Вне агломерации') + agglomeration_status.append("Вне агломерации") agglomeration_level.append(0) # 0 for cities outside agglomerations else: - agglomeration_status.append('В агломерации') + agglomeration_status.append("В агломерации") agglomeration_level.append(current_agglomeration_level) - towns['agglomeration_status'] = agglomeration_status - towns['agglomeration_level'] = agglomeration_level + towns["agglomeration_status"] = agglomeration_status + towns["agglomeration_level"] = agglomeration_level return towns - def get_agglomerations(self, update_df: Optional[pd.DataFrame] = None, time: int = 80) -> gpd.GeoDataFrame: """ The main function that orchestrates the creation, merging, and finalization of agglomerations. - + Returns: - A GeoDataFrame with the finalized agglomerations, merged, simplified, and overlaid on region boundaries. """ towns = self.region.get_update_towns_gdf(update_df) - + if towns is None or len(towns) < 2: raise ValueError("Для построения агломерации требуется минимум два города.") - + # Проверяем, что хотя бы у двух городов есть ненулевое население - valid_pop = towns['population'].notnull() & (towns['population'] > 0) + valid_pop = towns["population"].notnull() & (towns["population"] > 0) if valid_pop.sum() < 2: raise ValueError("Требуются данные о населении минимум у двух разных городов.") - + if time < 50: print("Минимально допустимое значение параметра 'time' — 50 минут. Заменяю на 50.") time = 50 - + region_boundary = self.region.region # Step 1: Build agglomerations agglomeration_gdf = self._build_agglomeration(towns, time) - + # Step 2: Merge intersecting agglomerations and update population data agglomeration_gdf = self._simplify_multipolygons(agglomeration_gdf) - + # Step 3: Overlay agglomerations on region boundaries agglomeration_gdf = self._merge_intersecting_agglomerations(agglomeration_gdf, towns) # Step 4: Simplify multipolygons - agglomeration_gdf = gpd.overlay(agglomeration_gdf, region_boundary, how='intersection') - + agglomeration_gdf = gpd.overlay(agglomeration_gdf, region_boundary, how="intersection") + # Step 5: Final geometry corrections agglomeration_gdf = self._simplify_multipolygons(agglomeration_gdf) - + return agglomeration_gdf diff --git a/popframe/method/anchor_settlement.py b/popframe/method/anchor_settlement.py index 4c74bd8..95091f9 100644 --- a/popframe/method/anchor_settlement.py +++ b/popframe/method/anchor_settlement.py @@ -10,16 +10,15 @@ from popframe.utils.const import TIME_TO_METERS_FACTOR - class AnchorSettlementBuilder(BaseMethod): """Build boundaries for anchor (опорные) settlements using travel-time buffers.""" radius_m_per_min: int = TIME_TO_METERS_FACTOR - + def _build_anchor_settlement_boundaries(self, towns: gpd.GeoDataFrame, time: int) -> gpd.GeoDataFrame: """ Builds boundaries for anchor settlements iteratively, starting from the closest town based on travel time. - + Parameters ---------- towns : geopandas.GeoDataFrame @@ -32,24 +31,24 @@ def _build_anchor_settlement_boundaries(self, towns: gpd.GeoDataFrame, time: int geopandas.GeoDataFrame GeoDataFrame of the anchor settlement boundaries. """ - anchor_towns = towns[towns['is_anchor_settlement'] == True] + anchor_towns = towns[towns["is_anchor_settlement"] == True] boundaries = [] - + accessibility_matrix = self.region.accessibility_matrix - - for node in anchor_towns['id']: + + for node in anchor_towns["id"]: boundary = self._get_boundary_around_node(node, time, towns, accessibility_matrix) - + if boundary: - boundary["name"] = towns.loc[towns['id'] == node, 'name'].values[0] + boundary["name"] = towns.loc[towns["id"] == node, "name"].values[0] boundaries.append(boundary) - + if boundaries: - boundary_gdf = gpd.GeoDataFrame(boundaries, columns=["name", "geometry"]).set_geometry('geometry') + boundary_gdf = gpd.GeoDataFrame(boundaries, columns=["name", "geometry"]).set_geometry("geometry") boundary_gdf.set_crs(towns.crs, inplace=True) else: boundary_gdf = gpd.GeoDataFrame(columns=["name", "geometry"]) - + return boundary_gdf def _get_boundary_around_node( @@ -80,22 +79,19 @@ def _get_boundary_around_node( """ distances_from_start = accessibility_matrix.loc[start_node] within_time_nodes = distances_from_start[distances_from_start <= max_time].index - + if within_time_nodes.empty: return None - - nodes_gdf = towns.set_index('id').loc[within_time_nodes] - + + nodes_gdf = towns.set_index("id").loc[within_time_nodes] + distance = {node: (max_time - distances_from_start[node]) * self.radius_m_per_min for node in within_time_nodes} # distance = {node: (max_time) * RADIUS for node in within_time_nodes} nodes_gdf["left_distance"] = nodes_gdf.index.map(distance) boundary_geom = nodes_gdf.buffer(nodes_gdf["left_distance"]).unary_union - - return { - "geometry": boundary_geom, - "nodes_in_boundary": list(within_time_nodes) - } - + + return {"geometry": boundary_geom, "nodes_in_boundary": list(within_time_nodes)} + def _simplify_multipolygons(self, gdf: gpd.GeoDataFrame, towns: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ Simplify MultiPolygon geometries, keeping only those intersecting with anchor towns. @@ -112,26 +108,26 @@ def _simplify_multipolygons(self, gdf: gpd.GeoDataFrame, towns: gpd.GeoDataFrame geopandas.GeoDataFrame Simplified GeoDataFrame. """ - anchor_towns = towns[towns['is_anchor_settlement'] == True] - + anchor_towns = towns[towns["is_anchor_settlement"] == True] + def process_geometry(geom): if isinstance(geom, MultiPolygon): polygons = list(geom.geoms) - + # Фильтруем только те полигоны, которые пересекаются с anchor_towns filtered_polygons = [p for p in polygons if any(p.intersects(at) for at in anchor_towns.geometry)] - + if filtered_polygons: return MultiPolygon(filtered_polygons) if len(filtered_polygons) > 1 else filtered_polygons[0] else: return None # Удаляем геометрию, если ни один полигон не пересекается - + return geom if any(geom.intersects(at) for at in anchor_towns.geometry) else None - - gdf['geometry'] = gdf['geometry'].apply(process_geometry) - gdf = gdf.dropna(subset=['geometry']).reset_index(drop=True) # Удаляем строки без геометрии + + gdf["geometry"] = gdf["geometry"].apply(process_geometry) + gdf = gdf.dropna(subset=["geometry"]).reset_index(drop=True) # Удаляем строки без геометрии return gdf - + def _merge_intersecting_boundaries(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ Merge intersecting boundaries into single geometries. @@ -154,43 +150,42 @@ def _merge_intersecting_boundaries(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFr continue overlapping_boundaries = [row_i] - geometry = row_i['geometry'] - merged_names = {row_i['name']} + geometry = row_i["geometry"] + merged_names = {row_i["name"]} for j, row_j in gdf.iterrows(): if i != j and j not in processed_indices: - if geometry.intersects(row_j['geometry']): + if geometry.intersects(row_j["geometry"]): overlapping_boundaries.append(row_j) - geometry = unary_union([geometry, row_j['geometry']]).buffer(0) - merged_names.add(row_j['name']) + geometry = unary_union([geometry, row_j["geometry"]]).buffer(0) + merged_names.add(row_j["name"]) processed_indices.add(j) - + still_merging = True while still_merging: still_merging = False for j, row_j in gdf.iterrows(): - if j not in processed_indices and geometry.intersects(row_j['geometry']): + if j not in processed_indices and geometry.intersects(row_j["geometry"]): overlapping_boundaries.append(row_j) - geometry = unary_union([geometry, row_j['geometry']]).buffer(0) - merged_names.add(row_j['name']) + geometry = unary_union([geometry, row_j["geometry"]]).buffer(0) + merged_names.add(row_j["name"]) processed_indices.add(j) still_merging = True - + if not geometry.is_valid: geometry = geometry.buffer(0) - + merged_boundary = { - 'geometry': geometry, - 'type': 'Merged' if len(merged_names) > 1 else 'Single', - 'anchor_settlements': ', '.join(merged_names) + "geometry": geometry, + "type": "Merged" if len(merged_names) > 1 else "Single", + "anchor_settlements": ", ".join(merged_names), } merged_geometries.append(merged_boundary) - + processed_indices.add(i) return gpd.GeoDataFrame(merged_geometries, crs=gdf.crs) - def split_multipolygons(self, geometry): """ Split MultiPolygon into individual polygons, marking all but the largest as 'created'. @@ -210,7 +205,9 @@ def split_multipolygons(self, geometry): return [(sorted_polygons[0], "оригинал")] + [(poly, "создан") for poly in sorted_polygons[1:]] return [(geometry, "оригинал")] - def voronoi_polygons_within_boundaries(self, boundary_polygon: Polygon, anchor_towns: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + def voronoi_polygons_within_boundaries( + self, boundary_polygon: Polygon, anchor_towns: gpd.GeoDataFrame + ) -> gpd.GeoDataFrame: """ Create a Voronoi diagram within the given boundary polygon for anchor towns. @@ -226,13 +223,13 @@ def voronoi_polygons_within_boundaries(self, boundary_polygon: Polygon, anchor_t geopandas.GeoDataFrame GeoDataFrame with Voronoi polygons and their sources. """ - + # Фильтруем точки, оставляя только те, которые внутри границы points = [p for p in anchor_towns.geometry if boundary_polygon.contains(p)] - + if len(points) < 2: - return gpd.GeoDataFrame(columns=['geometry', 'source'], crs=anchor_towns.crs) - + return gpd.GeoDataFrame(columns=["geometry", "source"], crs=anchor_towns.crs) + if len(points) == 2: # Если только 2 точки, делаем разрез границы на две части p1, p2 = points @@ -248,10 +245,12 @@ def voronoi_polygons_within_boundaries(self, boundary_polygon: Polygon, anchor_t min_x, min_y, max_x, max_y = boundary_polygon.bounds # Определяем длинную разрезающую линию, которая точно пересекает полигон - cut_line = LineString([ - (mid_x - perp_dx * (max_x - min_x), mid_y - perp_dy * (max_y - min_y)), - (mid_x + perp_dx * (max_x - min_x), mid_y + perp_dy * (max_y - min_y)) - ]) + cut_line = LineString( + [ + (mid_x - perp_dx * (max_x - min_x), mid_y - perp_dy * (max_y - min_y)), + (mid_x + perp_dx * (max_x - min_x), mid_y + perp_dy * (max_y - min_y)), + ] + ) # Проверяем, что cut_line остается LineString if cut_line.geom_type == "MultiLineString": @@ -262,12 +261,11 @@ def voronoi_polygons_within_boundaries(self, boundary_polygon: Polygon, anchor_t if len(split_polys.geoms) == 2: return gpd.GeoDataFrame( - {'geometry': list(split_polys.geoms), 'source': ["оригинал", "оригинал"]}, - crs=anchor_towns.crs + {"geometry": list(split_polys.geoms), "source": ["оригинал", "оригинал"]}, crs=anchor_towns.crs ) else: print("Ошибка разрезания границы на две части.") - return gpd.GeoDataFrame(columns=['geometry', 'source'], crs=anchor_towns.crs) + return gpd.GeoDataFrame(columns=["geometry", "source"], crs=anchor_towns.crs) # Если точек больше 2, используем стандартную диаграмму Вороного points_array = np.array([(p.x, p.y) for p in points]) @@ -280,8 +278,8 @@ def voronoi_polygons_within_boundaries(self, boundary_polygon: Polygon, anchor_t split_polys = self.split_multipolygons(clipped_poly) polygons.extend([p[0] for p in split_polys]) sources.extend([p[1] for p in split_polys]) - - return gpd.GeoDataFrame({'geometry': polygons, 'source': sources}, crs=anchor_towns.crs) + + return gpd.GeoDataFrame({"geometry": polygons, "source": sources}, crs=anchor_towns.crs) def find_largest_intersection(self, target_poly: Polygon, gdf: gpd.GeoDataFrame): """ @@ -326,16 +324,16 @@ def cluster_and_merge_created_polygons(self, created_gdf: gpd.GeoDataFrame) -> g """ merged_polygons = [] processed = set() - + for idx, row in created_gdf.iterrows(): if idx in processed: continue - + cluster = created_gdf[created_gdf.geometry.intersects(row.geometry)].geometry.tolist() merged_polygons.append(unary_union(cluster)) processed.update(created_gdf[created_gdf.geometry.intersects(row.geometry)].index) - - return gpd.GeoDataFrame({'geometry': merged_polygons, 'source': 'создан'}, crs=created_gdf.crs) + + return gpd.GeoDataFrame({"geometry": merged_polygons, "source": "создан"}, crs=created_gdf.crs) def merge_created_polygons(self, voronoi_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ @@ -351,34 +349,33 @@ def merge_created_polygons(self, voronoi_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFr geopandas.GeoDataFrame GeoDataFrame with merged polygons. """ - created_gdf = voronoi_gdf[voronoi_gdf['source'] == "создан"].copy() - original_gdf = voronoi_gdf[voronoi_gdf['source'] == "оригинал"].copy() - + created_gdf = voronoi_gdf[voronoi_gdf["source"] == "создан"].copy() + original_gdf = voronoi_gdf[voronoi_gdf["source"] == "оригинал"].copy() + # Кластеризация созданных полигонов clustered_created_gdf = self.cluster_and_merge_created_polygons(created_gdf) - + merged_geometries = [] original_to_remove = set() - + for _, row in clustered_created_gdf.iterrows(): buffered_geom = row.geometry.buffer(30) # Буфер перед объединением с оригиналом max_index, max_intersection_poly = self.find_largest_intersection(buffered_geom, original_gdf) - + if max_intersection_poly: merged_geometry = unary_union([buffered_geom, max_intersection_poly]) original_to_remove.add(max_index) else: merged_geometry = buffered_geom - + merged_geometries.append(merged_geometry) - + # Удаляем оригиналы, которые были объединены original_gdf = original_gdf.drop(index=original_to_remove) - - merged_gdf = gpd.GeoDataFrame({'geometry': merged_geometries, 'source': 'объединён'}, crs=voronoi_gdf.crs) - - return gpd.GeoDataFrame(pd.concat([original_gdf, merged_gdf], ignore_index=True)) + merged_gdf = gpd.GeoDataFrame({"geometry": merged_geometries, "source": "объединён"}, crs=voronoi_gdf.crs) + + return gpd.GeoDataFrame(pd.concat([original_gdf, merged_gdf], ignore_index=True)) def final_simplification(self, merged_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ @@ -401,14 +398,16 @@ def final_simplification(self, merged_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame exploded_geometry = gpd.GeoSeries(final_geometry).explode(index_parts=False) exploded_geometry = exploded_geometry.buffer(30) # Создаем GeoDataFrame с разбиением MultiPolygon на отдельные Polygon - final_gdf = gpd.GeoDataFrame({'geometry': exploded_geometry, 'source': 'финальный'}, crs=merged_gdf.crs) + final_gdf = gpd.GeoDataFrame({"geometry": exploded_geometry, "source": "финальный"}, crs=merged_gdf.crs) # Сброс индекса после разбиения final_gdf = final_gdf.reset_index(drop=True) return final_gdf - def merge_gdfs_excluding_intersections(self, boundaries_gdf: gpd.GeoDataFrame, final_gdf: gpd.GeoDataFrame, anchor_towns: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + def merge_gdfs_excluding_intersections( + self, boundaries_gdf: gpd.GeoDataFrame, final_gdf: gpd.GeoDataFrame, anchor_towns: gpd.GeoDataFrame + ) -> gpd.GeoDataFrame: """ Merge boundaries_gdf and final_gdf, excluding intersecting polygons, and assign names from anchor_towns. @@ -426,16 +425,16 @@ def merge_gdfs_excluding_intersections(self, boundaries_gdf: gpd.GeoDataFrame, f geopandas.GeoDataFrame Merged GeoDataFrame with 'geometry' and 'anchor_name'. """ - + # Удаляем полигоны из boundaries_gdf, которые пересекаются с final_gdf filtered_boundaries_gdf = boundaries_gdf[~boundaries_gdf.intersects(final_gdf.unary_union)] - + # Объединяем оставшиеся полигоны с final_gdf merged_gdf = gpd.GeoDataFrame(pd.concat([filtered_boundaries_gdf, final_gdf], ignore_index=True)) - + # Создаем новый столбец 'name', который будет содержать имя города из anchor_towns merged_gdf["anchor_name"] = None - + # Для каждого полигона ищем, какие объекты из anchor_towns находятся внутри for idx, row in merged_gdf.iterrows(): # Выбираем все точки, которые содержатся в полигоне @@ -443,13 +442,15 @@ def merge_gdfs_excluding_intersections(self, boundaries_gdf: gpd.GeoDataFrame, f if not towns_in_poly.empty: # Присваиваем имя первого найденного города merged_gdf.at[idx, "anchor_name"] = towns_in_poly.iloc[0]["name"] - + # Оставляем только столбцы 'geometry' и 'name' merged_gdf = merged_gdf[["geometry", "anchor_name"]] - + return merged_gdf - - def get_anchor_settlement_boundaries(self, towns: gpd.GeoDataFrame, update_df: Optional[pd.DataFrame] = None, time: int = 50) -> gpd.GeoDataFrame: + + def get_anchor_settlement_boundaries( + self, towns: gpd.GeoDataFrame, update_df: Optional[pd.DataFrame] = None, time: int = 50 + ) -> gpd.GeoDataFrame: """ Main function to orchestrate the creation, merging, and finalization of anchor settlement boundaries. @@ -469,28 +470,28 @@ def get_anchor_settlement_boundaries(self, towns: gpd.GeoDataFrame, update_df: O """ if update_df is not None: towns = self.region.get_update_towns_gdf(update_df) - + region_boundary = self.region.region local_crs = self.region.region.crs towns = towns.to_crs(local_crs) - + boundary_gdf = self._build_anchor_settlement_boundaries(towns, time) - + boundary_gdf = self._simplify_multipolygons(boundary_gdf, towns) - + boundary_gdf = self._merge_intersecting_boundaries(boundary_gdf) boundary_gdf = boundary_gdf.explode(index_parts=False) # Сбрасываем индекс (если необходимо) boundary_gdf = boundary_gdf.reset_index(drop=True) - - boundary_gdf = gpd.overlay(boundary_gdf, region_boundary, how='intersection') - + + boundary_gdf = gpd.overlay(boundary_gdf, region_boundary, how="intersection") + boundary_gdf = self._simplify_multipolygons(boundary_gdf, towns) - - boundary_gdf['geometry'] = boundary_gdf['geometry'].apply( + + boundary_gdf["geometry"] = boundary_gdf["geometry"].apply( lambda geom: Polygon(geom.exterior) if geom.is_valid else geom ) @@ -511,9 +512,6 @@ def get_anchor_settlement_boundaries(self, towns: gpd.GeoDataFrame, update_df: O # Финальная обработка final_gdf = self.final_simplification(merged_voronoi_gdf) - result_gdf = self.merge_gdfs_excluding_intersections(boundary_gdf, final_gdf, anchor_towns) - + return result_gdf - - diff --git a/popframe/method/city_evaluation.py b/popframe/method/city_evaluation.py index 6f00b59..1bf6a89 100644 --- a/popframe/method/city_evaluation.py +++ b/popframe/method/city_evaluation.py @@ -2,6 +2,7 @@ import pandas as pd import json + class CityPopulationScorer: """ Calculates population-based scores for hexagonal grid cells based on municipal data. @@ -24,11 +25,9 @@ class CityPopulationScorer: 2: "Территория отличается относительно низкой численностью и плотностью населения, что ограничивает возможности развитие инфраструктуры и экономики.", 3: "Территория имеет средние показатели численности и плотности населения, что создаёт возможность развитие инфраструктуры и экономики.", 4: "Территория имеет высокие показатели численности и плотности населения, что способствует развитие инфраструктуры и экономики.", - 5: "Территория имеет очень высокими показателями численности и плотности, что указывает на высокий потенциал развития инфраструктуры и экономики." + 5: "Территория имеет очень высокими показателями численности и плотности, что указывает на высокий потенциал развития инфраструктуры и экономики.", } - - def __init__(self, gdf_mo: gpd.GeoDataFrame, gdf_hex: gpd.GeoDataFrame, target_crs: int = 3857): """ Initializes the HexPopulationScorer. @@ -58,9 +57,9 @@ def compute_mo_density(self): self.gdf_mo: Adds columns 'area_m2', 'area_km2', and 'density_mo'. """ self.gdf_mo = self.gdf_mo.to_crs(epsg=self.target_crs) - self.gdf_mo['area_m2'] = self.gdf_mo.geometry.area - self.gdf_mo['area_km2'] = self.gdf_mo['area_m2'] / 1_000_000 - self.gdf_mo['density_mo'] = self.gdf_mo['population'] / self.gdf_mo['area_km2'] + self.gdf_mo["area_m2"] = self.gdf_mo.geometry.area + self.gdf_mo["area_km2"] = self.gdf_mo["area_m2"] / 1_000_000 + self.gdf_mo["density_mo"] = self.gdf_mo["population"] / self.gdf_mo["area_km2"] def assign_hex_to_mo(self): """ @@ -77,84 +76,61 @@ def assign_hex_to_mo(self): """ self.gdf_hex = self.gdf_hex.to_crs(epsg=self.target_crs) - mo_small = self.gdf_mo[['geometry', 'territory_id', 'population', 'density_mo']] + mo_small = self.gdf_mo[["geometry", "territory_id", "population", "density_mo"]] - hex_mo = gpd.overlay( - self.gdf_hex[['geometry', 'hexagon_id']], - mo_small, - how='intersection' - ) - hex_mo['inter_area'] = hex_mo.geometry.area + hex_mo = gpd.overlay(self.gdf_hex[["geometry", "hexagon_id"]], mo_small, how="intersection") + hex_mo["inter_area"] = hex_mo.geometry.area - idx = hex_mo.groupby('hexagon_id')['inter_area'].idxmax() - hex_max = hex_mo.loc[idx, ['hexagon_id', 'territory_id', 'population', 'density_mo']] + idx = hex_mo.groupby("hexagon_id")["inter_area"].idxmax() + hex_max = hex_mo.loc[idx, ["hexagon_id", "territory_id", "population", "density_mo"]] - self.gdf_hex = self.gdf_hex.merge(hex_max, on='hexagon_id', how='left') - self.gdf_hex = self.gdf_hex.rename(columns={'density_mo': 'density'}) + self.gdf_hex = self.gdf_hex.merge(hex_max, on="hexagon_id", how="left") + self.gdf_hex = self.gdf_hex.rename(columns={"density_mo": "density"}) def normalize_and_score(self): - """ - Нормализует population и density, считает combined_norm, - присваивает score = 1–5 для ячеек с данными и 0 для ячеек без данных. - """ - # 1. Нормируем population - pop_min, pop_max = self.gdf_hex['population'].min(), self.gdf_hex['population'].max() - if pop_max == pop_min: - self.gdf_hex['norm_pop'] = 0.5 - else: - self.gdf_hex['norm_pop'] = ( - self.gdf_hex['population'] - pop_min - ) / (pop_max - pop_min) - - # 2. Нормируем density - dens_min, dens_max = self.gdf_hex['density'].min(), self.gdf_hex['density'].max() - if dens_max == dens_min: - self.gdf_hex['norm_dens'] = 0.5 - else: - self.gdf_hex['norm_dens'] = ( - self.gdf_hex['density'] - dens_min - ) / (dens_max - dens_min) - - # Заменяем NaN (в том числе от ячеек без данных) на 0 - self.gdf_hex['norm_pop'] = self.gdf_hex['norm_pop'].fillna(0) - self.gdf_hex['norm_dens'] = self.gdf_hex['norm_dens'].fillna(0) - - # 3. Вычисляем сырое значение - self.gdf_hex['combined_raw'] = ( - self.gdf_hex['norm_pop'] + self.gdf_hex['norm_dens'] - ) / 2 - - # 4. Мин–макс нормируем combined_raw - raw_min, raw_max = ( - self.gdf_hex['combined_raw'].min(), - self.gdf_hex['combined_raw'].max() - ) - if raw_max == raw_min: - self.gdf_hex['combined_norm'] = 0.5 - else: - self.gdf_hex['combined_norm'] = ( - self.gdf_hex['combined_raw'] - raw_min - ) / (raw_max - raw_min) - - self.gdf_hex['combined_norm'] = self.gdf_hex['combined_norm'].fillna(0) - - # 5. Присваиваем оценку: - # – для ячеек с данными: масштабируем в [1;5] - # – для ячеек без данных (вода): оставляем 0 - # 5.1 Сначала проставляем всем 1–5 - self.gdf_hex['score'] = ( - (self.gdf_hex['combined_norm'] * 4 + 1) - .round() - .clip(lower=1, upper=5) - .astype(int) - ) - - # 5.2 Выставляем 0 там, где нет ни population, ни density - mask_no_data = ( - self.gdf_hex['population'].isna() - & self.gdf_hex['density'].isna() - ) - self.gdf_hex.loc[mask_no_data, 'score'] = 0 + """ + Нормализует population и density, считает combined_norm, + присваивает score = 1–5 для ячеек с данными и 0 для ячеек без данных. + """ + # 1. Нормируем population + pop_min, pop_max = self.gdf_hex["population"].min(), self.gdf_hex["population"].max() + if pop_max == pop_min: + self.gdf_hex["norm_pop"] = 0.5 + else: + self.gdf_hex["norm_pop"] = (self.gdf_hex["population"] - pop_min) / (pop_max - pop_min) + + # 2. Нормируем density + dens_min, dens_max = self.gdf_hex["density"].min(), self.gdf_hex["density"].max() + if dens_max == dens_min: + self.gdf_hex["norm_dens"] = 0.5 + else: + self.gdf_hex["norm_dens"] = (self.gdf_hex["density"] - dens_min) / (dens_max - dens_min) + + # Заменяем NaN (в том числе от ячеек без данных) на 0 + self.gdf_hex["norm_pop"] = self.gdf_hex["norm_pop"].fillna(0) + self.gdf_hex["norm_dens"] = self.gdf_hex["norm_dens"].fillna(0) + + # 3. Вычисляем сырое значение + self.gdf_hex["combined_raw"] = (self.gdf_hex["norm_pop"] + self.gdf_hex["norm_dens"]) / 2 + + # 4. Мин–макс нормируем combined_raw + raw_min, raw_max = (self.gdf_hex["combined_raw"].min(), self.gdf_hex["combined_raw"].max()) + if raw_max == raw_min: + self.gdf_hex["combined_norm"] = 0.5 + else: + self.gdf_hex["combined_norm"] = (self.gdf_hex["combined_raw"] - raw_min) / (raw_max - raw_min) + + self.gdf_hex["combined_norm"] = self.gdf_hex["combined_norm"].fillna(0) + + # 5. Присваиваем оценку: + # – для ячеек с данными: масштабируем в [1;5] + # – для ячеек без данных (вода): оставляем 0 + # 5.1 Сначала проставляем всем 1–5 + self.gdf_hex["score"] = (self.gdf_hex["combined_norm"] * 4 + 1).round().clip(lower=1, upper=5).astype(int) + + # 5.2 Выставляем 0 там, где нет ни population, ни density + mask_no_data = self.gdf_hex["population"].isna() & self.gdf_hex["density"].isna() + self.gdf_hex.loc[mask_no_data, "score"] = 0 def assign_interpretations(self): """ @@ -167,7 +143,7 @@ def assign_interpretations(self): Modifies: self.gdf_hex: Adds column 'interpretation'. """ - self.gdf_hex['interpretation'] = self.gdf_hex['score'].apply( + self.gdf_hex["interpretation"] = self.gdf_hex["score"].apply( lambda v: CityPopulationScorer.INTERPRETATIONS[int(v)] if pd.notna(v) else None ) @@ -188,20 +164,16 @@ def generate_output(self) -> list: """ output_list = [] for _, row in self.gdf_hex.iterrows(): - output_list.append({ - 'hexagon_id': row['hexagon_id'], - 'project': None, - 'average_population_density': ( - round(row['density'], 1) if pd.notna(row['density']) else None - ), - 'total_population': ( - int(row['population']) if pd.notna(row['population']) else None - ), - 'score': ( - float(row['score']) if pd.notna(row['score']) else None - ), - 'interpretation': row['interpretation'] - }) + output_list.append( + { + "hexagon_id": row["hexagon_id"], + "project": None, + "average_population_density": (round(row["density"], 1) if pd.notna(row["density"]) else None), + "total_population": (int(row["population"]) if pd.notna(row["population"]) else None), + "score": (float(row["score"]) if pd.notna(row["score"]) else None), + "interpretation": row["interpretation"], + } + ) self.output = output_list return output_list @@ -224,4 +196,3 @@ def run(self) -> list: self.normalize_and_score() self.assign_interpretations() return self.generate_output() - diff --git a/popframe/method/engineer.py b/popframe/method/engineer.py index 3d7f8a6..1bddfb1 100644 --- a/popframe/method/engineer.py +++ b/popframe/method/engineer.py @@ -3,6 +3,7 @@ from typing import Dict, Any, Set from popframe.utils.const import RADIUS_NPP_M, RADIUS_HPP_M, RADIUS_DEFAULT_INFRA_M + class InfrastructureAnalyzer: def __init__(self, infrastructure_gdf: gpd.GeoDataFrame, assessment_areas_gdf: gpd.GeoDataFrame) -> None: """ @@ -18,12 +19,12 @@ def __init__(self, infrastructure_gdf: gpd.GeoDataFrame, assessment_areas_gdf: g # Convert coordinate systems to metric (EPSG:3857) for calculations self.infrastructure_gdf = infrastructure_gdf.to_crs(epsg=3857) self.assessment_areas_gdf = assessment_areas_gdf.to_crs(epsg=3857) - + # Initialize columns and start analysis - self.assessment_areas_gdf['score'] = 0 - self.assessment_areas_gdf['types_in_radius'] = None + self.assessment_areas_gdf["score"] = 0 + self.assessment_areas_gdf["types_in_radius"] = None self._analyze_infrastructure() - + @staticmethod def get_radius(physical_object_type: Dict[str, Any]) -> float: """ @@ -39,13 +40,13 @@ def get_radius(physical_object_type: Dict[str, Any]) -> float: float The radius in meters for the given object type. """ - name = physical_object_type.get('name', '') + name = physical_object_type.get("name", "") if "Атомная электростанция" in name: return float(RADIUS_NPP_M) if "Гидроэлектростанция" in name: return float(RADIUS_HPP_M) return float(RADIUS_DEFAULT_INFRA_M) - + def _analyze_infrastructure(self) -> None: """ Analyzes infrastructure for each territory and adds assessment attributes. @@ -58,24 +59,24 @@ def _analyze_infrastructure(self) -> None: for index, area in self.assessment_areas_gdf.iterrows(): # Create a temporary set of unique types for this assessment area unique_types_in_radius: Set[str] = set() - + # Check each object in infrastructure_gdf for inclusion in the buffer for _, obj in self.infrastructure_gdf.iterrows(): # Extract `physical_object_type` dictionary to determine the radius - physical_object_info: Dict[str, Any] = obj['physical_object_type'] + physical_object_info: Dict[str, Any] = obj["physical_object_type"] buffer_distance: float = self.get_radius(physical_object_info) - + # Create a buffer for the current assessment area area_buffer: BaseGeometry = area.geometry.buffer(buffer_distance) - + # If the object is within the buffer, add its `type` to the set if obj.geometry.within(area_buffer): - unique_types_in_radius.add(obj['type']) - + unique_types_in_radius.add(obj["type"]) + # Count unique types and assign them to 'score' and 'types_in_radius' attributes - self.assessment_areas_gdf.at[index, 'score'] = len(unique_types_in_radius) - self.assessment_areas_gdf.at[index, 'types_in_radius'] = list(unique_types_in_radius) - + self.assessment_areas_gdf.at[index, "score"] = len(unique_types_in_radius) + self.assessment_areas_gdf.at[index, "types_in_radius"] = list(unique_types_in_radius) + def get_results(self) -> gpd.GeoDataFrame: """ Returns the result with columns 'id', 'score', 'types_in_radius', and 'geometry' in CRS 4326. @@ -86,4 +87,4 @@ def get_results(self) -> gpd.GeoDataFrame: GeoDataFrame with the results in CRS 4326. """ # Convert back to CRS 4326 before returning - return self.assessment_areas_gdf[['score', 'types_in_radius', 'geometry']].to_crs(epsg=4326) + return self.assessment_areas_gdf[["score", "types_in_radius", "geometry"]].to_crs(epsg=4326) diff --git a/popframe/method/landuse_assessment.py b/popframe/method/landuse_assessment.py index 83af31f..4160c59 100644 --- a/popframe/method/landuse_assessment.py +++ b/popframe/method/landuse_assessment.py @@ -15,8 +15,8 @@ from popframe.utils.const import LANDUSE_TAGS, LANDUSE_COLORS, LANDUSE_MAPPING -class LandUseAssessment(BaseMethod): +class LandUseAssessment(BaseMethod): @retry(stop_max_attempt_number=5, wait_fixed=2000) def fetch_osm_data(self, polygon, tags): """ @@ -36,7 +36,9 @@ def fetch_osm_data(self, polygon, tags): """ return ox.features_from_polygon(polygon, tags=tags) - def get_landuse_data(self, territories: Optional[gpd.GeoDataFrame] = None, landuse_tags: Optional[Dict[str, list]] = None) -> gpd.GeoDataFrame: + def get_landuse_data( + self, territories: Optional[gpd.GeoDataFrame] = None, landuse_tags: Optional[Dict[str, list]] = None + ) -> gpd.GeoDataFrame: """ Retrieve and process land use data for given territories. @@ -54,9 +56,11 @@ def get_landuse_data(self, territories: Optional[gpd.GeoDataFrame] = None, landu landuse_tags = landuse_tags or LANDUSE_TAGS unique_tags = set(tag for tags in landuse_tags.values() for tag in tags) - tag_filters = [{'landuse': tag} for tag in unique_tags if not tag.startswith('place_')] + \ - [{'place': 'city'}, {'place': 'town'}] + \ - [{'natural': 'water'}, {'natural': 'wood'}, {'natural': 'grassland'}] + tag_filters = ( + [{"landuse": tag} for tag in unique_tags if not tag.startswith("place_")] + + [{"place": "city"}, {"place": "town"}] + + [{"natural": "water"}, {"natural": "wood"}, {"natural": "grassland"}] + ) def process_polygon(polygon): """ @@ -74,13 +78,17 @@ def process_polygon(polygon): """ unique_gdfs = {} with ThreadPoolExecutor() as executor: - future_to_tag_filter = {executor.submit(self.fetch_osm_data, polygon, tag_filter): tag_filter for tag_filter in tag_filters} - for future in tqdm(as_completed(future_to_tag_filter), total=len(tag_filters), desc="Processing landuse tags"): + future_to_tag_filter = { + executor.submit(self.fetch_osm_data, polygon, tag_filter): tag_filter for tag_filter in tag_filters + } + for future in tqdm( + as_completed(future_to_tag_filter), total=len(tag_filters), desc="Processing landuse tags" + ): tag_filter = future_to_tag_filter[future] try: gdf = future.result() if not gdf.empty: - gdf = gdf.set_geometry('geometry') + gdf = gdf.set_geometry("geometry") if gdf.crs is None: gdf.set_crs(epsg=4326, inplace=True) unique_gdfs[frozenset(tag_filter.items())] = gdf @@ -91,10 +99,10 @@ def process_polygon(polygon): for category, tags in landuse_tags.items(): category_geoms = [] for tag in tags: - if tag.startswith('place_'): - tag_filter = {'place': 'city' if tag == 'place_city' else 'town'} + if tag.startswith("place_"): + tag_filter = {"place": "city" if tag == "place_city" else "town"} else: - tag_filter = {'landuse': tag} if tag not in ['wood', 'water', 'grassland'] else {'natural': tag} + tag_filter = {"landuse": tag} if tag not in ["wood", "water", "grassland"] else {"natural": tag} gdf = unique_gdfs.get(frozenset(tag_filter.items())) if gdf is not None: @@ -106,21 +114,23 @@ def process_polygon(polygon): category_union = unary_union(valid_geoms) category_intersect = category_union.intersection(polygon) if isinstance(category_intersect, (Polygon, MultiPolygon)): - combined_geometries.append({'indicator': category, 'geometry': category_intersect}) + combined_geometries.append({"indicator": category, "geometry": category_intersect}) elif isinstance(category_intersect, GeometryCollection): for geom in category_intersect.geoms: if isinstance(geom, (Polygon, MultiPolygon)): - combined_geometries.append({'indicator': category, 'geometry': geom}) + combined_geometries.append({"indicator": category, "geometry": geom}) return combined_geometries all_combined_geometries = [] with ThreadPoolExecutor() as executor: - future_to_polygon = {executor.submit(process_polygon, polygon): polygon for polygon in territories_gdf.geometry} + future_to_polygon = { + executor.submit(process_polygon, polygon): polygon for polygon in territories_gdf.geometry + } for future in as_completed(future_to_polygon): all_combined_geometries.extend(future.result()) - landuse_gdf = gpd.GeoDataFrame(all_combined_geometries, columns=['indicator', 'geometry'], crs='EPSG:4326') + landuse_gdf = gpd.GeoDataFrame(all_combined_geometries, columns=["indicator", "geometry"], crs="EPSG:4326") def adjust_geometries(main_indicator, other_gdf, landuse_gdf): """ @@ -140,60 +150,91 @@ def adjust_geometries(main_indicator, other_gdf, landuse_gdf): geopandas.GeoDataFrame Adjusted GeoDataFrame. """ - main_gdf = landuse_gdf[landuse_gdf['indicator'] == main_indicator] + main_gdf = landuse_gdf[landuse_gdf["indicator"] == main_indicator] if not main_gdf.empty: other_union = unary_union(other_gdf.geometry) - adjusted_geometries = [geom.difference(other_union) for geom in main_gdf.geometry if not geom.difference(other_union).is_empty] + adjusted_geometries = [ + geom.difference(other_union) + for geom in main_gdf.geometry + if not geom.difference(other_union).is_empty + ] if adjusted_geometries: - main_gdf['geometry'] = adjusted_geometries + main_gdf["geometry"] = adjusted_geometries return pd.concat([other_gdf, main_gdf], ignore_index=True) return landuse_gdf - landuse_gdf = adjust_geometries('1.3.5 Процент земель специального назначения', landuse_gdf[~landuse_gdf['indicator'].isin(['1.3.5 Процент земель специального назначения'])], landuse_gdf) - landuse_gdf = adjust_geometries('1.3.1 Процент застройки жилищным строительством', landuse_gdf[~landuse_gdf['indicator'].isin(['1.3.1 Процент застройки жилищным строительством'])], landuse_gdf) - landuse_gdf = adjust_geometries('1.3.6 Процент земель населенных пунктов', landuse_gdf[~landuse_gdf['indicator'].isin(['1.3.6 Процент земель населенных пунктов'])], landuse_gdf) + landuse_gdf = adjust_geometries( + "1.3.5 Процент земель специального назначения", + landuse_gdf[~landuse_gdf["indicator"].isin(["1.3.5 Процент земель специального назначения"])], + landuse_gdf, + ) + landuse_gdf = adjust_geometries( + "1.3.1 Процент застройки жилищным строительством", + landuse_gdf[~landuse_gdf["indicator"].isin(["1.3.1 Процент застройки жилищным строительством"])], + landuse_gdf, + ) + landuse_gdf = adjust_geometries( + "1.3.6 Процент земель населенных пунктов", + landuse_gdf[~landuse_gdf["indicator"].isin(["1.3.6 Процент земель населенных пунктов"])], + landuse_gdf, + ) region_area_km2 = territories_gdf.to_crs(territories_gdf.estimate_utm_crs()).geometry.area.sum() / 1e6 - landuse_gdf['urbanization'] = (landuse_gdf.to_crs(territories_gdf.estimate_utm_crs()).geometry.area / 1e6 / region_area_km2 * 100) + landuse_gdf["urbanization"] = ( + landuse_gdf.to_crs(territories_gdf.estimate_utm_crs()).geometry.area / 1e6 / region_area_km2 * 100 + ) all_landuse_union = unary_union(landuse_gdf.geometry) total_polygon = unary_union(territories_gdf.geometry) other_landuse = total_polygon.difference(all_landuse_union) if not other_landuse.is_empty: - other_landuse_gdf = gpd.GeoDataFrame([{'indicator': '1.3.9 Территории смежного назначения', 'geometry': other_landuse}], crs='EPSG:4326') - other_landuse_gdf['urbanization'] = (other_landuse_gdf.to_crs(territories_gdf.estimate_utm_crs()).geometry.area / 1e6 / region_area_km2 * 100) + other_landuse_gdf = gpd.GeoDataFrame( + [{"indicator": "1.3.9 Территории смежного назначения", "geometry": other_landuse}], crs="EPSG:4326" + ) + other_landuse_gdf["urbanization"] = ( + other_landuse_gdf.to_crs(territories_gdf.estimate_utm_crs()).geometry.area / 1e6 / region_area_km2 * 100 + ) landuse_gdf = pd.concat([landuse_gdf, other_landuse_gdf], ignore_index=True) - grouped_landuse_gdf = landuse_gdf.groupby('indicator').agg({ - 'geometry': lambda x: unary_union(x), - 'urbanization': 'sum' - }).reset_index() + grouped_landuse_gdf = ( + landuse_gdf.groupby("indicator") + .agg({"geometry": lambda x: unary_union(x), "urbanization": "sum"}) + .reset_index() + ) results = [] - total_value = grouped_landuse_gdf['urbanization'].sum() + total_value = grouped_landuse_gdf["urbanization"].sum() normalization_factor = 100 / total_value if total_value > 100 else 1 for index, row in grouped_landuse_gdf.iterrows(): - key = row['indicator'] - value = row['urbanization'] * normalization_factor + key = row["indicator"] + value = row["urbanization"] * normalization_factor rounded_value = math.floor(value * 1000) / 1000.0 - results.append({ - '№ п/п': key.split(' ')[0], - 'Название хранимое': ' '.join(key.split(' ')[1:]) if ' ' in key else key, - 'ед.изм.': '%', - 'Значение': rounded_value, - 'Источник': 'modeled', - 'Период': 2024, - 'geometry': row['geometry'] - }) - results_gdf = gpd.GeoDataFrame(results, crs='EPSG:4326') + results.append( + { + "№ п/п": key.split(" ")[0], + "Название хранимое": " ".join(key.split(" ")[1:]) if " " in key else key, + "ед.изм.": "%", + "Значение": rounded_value, + "Источник": "modeled", + "Период": 2024, + "geometry": row["geometry"], + } + ) + results_gdf = gpd.GeoDataFrame(results, crs="EPSG:4326") return results_gdf - - def plot_landuse(self, region_gdf: gpd.GeoDataFrame, landuse_gdf: gpd.GeoDataFrame, *, colors: Optional[Dict[str, str]] = None, landuse_mapping: Optional[Dict[str, str]] = None) -> None: + def plot_landuse( + self, + region_gdf: gpd.GeoDataFrame, + landuse_gdf: gpd.GeoDataFrame, + *, + colors: Optional[Dict[str, str]] = None, + landuse_mapping: Optional[Dict[str, str]] = None, + ) -> None: """ Plot the land use data on a map with region boundaries and legend. @@ -215,27 +256,30 @@ def plot_landuse(self, region_gdf: gpd.GeoDataFrame, landuse_gdf: gpd.GeoDataFra fig, ax = plt.subplots(figsize=(10, 10)) # Увеличим размер фигуры ax.set_axis_off() # Отключение осей - - region_gdf_utm.boundary.plot(ax=ax, linewidth=1, color='black', label='Граница региона') + + region_gdf_utm.boundary.plot(ax=ax, linewidth=1, color="black", label="Граница региона") colors = colors or LANDUSE_COLORS landuse_mapping = landuse_mapping or LANDUSE_MAPPING - + for key, label in landuse_mapping.items(): - gdf = landuse_gdf[landuse_gdf['Название хранимое'] == key] + gdf = landuse_gdf[landuse_gdf["Название хранимое"] == key] if not gdf.empty: if gdf.crs is None: gdf.set_crs(epsg=4326, inplace=True) gdf_utm = gdf.to_crs(crs) # Преобразование каждого типа земельного использования в UTM - gdf_utm.plot(ax=ax, color=colors.get(label, 'gray'), alpha=0.5, label=label) - + gdf_utm.plot(ax=ax, color=colors.get(label, "gray"), alpha=0.5, label=label) + # Добавление подложки карты ctx.add_basemap(ax, crs=crs, source=ctx.providers.CartoDB.Positron) # Создание пользовательских меток для легенды - legend_elements = [Line2D([0], [0], marker='o', color='w', label=label, - markerfacecolor=color, markersize=10) - for label, color in colors.items()] - - ax.legend(handles=legend_elements, loc='upper left', bbox_to_anchor=(1, 1)) # Перемещение легенды в верхнюю часть справа + legend_elements = [ + Line2D([0], [0], marker="o", color="w", label=label, markerfacecolor=color, markersize=10) + for label, color in colors.items() + ] + + ax.legend( + handles=legend_elements, loc="upper left", bbox_to_anchor=(1, 1) + ) # Перемещение легенды в верхнюю часть справа plt.show() diff --git a/popframe/method/popuation_frame.py b/popframe/method/popuation_frame.py index d60916e..50dedcf 100644 --- a/popframe/method/popuation_frame.py +++ b/popframe/method/popuation_frame.py @@ -5,8 +5,8 @@ from .base_method import BaseMethod -class PopulationFrame(BaseMethod): +class PopulationFrame(BaseMethod): def _create_circle(self, center, size): """ Create a circular buffer around a point. @@ -42,10 +42,10 @@ def _size_from_population(self, population, level): The calculated size for the circle. """ if level in ["Малое сельское поселение", "Среднее сельское поселение", "Большое сельское поселение"]: - return 0.0001 * (population ** 0.5) # Logarithmic scale for small settlements + return 0.0001 * (population**0.5) # Logarithmic scale for small settlements elif level == "Сверхкрупный город": - return 6e-5 * (population ** 0.5) # Reduced linear scale for very large cities - return 0.0001 * (population ** 0.5) # Linear scale for large settlements + return 6e-5 * (population**0.5) # Reduced linear scale for very large cities + return 0.0001 * (population**0.5) # Linear scale for large settlements def _convert_points_to_circles(self, gdf): """ @@ -61,10 +61,15 @@ def _convert_points_to_circles(self, gdf): geopandas.GeoDataFrame GeoDataFrame with circular geometries. """ - gdf['size'] = gdf.apply(lambda row: self._size_from_population(row['population'], row['level']), axis=1) - gdf['size_in_meters'] = gdf['size'] * METERS_PER_DEGREE - gdf['geometry'] = gdf.apply(lambda row: self._create_circle(row['geometry'], row['size_in_meters']) if isinstance(row['geometry'], Point) else row['geometry'], axis=1) - gdf = gdf.drop(columns=['size', 'size_in_meters']) + gdf["size"] = gdf.apply(lambda row: self._size_from_population(row["population"], row["level"]), axis=1) + gdf["size_in_meters"] = gdf["size"] * METERS_PER_DEGREE + gdf["geometry"] = gdf.apply( + lambda row: self._create_circle(row["geometry"], row["size_in_meters"]) + if isinstance(row["geometry"], Point) + else row["geometry"], + axis=1, + ) + gdf = gdf.drop(columns=["size", "size_in_meters"]) return gdf def build_circle_frame(self, update_df: pd.DataFrame | None = None) -> gpd.GeoDataFrame: diff --git a/popframe/method/spatial_inequality.py b/popframe/method/spatial_inequality.py index dc0e3fe..b5429f4 100644 --- a/popframe/method/spatial_inequality.py +++ b/popframe/method/spatial_inequality.py @@ -3,6 +3,7 @@ from typing import Optional, Tuple, Dict from popframe.method.base_method import BaseMethod + class SpatialInequalityCalculator(BaseMethod): """ Calculator for spatial inequality metrics. @@ -11,10 +12,7 @@ class SpatialInequalityCalculator(BaseMethod): """ def transfer_inequality_metrics_to_polygons( - self, - gdf_cities: gpd.GeoDataFrame, - gdf_polygons: gpd.GeoDataFrame, - inequality_keyword: str = "Неравенство" + self, gdf_cities: gpd.GeoDataFrame, gdf_polygons: gpd.GeoDataFrame, inequality_keyword: str = "Неравенство" ) -> Tuple[gpd.GeoDataFrame, Dict[str, Dict[str, float]]]: """ Transfer all columns containing inequality metrics from points (cities) to polygons (agglomerations) @@ -45,35 +43,23 @@ def transfer_inequality_metrics_to_polygons( raise KeyError(f"Не найдено колонок с '{inequality_keyword}'") # 2) Гео‑объединение точек и полигонов cities_with_idx = gpd.sjoin( - gdf_cities[metric_cols + ['geometry']], - gdf_polygons[['geometry']], - how='left', - predicate='within' + gdf_cities[metric_cols + ["geometry"]], gdf_polygons[["geometry"]], how="left", predicate="within" ) # 3) Усреднение по индексам полигонов - grouped = ( - cities_with_idx - .groupby('index_right')[metric_cols] - .mean() - .rename_axis('poly_index') - ) + grouped = cities_with_idx.groupby("index_right")[metric_cols].mean().rename_axis("poly_index") # 4) Присоединяем к полигонам gdf_polygons_with_metrics = ( - gdf_polygons - .reset_index() - .rename(columns={'index': 'poly_index'}) - .merge(grouped.reset_index(), on='poly_index', how='left') - .set_index('poly_index') + gdf_polygons.reset_index() + .rename(columns={"index": "poly_index"}) + .merge(grouped.reset_index(), on="poly_index", how="left") + .set_index("poly_index") ) # 5) Статистика внутри/вне - inside = cities_with_idx.dropna(subset=['index_right']) - outside = cities_with_idx[cities_with_idx['index_right'].isna()] + inside = cities_with_idx.dropna(subset=["index_right"]) + outside = cities_with_idx[cities_with_idx["index_right"].isna()] mean_within = inside[metric_cols].mean().to_dict() mean_outside = outside[metric_cols].mean().to_dict() - stats = { - 'mean_within': mean_within, - 'mean_outside': mean_outside - } + stats = {"mean_within": mean_within, "mean_outside": mean_outside} return gdf_polygons_with_metrics, stats def get_best_territory( @@ -82,7 +68,7 @@ def get_best_territory( group_name: Optional[str] = None, spatial_suffix: str = " - Неравенство", default_col: str = "Пространственное неравенство", - top_n: int = 5 + top_n: int = 5, ) -> gpd.GeoDataFrame: """ Return up to `top_n` territories (rows) with the minimum spatial inequality value. @@ -117,9 +103,7 @@ def get_best_territory( if primary_col not in gdf.columns: raise KeyError(f"В GeoDataFrame нет колонки «{primary_col}»") top_df = gdf.sort_values(primary_col, ascending=True).head(top_n).copy() - pattern = re.compile( - rf"^{re.escape(group_name.strip())}.*\bНеравенство\b", re.IGNORECASE - ) + pattern = re.compile(rf"^{re.escape(group_name.strip())}.*\bНеравенство\b", re.IGNORECASE) group_metrics = [c for c in top_df.columns if pattern.match(c)] non_metrics = [c for c in top_df.columns if "Неравенство" not in c] keep_cols = non_metrics + group_metrics @@ -133,7 +117,7 @@ def get_best_group_for_territory( self, gdf: gpd.GeoDataFrame, suffix: str = " - Неравенство", - new_col: str = "Наименьшее неравенство для соц‑группы" + new_col: str = "Наименьшее неравенство для соц‑группы", ) -> gpd.GeoDataFrame: """ Add a column with the name of the social group with the minimum inequality metric. @@ -153,10 +137,9 @@ def get_best_group_for_territory( GeoDataFrame with an added column for the group with the minimum inequality. """ all_cols = [ - c for c in gdf.columns - if isinstance(c, str) - and c.endswith(suffix) - and c[: -len(suffix)].strip().lower() != "итоговое" + c + for c in gdf.columns + if isinstance(c, str) and c.endswith(suffix) and c[: -len(suffix)].strip().lower() != "итоговое" ] if not all_cols: raise KeyError(f"Колонки с суффиксом '{suffix}' не найдены") diff --git a/popframe/method/territory_evaluation.py b/popframe/method/territory_evaluation.py index 8722820..1b094cd 100644 --- a/popframe/method/territory_evaluation.py +++ b/popframe/method/territory_evaluation.py @@ -19,7 +19,6 @@ class TerritoryEvaluation(BaseMethod): - @classmethod def _is_criterion_satisfied(cls, profile_value, criterion_value): """ @@ -81,44 +80,44 @@ def calculate_potential(self, criteria_values): profiles = { "Жилая застройка - ИЖС": { "criteria": {"Население": 1, "Транспорт": 2, "Экология": 4, "Соц-об": 4, "Инж инф": 3}, - "weights": {"Население": 0, "Транспорт": 0, "Экология": 1, "Соц-об": 1, "Инж инф": 0} + "weights": {"Население": 0, "Транспорт": 0, "Экология": 1, "Соц-об": 1, "Инж инф": 0}, }, "Жилая застройка - Малоэтажная": { "criteria": {"Население": 3, "Транспорт": 3, "Экология": 4, "Соц-об": 3, "Инж инф": 4}, - "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 1, "Инж инф": 1} + "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 1, "Инж инф": 1}, }, "Жилая застройка - Среднеэтажная": { "criteria": {"Население": 4, "Транспорт": 4, "Экология": 4, "Соц-об": 3, "Инж инф": 5}, - "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 1, "Инж инф": 1} + "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 1, "Инж инф": 1}, }, "Жилая застройка - Многоэтажная": { "criteria": {"Население": 5, "Транспорт": 5, "Экология": 4, "Соц-об": 3, "Инж инф": 5}, - "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 1, "Инж инф": 1} + "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 1, "Инж инф": 1}, }, "Общественно-деловая": { "criteria": {"Население": 4, "Транспорт": 5, "Экология": 4, "Соц-об": 2, "Инж инф": 4}, - "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 0, "Инж инф": 1} + "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 0, "Инж инф": 1}, }, "Рекреационная": { "criteria": {"Население": 0, "Транспорт": 0, "Экология": 4, "Соц-об": 0, "Инж инф": 0}, - "weights": {"Население": 0, "Транспорт": 0, "Экология": 0, "Соц-об": 0, "Инж инф": 0} + "weights": {"Население": 0, "Транспорт": 0, "Экология": 0, "Соц-об": 0, "Инж инф": 0}, }, "Специального назначения": { "criteria": {"Население": 0, "Транспорт": 3, "Экология": 1, "Соц-об": 0, "Инж инф": 2}, - "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 0, "Инж инф": 1} + "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 0, "Инж инф": 1}, }, "Промышленная": { "criteria": {"Население": 3, "Транспорт": 4, "Экология": 0, "Соц-об": 2, "Инж инф": 4}, - "weights": {"Население": 1, "Транспорт": 1, "Экология": 0, "Соц-об": 0, "Инж инф": 1} + "weights": {"Население": 1, "Транспорт": 1, "Экология": 0, "Соц-об": 0, "Инж инф": 1}, }, "Сельско-хозяйственная": { "criteria": {"Население": 3, "Транспорт": 4, "Экология": 4, "Соц-об": 2, "Инж инф": 3}, - "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 0, "Инж инф": 1} + "weights": {"Население": 1, "Транспорт": 1, "Экология": 1, "Соц-об": 0, "Инж инф": 1}, }, "Транспортная инженерная": { "criteria": {"Население": 2, "Транспорт": 2, "Экология": 0, "Соц-об": 1, "Инж инф": 2}, - "weights": {"Население": 0, "Транспорт": 0, "Экология": 0, "Соц-об": 0, "Инж инф": 0} - } + "weights": {"Население": 0, "Транспорт": 0, "Экология": 0, "Соц-об": 0, "Инж инф": 0}, + }, } potential_scores = {} @@ -135,8 +134,12 @@ def calculate_potential(self, criteria_values): ) potential_scores[profile] = (potential, weighted_score) - ranked_profiles = sorted(potential_scores.items(), key=lambda x: (x[0] != "Рекреационная", x[1][0], x[1][1]), reverse=True) - ranked_profiles = [item for item in ranked_profiles if item[1][0] > 0] + [item for item in ranked_profiles if item[1][0] == 0] + ranked_profiles = sorted( + potential_scores.items(), key=lambda x: (x[0] != "Рекреационная", x[1][0], x[1][1]), reverse=True + ) + ranked_profiles = [item for item in ranked_profiles if item[1][0] > 0] + [ + item for item in ranked_profiles if item[1][0] == 0 + ] return ranked_profiles @@ -176,7 +179,6 @@ def evaluate_territory_location(self, territories_gdf): return results - def _get_level_scores(self): """ Get the scores for different levels of settlements. @@ -199,7 +201,6 @@ def _get_level_scores(self): "Малое сельское поселение": 1, } - def _evaluate_single_territory(self, territory_geom, territory_name, settlements_gdf, level_scores): """ Evaluate a single territory, determining its nearest settlements and interpretation. @@ -228,7 +229,6 @@ def _evaluate_single_territory(self, territory_geom, territory_name, settlements else: return self._evaluate_between_settlements(territory_geom, territory_name, settlements_gdf) - def _evaluate_nearby_settlement(self, territory_name, settlements_in_buffer, level_scores): """ Evaluate the territory if it is near settlements. @@ -247,23 +247,24 @@ def _evaluate_nearby_settlement(self, territory_name, settlements_in_buffer, lev dict The results of the territory evaluation. """ - settlements_in_buffer['score'] = settlements_in_buffer['level'].map(level_scores) - max_settlement = settlements_in_buffer.loc[settlements_in_buffer['score'].idxmax()] - max_level = max_settlement['level'] - max_score = max_settlement['score'] - closest_settlement_name = max_settlement['name'] - - interpretation = f"Территория находится внутри или непосредственной близости населенного пункта уровня {max_level}" + settlements_in_buffer["score"] = settlements_in_buffer["level"].map(level_scores) + max_settlement = settlements_in_buffer.loc[settlements_in_buffer["score"].idxmax()] + max_level = max_settlement["level"] + max_score = max_settlement["score"] + closest_settlement_name = max_settlement["name"] + + interpretation = ( + f"Территория находится внутри или непосредственной близости населенного пункта уровня {max_level}" + ) return { "territory": territory_name, "score": max_score, "interpretation": interpretation, "closest_settlement": closest_settlement_name, "closest_settlement1": None, - "closest_settlement2": None + "closest_settlement2": None, } - def _evaluate_between_settlements(self, territory_geom, territory_name, settlements_gdf): """ Evaluate the territory if it is located between settlements. @@ -286,7 +287,9 @@ def _evaluate_between_settlements(self, territory_geom, territory_name, settleme nearby_settlements = settlements_gdf[settlements_gdf.geometry.intersects(buffer_20km)] if len(nearby_settlements) > 1: - closest_settlement1, closest_settlement2 = self._find_closest_settlement_pair(territory_geom, nearby_settlements) + closest_settlement1, closest_settlement2 = self._find_closest_settlement_pair( + territory_geom, nearby_settlements + ) if closest_settlement1 and closest_settlement2: interpretation = "Территория находится между основными ядрами системы расселения" @@ -296,19 +299,18 @@ def _evaluate_between_settlements(self, territory_geom, territory_name, settleme "interpretation": interpretation, "closest_settlement": None, "closest_settlement1": closest_settlement1.name, - "closest_settlement2": closest_settlement2.name + "closest_settlement2": closest_settlement2.name, } - + return { "territory": territory_name, "score": 0, "interpretation": "Территория находится за границей агломерации", "closest_settlement": None, "closest_settlement1": None, - "closest_settlement2": None + "closest_settlement2": None, } - def _find_closest_settlement_pair(self, territory_geom, nearby_settlements): """ Find the pair of nearest settlements around the territory. @@ -326,7 +328,7 @@ def _find_closest_settlement_pair(self, territory_geom, nearby_settlements): The pair of nearest settlements and the minimum distance. """ - min_distance = float('inf') + min_distance = float("inf") closest_settlement1 = None closest_settlement2 = None @@ -336,15 +338,18 @@ def _find_closest_settlement_pair(self, territory_geom, nearby_settlements): distance_to_settlement2 = territory_geom.distance(settlement2.geometry) total_distance = distance_to_settlement1 + distance_to_settlement2 - if (distance_to_settlement1 > BUFFER_NEARBY_SETTLEMENT_M and distance_to_settlement2 > BUFFER_NEARBY_SETTLEMENT_M and - total_distance <= BETWEEN_SETTLEMENT_RATIO * distance_between_settlements and - total_distance < min_distance): + if ( + distance_to_settlement1 > BUFFER_NEARBY_SETTLEMENT_M + and distance_to_settlement2 > BUFFER_NEARBY_SETTLEMENT_M + and total_distance <= BETWEEN_SETTLEMENT_RATIO * distance_between_settlements + and total_distance < min_distance + ): min_distance = total_distance closest_settlement1 = settlement1 closest_settlement2 = settlement2 return closest_settlement1, closest_settlement2 - + def population_criterion(self, territories_gdf): """ Calculate population density and assess territories based on demographic characteristics. @@ -362,11 +367,11 @@ def population_criterion(self, territories_gdf): gdf_territory = territories_gdf.to_crs(epsg=3857) towns_gdf = self.region.get_towns_gdf().to_crs(epsg=3857) results = self._calculate_density_population(gdf_territory, towns_gdf) - + for result in results: - score = self._assess_territory(result['average_population_density'], result['total_population']) - result['score'] = score - result['interpretation'] = self._interpret_score(score) + score = self._assess_territory(result["average_population_density"], result["total_population"]) + result["score"] = score + result["interpretation"] = self._interpret_score(score) return results @@ -391,21 +396,25 @@ def _calculate_density_population(self, gdf_territory, towns_gdf, radius_m=20000 results = [] for _, territory in gdf_territory.iterrows(): buffer = territory.geometry.buffer(radius_m) - towns_in_buffer = gpd.sjoin(towns_gdf, gpd.GeoDataFrame(geometry=[buffer], crs=towns_gdf.crs), predicate='intersects') + towns_in_buffer = gpd.sjoin( + towns_gdf, gpd.GeoDataFrame(geometry=[buffer], crs=towns_gdf.crs), predicate="intersects" + ) if not towns_in_buffer.empty: - total_population = towns_in_buffer['population'].sum() + total_population = towns_in_buffer["population"].sum() buffer_area = buffer.area / 1e6 # in square kilometers population_density = total_population / buffer_area if buffer_area > 0 else 0 else: total_population = 0 population_density = 0 - results.append({ - 'project': territory.get('name'), - 'average_population_density': round(population_density, 1), - 'total_population': total_population - }) + results.append( + { + "project": territory.get("name"), + "average_population_density": round(population_density, 1), + "total_population": total_population, + } + ) return results @@ -430,11 +439,13 @@ def _assess_territory(self, density, population): score_df = pd.DataFrame(DENSITY_SCORE_TABLE) result = score_df[ - (score_df['min_dens'] <= density) & (density < score_df['max_dens']) & - (score_df['min_pop'] <= population) & (population < score_df['max_pop']) + (score_df["min_dens"] <= density) + & (density < score_df["max_dens"]) + & (score_df["min_pop"] <= population) + & (population < score_df["max_pop"]) ] if not result.empty: - return result.iloc[0]['score'] + return result.iloc[0]["score"] return 0 def _interpret_score(self, score): @@ -457,6 +468,6 @@ def _interpret_score(self, score): 2: "Территория имеет умеренные показатели численности и плотности населения, что указывает на потенциал для развития.", 3: "Территория имеет показатели численности и плотности населения выше среднего, что указывает на возможность развития территории.", 4: "Территория имеет хорошие показатели численности и плотности населения, что способствует ее активному развитию.", - 5: "Территория с высокими показателями численности и плотности населения, что указывает высокий потенциал развития." + 5: "Территория с высокими показателями численности и плотности населения, что указывает высокий потенциал развития.", } return interpretations.get(score, "Неизвестный показатель.") diff --git a/popframe/models/geodataframe.py b/popframe/models/geodataframe.py index 086b671..62fec03 100644 --- a/popframe/models/geodataframe.py +++ b/popframe/models/geodataframe.py @@ -26,7 +26,7 @@ class BaseRow(BaseModel, ABC): ----- Inheriting classes can be configured to provide default column values to avoid None and NaN. """ - + model_config = ConfigDict(arbitrary_types_allowed=True) geometry: BaseGeometry index: int @@ -34,7 +34,7 @@ class BaseRow(BaseModel, ABC): class GeoDataFrame(gpd.GeoDataFrame, BaseModel, Generic[T]): """ - Custom GeoDataFrame class that extends geopandas.GeoDataFrame and supports data validation with Pydantic's BaseModel. + Custom GeoDataFrame class that extends geopandas.GeoDataFrame and supports data validation with Pydantic's BaseModel. This class allows for the automatic validation of data on initialization using a generic class T inherited from BaseRow. Attributes @@ -63,12 +63,12 @@ class GeoDataFrame(gpd.GeoDataFrame, BaseModel, Generic[T]): - The "index" column is managed separately to avoid conflicts with the GeoDataFrame's index. - The coordinate reference system (CRS) is either provided in the kwargs or inherited from the input data. """ - + @property def generic(self): """ Returns the generic class type used in the GeoDataFrame. This is needed to ensure Pydantic validation is performed correctly. - + Returns ------- T : Type @@ -88,7 +88,7 @@ def __init__(self, data, *args, **kwargs): Additional positional arguments passed to GeoDataFrame. **kwargs : dict Additional keyword arguments, including CRS (Coordinate Reference System) for spatial data. - + Raises ------ AssertionError @@ -96,7 +96,7 @@ def __init__(self, data, *args, **kwargs): """ generic_class = self.generic assert issubclass(generic_class, BaseRow), "Generic should be inherited from BaseRow" - + # Convert data to GeoDataFrame if it isn't one already if not isinstance(data, gpd.GeoDataFrame): data = gpd.GeoDataFrame(data, *args, **kwargs) @@ -119,7 +119,6 @@ def __init__(self, data, *args, **kwargs): index_name = data.index.name self.index.name = index_name self.set_geometry("geometry", inplace=True) - + # Set CRS (Coordinate Reference System) self.crs = kwargs["crs"] if "crs" in kwargs else data.crs - diff --git a/popframe/models/region.py b/popframe/models/region.py index 52213b2..d4204f6 100644 --- a/popframe/models/region.py +++ b/popframe/models/region.py @@ -8,12 +8,13 @@ from popframe.preprocessing.level_filler import LevelFiller -DISTRICTS_PLOT_COLOR = '#28486d' -SETTLEMENTS_PLOT_COLOR = '#ddd' -TOWNS_PLOT_COLOR = '#333333' -TERRITORIES_PLOT_COLOR = '#893434' +DISTRICTS_PLOT_COLOR = "#28486d" +SETTLEMENTS_PLOT_COLOR = "#ddd" +TOWNS_PLOT_COLOR = "#333333" +TERRITORIES_PLOT_COLOR = "#893434" -class Region(): + +class Region: """ A class representing a geographical region that includes districts, settlements, towns, and optionally territories. Provides methods for validating and visualizing spatial data, as well as for calculating accessibility between towns. @@ -37,13 +38,13 @@ class Region(): """ def __init__( - self, - region : gpd.GeoDataFrame, - towns : gpd.GeoDataFrame, - accessibility_matrix : pd.DataFrame, - ): + self, + region: gpd.GeoDataFrame, + towns: gpd.GeoDataFrame, + accessibility_matrix: pd.DataFrame, + ): """ - Initializes the Region object with GeoDataFrames for region, districts, settlements, and towns. + Initializes the Region object with GeoDataFrames for region, districts, settlements, and towns. Optionally includes territories and an accessibility matrix to model transportation between towns. Parameters @@ -70,40 +71,44 @@ def __init__( towns = self.validate_towns(towns) accessibility_matrix = self.validate_accessibility_matrix(accessibility_matrix) - assert (accessibility_matrix.index == towns.index).all(), "Accessibility matrix indices and towns indices don't match" - assert region.crs == towns.crs, 'CRS should match everywhere' + assert ( + accessibility_matrix.index == towns.index + ).all(), "Accessibility matrix indices and towns indices don't match" + assert region.crs == towns.crs, "CRS should match everywhere" self.crs = towns.crs self.region = region self._towns = Town.from_gdf(towns) - + self.accessibility_matrix = accessibility_matrix - + @staticmethod - def validate_towns(gdf : gpd.GeoDataFrame) -> gpd.GeoDataFrame: + def validate_towns(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ Validates the towns GeoDataFrame. """ - assert isinstance(gdf, gpd.GeoDataFrame), 'Towns should be instance of gpd.GeoDataFrame' + assert isinstance(gdf, gpd.GeoDataFrame), "Towns should be instance of gpd.GeoDataFrame" return gdf @staticmethod - def validate_region(gdf : gpd.GeoDataFrame) -> gpd.GeoDataFrame: + def validate_region(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ Validates the region GeoDataFrame to ensure it has the correct structure and data types. """ - assert isinstance(gdf, gpd.GeoDataFrame), 'Region should be instance of gpd.GeoDataFrame' - assert gdf.geom_type.isin(['Polygon', 'MultiPolygon']).all(), 'District geometry should be Polygon or MultiPolygon' - return gdf[['geometry']] - + assert isinstance(gdf, gpd.GeoDataFrame), "Region should be instance of gpd.GeoDataFrame" + assert gdf.geom_type.isin( + ["Polygon", "MultiPolygon"] + ).all(), "District geometry should be Polygon or MultiPolygon" + return gdf[["geometry"]] + @staticmethod - def validate_accessibility_matrix(df : pd.DataFrame) -> pd.DataFrame: + def validate_accessibility_matrix(df: pd.DataFrame) -> pd.DataFrame: """ - Validates the accessibility matrix, ensuring it has non-negative float values + Validates the accessibility matrix, ensuring it has non-negative float values and matching row and column indices. """ - assert pd.api.types.is_float_dtype(df.values), 'Accessibility matrix values should be float' - assert (df.values>=0).all(), 'Accessibility matrix values should be greater or equal 0' + assert pd.api.types.is_float_dtype(df.values), "Accessibility matrix values should be float" + assert (df.values >= 0).all(), "Accessibility matrix values should be greater or equal 0" assert (df.index == df.columns).all(), "Accessibility matrix indices and columns don't match" return df @@ -118,18 +123,17 @@ def towns(self) -> list[Town]: List of Town objects. """ return self._towns.values() - + def get_update_towns_gdf(self, update_df: pd.DataFrame | None = None): gdf = self.get_towns_gdf() if update_df is not None: # Обновляем значения населения в gdf из update_df - gdf.update(update_df[['population']]) - + gdf.update(update_df[["population"]]) + level_filler = LevelFiller(towns=gdf) gdf = level_filler.fill_levels() return gdf - def get_towns_gdf(self) -> gpd.GeoDataFrame: """ Returns a GeoDataFrame representing all towns in the region, including their relationships with settlements and districts. @@ -141,7 +145,7 @@ def get_towns_gdf(self) -> gpd.GeoDataFrame: """ data = [town.to_dict() for town in self.towns] gdf = gpd.GeoDataFrame(data, crs=self.crs) - gdf.set_index('id', inplace=True, drop=False) + gdf.set_index("id", inplace=True, drop=False) gdf = gdf.rename_axis(None) return gdf.fillna(0) @@ -149,12 +153,12 @@ def get_towns_gdf(self) -> gpd.GeoDataFrame: def __getitem__(self, arg): """ Overloaded subscript operator to access a town or accessibility data based on the argument type. - + Parameters ---------- arg : int or tuple Integer to access a town by its ID, or tuple to retrieve accessibility data between two towns. - + Raises ------ NotImplementedError @@ -177,7 +181,7 @@ def _(self, towns): if isinstance(town_b, Town): town_b = town_b.id return self.accessibility_matrix.loc[town_a, town_b] - + @staticmethod def from_pickle(file_path: str): """ @@ -208,4 +212,4 @@ def to_pickle(self, file_path: str): Path to the .pickle file. """ with open(file_path, "wb") as f: - pickle.dump(self, f) \ No newline at end of file + pickle.dump(self, f) diff --git a/popframe/models/town.py b/popframe/models/town.py index 7badd3e..9d15a1e 100644 --- a/popframe/models/town.py +++ b/popframe/models/town.py @@ -24,7 +24,7 @@ class Town(BaseModel): ------- to_dict() -> dict Converts the Town object to a dictionary format. - + from_gdf(cls, gdf: gpd.GeoDataFrame) -> dict Class method to create a dictionary of Town instances from a GeoDataFrame. """ @@ -45,11 +45,11 @@ def to_dict(self) -> dict: A dictionary containing the 'id', 'name', 'population', 'level', and 'geometry' of the town. """ res = { - 'id': self.id, - 'name': self.name, - 'population': self.population, - 'level': self.level, - 'geometry': self.geometry + "id": self.id, + "name": self.name, + "population": self.population, + "level": self.level, + "geometry": self.geometry, } return res @@ -61,7 +61,7 @@ def from_gdf(cls, gdf: gpd.GeoDataFrame) -> dict: Parameters ---------- gdf : gpd.GeoDataFrame - GeoDataFrame containing data to initialize Town instances. + GeoDataFrame containing data to initialize Town instances. The GeoDataFrame should have 'id', 'name', 'level', 'population', and 'geometry' columns. Returns @@ -69,4 +69,4 @@ def from_gdf(cls, gdf: gpd.GeoDataFrame) -> dict: dict A dictionary where the keys are the indices of the GeoDataFrame and the values are Town instances. """ - return {i: cls(**{**gdf.loc[i].to_dict(), 'id': i}) for i in gdf.index} + return {i: cls(**{**gdf.loc[i].to_dict(), "id": i}) for i in gdf.index} diff --git a/popframe/preprocessing/__init__.py b/popframe/preprocessing/__init__.py index 319c90f..dae7f37 100644 --- a/popframe/preprocessing/__init__.py +++ b/popframe/preprocessing/__init__.py @@ -1,3 +1,3 @@ """ Data getter utilities are located here. -""" \ No newline at end of file +""" diff --git a/popframe/preprocessing/adjacency_calculator.py b/popframe/preprocessing/adjacency_calculator.py index bf04678..947a6bd 100644 --- a/popframe/preprocessing/adjacency_calculator.py +++ b/popframe/preprocessing/adjacency_calculator.py @@ -5,6 +5,7 @@ from typing import Any import geopandas as gpd + try: import networkit as nk # type: ignore except Exception as exc: # pragma: no cover @@ -16,6 +17,7 @@ import math from ..models.geodataframe import BaseRow, GeoDataFrame + # from ..graph_generator import GraphGenerator @@ -162,8 +164,8 @@ def _get_nk_distances( target_blocks = loc.index source_block = loc.name - target_nodes = [from_blocks.loc[i,'index_left'] for i in target_blocks] - source_node = from_blocks.loc[source_block, 'index_left'] + target_nodes = [from_blocks.loc[i, "index_left"] for i in target_blocks] + source_node = from_blocks.loc[source_block, "index_left"] distances = [nk_dists.getDistance(source_node, node) for node in target_nodes] return pd.Series(data=distances, index=target_blocks) @@ -188,11 +190,11 @@ def get_dataframe(self) -> pd.DataFrame: blocks = self.blocks.copy() blocks.geometry = blocks.geometry.representative_point() - from_blocks = graph_gdf.sjoin_nearest(blocks, how='right') - + from_blocks = graph_gdf.sjoin_nearest(blocks, how="right") + accs_matrix = pd.DataFrame(0, index=from_blocks.index, columns=from_blocks.index) nk_dists = nk.distance.SPSP( # pylint: disable=c-extension-no-member - graph_nk, sources=list(from_blocks['index_left']) + graph_nk, sources=list(from_blocks["index_left"]) ).run() accs_matrix = accs_matrix.apply(lambda x: self._get_nk_distances(nk_dists, x, from_blocks), axis=1) @@ -200,6 +202,6 @@ def get_dataframe(self) -> pd.DataFrame: # accs_matrix.columns = blocks.index # bug fix in city block's closest node is no connecte to actual transport infrastructure - accs_matrix[accs_matrix>10000] = accs_matrix[accs_matrix < 10000].max().max() + accs_matrix[accs_matrix > 10000] = accs_matrix[accs_matrix < 10000].max().max() return accs_matrix diff --git a/popframe/preprocessing/level_filler.py b/popframe/preprocessing/level_filler.py index ffa96f7..38431da 100644 --- a/popframe/preprocessing/level_filler.py +++ b/popframe/preprocessing/level_filler.py @@ -20,6 +20,7 @@ class TownRow(BaseRow): level : str, optional The administrative level of the town, defaults to "Нет уровня" (no level). """ + id: int geometry: Point name: str @@ -42,7 +43,7 @@ class LevelFiller(BaseModel): ------- _assign_level(row) -> str A static method that assigns the correct administrative level to a town based on its population. - + validate_towns(gdf) A Pydantic validator that ensures town levels are correctly assigned before processing the GeoDataFrame. @@ -52,7 +53,7 @@ class LevelFiller(BaseModel): towns: GeoDataFrame[TownRow] population_thresholds: ClassVar[dict[str, tuple[int, int]]] = { - "Сверхкрупный город": (3000000, float('inf')), + "Сверхкрупный город": (3000000, float("inf")), "Крупнейший город": (1000000, 3000000), "Крупный город": (250000, 1000000), "Большой город": (100000, 250000), @@ -79,8 +80,8 @@ def _assign_level(row) -> str: str The administrative level of the town. """ - population = row['population'] - + population = row["population"] + for level, (lower_bound, upper_bound) in LevelFiller.population_thresholds.items(): if lower_bound < population <= upper_bound: return level @@ -116,4 +117,3 @@ def fill_levels(self) -> GeoDataFrame[TownRow]: An updated GeoDataFrame with filled levels for each town. """ return self.towns - diff --git a/popframe/preprocessing/population_filler.py b/popframe/preprocessing/population_filler.py index 8a574ce..a2de98f 100644 --- a/popframe/preprocessing/population_filler.py +++ b/popframe/preprocessing/population_filler.py @@ -4,6 +4,7 @@ import shapely import pandas as pd + class UnitRow(BaseRow): """ A class representing a unit of geographic data with a polygon or multipolygon geometry and a population count. @@ -15,6 +16,7 @@ class UnitRow(BaseRow): population : int The population residing within the geographic unit. """ + geometry: shapely.Polygon | shapely.MultiPolygon population: int @@ -34,6 +36,7 @@ class TownRow(BaseRow): is_city : bool A boolean indicating whether the town is a city. """ + geometry: shapely.Point name: str level: str @@ -59,10 +62,10 @@ class PopulationFiller(BaseModel): ------- validate_units(cls, gdf) -> GeoDataFrame[UnitRow] Validates that the units input is a valid GeoDataFrame of UnitRow type. - + validate_towns(cls, gdf) -> GeoDataFrame[TownRow] Validates that the towns input is a valid GeoDataFrame of TownRow type. - + validate_adjacency_matrix(cls, df) -> pd.DataFrame Validates that the adjacency matrix is square and matches the town index. @@ -81,7 +84,7 @@ class PopulationFiller(BaseModel): adjacency_matrix: InstanceOf[pd.DataFrame] city_multiplier: float = Field(gt=0, default=10) - @field_validator('units', mode='before') + @field_validator("units", mode="before") @classmethod def validate_units(cls, gdf): """ @@ -101,7 +104,7 @@ def validate_units(cls, gdf): gdf = GeoDataFrame[UnitRow](gdf) return gdf - @field_validator('towns', mode='before') + @field_validator("towns", mode="before") @classmethod def validate_towns(cls, gdf): """ @@ -121,7 +124,7 @@ def validate_towns(cls, gdf): gdf = GeoDataFrame[TownRow](gdf) return gdf - @field_validator('adjacency_matrix', mode='after') + @field_validator("adjacency_matrix", mode="after") @classmethod def validate_adjacency_matrix(cls, df): """ @@ -136,7 +139,7 @@ def validate_adjacency_matrix(cls, df): ------- pd.DataFrame A validated adjacency matrix. - + Raises ------ AssertionError @@ -145,7 +148,7 @@ def validate_adjacency_matrix(cls, df): assert all(df.index == df.columns), "Matrix index and columns don't match" return df - @model_validator(mode='after') + @model_validator(mode="after") def validate_model(self): """ Validates that the coordinate reference systems (CRS) of the towns and units match, and that the adjacency matrix matches the town indices. @@ -193,17 +196,19 @@ def fill(self) -> GeoDataFrame[TownRow]: A GeoDataFrame with updated population data for the towns. """ towns = self.towns.copy() - towns['median_time'] = towns.apply(lambda x: self._get_median_time(x.name), axis=1) + towns["median_time"] = towns.apply(lambda x: self._get_median_time(x.name), axis=1) for i in self.units.index: - geometry = self.units.loc[i, 'geometry'] - population = self.units.loc[i, 'population'] + geometry = self.units.loc[i, "geometry"] + population = self.units.loc[i, "population"] unit_towns = towns.loc[towns.within(geometry)].copy() - unit_towns['coef'] = unit_towns.apply(lambda x: (self.city_multiplier if x['is_city'] else 1)/x['median_time'], axis=1) - coef_sum = unit_towns['coef'].sum() - unit_towns['coef_norm'] = unit_towns['coef'] / coef_sum - unit_towns['population'] = population * unit_towns['coef_norm'] + unit_towns["coef"] = unit_towns.apply( + lambda x: (self.city_multiplier if x["is_city"] else 1) / x["median_time"], axis=1 + ) + coef_sum = unit_towns["coef"].sum() + unit_towns["coef_norm"] = unit_towns["coef"] / coef_sum + unit_towns["population"] = population * unit_towns["coef_norm"] for j in unit_towns.index: - towns.loc[j, 'coef'] = unit_towns.loc[j, 'coef'] - towns.loc[j, 'coef_norm'] = unit_towns.loc[j, 'coef_norm'] - towns.loc[j, 'population'] = round(unit_towns.loc[j, 'population']) + towns.loc[j, "coef"] = unit_towns.loc[j, "coef"] + towns.loc[j, "coef_norm"] = unit_towns.loc[j, "coef_norm"] + towns.loc[j, "population"] = round(unit_towns.loc[j, "population"]) return towns diff --git a/popframe/utils/const.py b/popframe/utils/const.py index 4bd44e6..35eddab 100644 --- a/popframe/utils/const.py +++ b/popframe/utils/const.py @@ -21,56 +21,66 @@ # Population density scoring table for TerritoryEvaluation._assess_territory # Each row defines range thresholds and the resulting score DENSITY_SCORE_TABLE = [ - {"min_dens": 0, "max_dens": 10, "min_pop": 0, "max_pop": 1_000, "score": 1}, - {"min_dens": 0, "max_dens": 10, "min_pop": 1_000,"max_pop": 5_000, "score": 2}, - {"min_dens": 0, "max_dens": 10, "min_pop": 5_000,"max_pop": float("inf"), "score": 3}, - {"min_dens": 10, "max_dens": 50, "min_pop": 0, "max_pop": 1_000, "score": 2}, - {"min_dens": 10, "max_dens": 50, "min_pop": 1_000,"max_pop": 5_000, "score": 3}, - {"min_dens": 10, "max_dens": 50, "min_pop": 5_000,"max_pop": float("inf"), "score": 4}, - {"min_dens": 50, "max_dens": float("inf"),"min_pop": 0, "max_pop": 1_000, "score": 3}, - {"min_dens": 50, "max_dens": float("inf"),"min_pop": 1_000,"max_pop": 5_000, "score": 4}, - {"min_dens": 50, "max_dens": float("inf"),"min_pop": 5_000,"max_pop": float("inf"), "score": 5}, + {"min_dens": 0, "max_dens": 10, "min_pop": 0, "max_pop": 1_000, "score": 1}, + {"min_dens": 0, "max_dens": 10, "min_pop": 1_000, "max_pop": 5_000, "score": 2}, + {"min_dens": 0, "max_dens": 10, "min_pop": 5_000, "max_pop": float("inf"), "score": 3}, + {"min_dens": 10, "max_dens": 50, "min_pop": 0, "max_pop": 1_000, "score": 2}, + {"min_dens": 10, "max_dens": 50, "min_pop": 1_000, "max_pop": 5_000, "score": 3}, + {"min_dens": 10, "max_dens": 50, "min_pop": 5_000, "max_pop": float("inf"), "score": 4}, + {"min_dens": 50, "max_dens": float("inf"), "min_pop": 0, "max_pop": 1_000, "score": 3}, + {"min_dens": 50, "max_dens": float("inf"), "min_pop": 1_000, "max_pop": 5_000, "score": 4}, + {"min_dens": 50, "max_dens": float("inf"), "min_pop": 5_000, "max_pop": float("inf"), "score": 5}, ] # Land-use dictionaries derived from OpenStreetMap tagging schema LANDUSE_TAGS = { - '1.3.1 Процент застройки жилищным строительством': [ - 'residential', 'apartments', 'detached', 'construction' + "1.3.1 Процент застройки жилищным строительством": ["residential", "apartments", "detached", "construction"], + "1.3.2 Процент земель сельскохозяйственного назначения": [ + "farmland", + "farmyard", + "orchard", + "vineyard", + "greenhouse_horticulture", + "meadow", + "plant_nursery", + "aquaculture", + "animal_keeping", + "breeding", + "grassland", ], - '1.3.2 Процент земель сельскохозяйственного назначения': [ - 'farmland', 'farmyard', 'orchard', 'vineyard', 'greenhouse_horticulture', - 'meadow', 'plant_nursery', 'aquaculture', 'animal_keeping', 'breeding', 'grassland' + "1.3.3 Процент земель промышленного назначения": ["industrial", "quarry", "landfill"], + "1.3.4 Процент земель, занятых лесными массивами": ["forest", "wood"], + "1.3.5 Процент земель специального назначения": ["military", "railway", "cemetery", "landfill", "brownfield"], + "1.3.6 Процент земель населенных пунктов": ["place_city", "place_town"], + "1.3.7 Процент земель, занятых особо охраняемыми природными территориями": [ + "national_park", + "protected_area", + "nature_reserve", + "conservation", ], - '1.3.3 Процент земель промышленного назначения': ['industrial', 'quarry', 'landfill'], - '1.3.4 Процент земель, занятых лесными массивами': ['forest', 'wood'], - '1.3.5 Процент земель специального назначения': ['military', 'railway', 'cemetery', 'landfill', 'brownfield'], - '1.3.6 Процент земель населенных пунктов': ['place_city', 'place_town'], - '1.3.7 Процент земель, занятых особо охраняемыми природными территориями': [ - 'national_park', 'protected_area', 'nature_reserve', 'conservation' - ], - '1.3.8 Процент земель, занятых водным фондом': ['basin', 'reservoir', 'water', 'salt_pond'], + "1.3.8 Процент земель, занятых водным фондом": ["basin", "reservoir", "water", "salt_pond"], } LANDUSE_COLORS = { - 'Застройка жилищным строительством': 'blue', - 'Сельскохозяйственные земли': 'yellow', - 'Промышленные земли': 'gray', - 'Лесные массивы': 'green', - 'Земли специального назначения': 'brown', - 'Земли населенных пунктов': 'orange', - 'Особо охраняемые природные территории': 'purple', - 'Водный фонд': 'cyan', - 'Территории смежного назначения': 'white', + "Застройка жилищным строительством": "blue", + "Сельскохозяйственные земли": "yellow", + "Промышленные земли": "gray", + "Лесные массивы": "green", + "Земли специального назначения": "brown", + "Земли населенных пунктов": "orange", + "Особо охраняемые природные территории": "purple", + "Водный фонд": "cyan", + "Территории смежного назначения": "white", } LANDUSE_MAPPING = { - 'Процент застройки жилищным строительством': 'Застройка жилищным строительством', - 'Процент земель сельскохозяйственного назначения': 'Сельскохозяйственные земли', - 'Процент земель промышленного назначения': 'Промышленные земли', - 'Процент земель, занятых лесными массивами': 'Лесные массивы', - 'Процент земель специального назначения': 'Земли специального назначения', - 'Процент земель населенных пунктов': 'Земли населенных пунктов', - 'Процент земель, занятых особо охраняемыми природными территориями': 'Особо охраняемые природные территории', - 'Процент земель, занятых водным фондом': 'Водный фонд', - 'Территории смежного назначения': 'Территории смежного назначения', + "Процент застройки жилищным строительством": "Застройка жилищным строительством", + "Процент земель сельскохозяйственного назначения": "Сельскохозяйственные земли", + "Процент земель промышленного назначения": "Промышленные земли", + "Процент земель, занятых лесными массивами": "Лесные массивы", + "Процент земель специального назначения": "Земли специального назначения", + "Процент земель населенных пунктов": "Земли населенных пунктов", + "Процент земель, занятых особо охраняемыми природными территориями": "Особо охраняемые природные территории", + "Процент земель, занятых водным фондом": "Водный фонд", + "Территории смежного назначения": "Территории смежного назначения", } From 3a23b0417855bd996f2878496b019678e2dd62f4 Mon Sep 17 00:00:00 2001 From: Mvin8 Date: Tue, 23 Sep 2025 12:53:59 +0300 Subject: [PATCH 2/4] fix: ensure GeoDataFrame always has geometry+CRS and handle empty agglomerations safely --- popframe/method/agglomeration.py | 147 +++++++++++-------------------- pyproject.toml | 47 +++++----- 2 files changed, 75 insertions(+), 119 deletions(-) diff --git a/popframe/method/agglomeration.py b/popframe/method/agglomeration.py index 0a5e0b0..691ad3a 100644 --- a/popframe/method/agglomeration.py +++ b/popframe/method/agglomeration.py @@ -19,25 +19,14 @@ "Сверхкрупный город", ] - class AgglomerationBuilder(BaseMethod): """Build urban agglomerations based on travel-time accessibility.""" - # Instance-scoped parameters (not globals) radius_m_per_min: int = TIME_TO_METERS_FACTOR min_population: int = MIN_CITY_POPULATION_FOR_AGGLO _in_agglomeration: Dict[int, bool] = {} def _build_agglomeration(self, towns: gpd.GeoDataFrame, time: int) -> gpd.GeoDataFrame: - """ - Builds agglomerations for cities based on the accessibility matrix and travel time. - - Parameters: - - time_threshold: Maximum travel time to consider cities in the same agglomeration (default is 80 minutes). - - Returns: - - A GeoDataFrame of the agglomerations. - """ node_population = towns.set_index("id")["population"] node_names = towns.set_index("id")["name"] agglomerations = [] @@ -61,59 +50,59 @@ def _build_agglomeration(self, towns: gpd.GeoDataFrame, time: int) -> gpd.GeoDat self._in_agglomeration[member_node] = True if agglomerations: - agglomeration_gdf = gpd.GeoDataFrame(agglomerations, columns=["name", "geometry"]).set_geometry("geometry") - agglomeration_gdf.set_crs(self.region.crs, inplace=True) + agglomeration_gdf = gpd.GeoDataFrame( + agglomerations, + geometry="geometry", + crs=self.region.crs, + )[["name", "geometry"]] else: - agglomeration_gdf = gpd.GeoDataFrame(columns=["name", "geometry"]) + agglomeration_gdf = gpd.GeoDataFrame( + columns=["name", "geometry"], + geometry="geometry", + crs=self.region.crs, + ) return agglomeration_gdf def _get_agglomeration_around_node(self, start_node: int, max_time: int, towns: gpd.GeoDataFrame) -> Optional[dict]: - """ - Finds the agglomeration around a given city node within the specified time limit using the accessibility matrix. - - Parameters: - - start_node: The node to start agglomeration search from. - - max_time: The maximum time to travel from the start node. - - Returns: - - A dictionary containing the geometry of the agglomeration and the nodes within it. - """ accessibility_matrix = self.region.accessibility_matrix - # Get the distances from the start_node to all others from the matrix distances_from_start = accessibility_matrix.loc[start_node] within_time_nodes = distances_from_start[distances_from_start <= max_time].index - if within_time_nodes.empty: + if len(within_time_nodes) == 0: return None - nodes_data = towns.set_index("id").loc[within_time_nodes] - nodes_data["geometry"] = nodes_data.apply(lambda row: Point(row["geometry"].x, row["geometry"].y), axis=1) + nodes_data = towns.set_index("id").loc[within_time_nodes].copy() + + # гарантируем геометрию-точку (на случай, если пришло не Point) + nodes_data["geometry"] = nodes_data["geometry"].apply( + lambda geom: Point(geom.x, geom.y) if isinstance(geom, Point) else geom.centroid + ) + nodes_gdf = gpd.GeoDataFrame(nodes_data, geometry="geometry", crs=self.region.crs) - # Calculate the remaining distance buffer + # остаточная дистанция буфера distance = {node: (max_time - distances_from_start[node]) * self.radius_m_per_min for node in within_time_nodes} nodes_gdf["left_distance"] = nodes_gdf.index.map(distance) + + # буфер переменного радиуса и объединение agglomeration_geom = nodes_gdf.buffer(nodes_gdf["left_distance"]).unary_union return {"geometry": agglomeration_geom, "nodes_in_agglomeration": list(within_time_nodes)} def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - """ - Merges intersecting agglomerations into larger polygons and sums up the population. + # защита для пустого входа + if gdf.empty: + return gpd.GeoDataFrame( + columns=["geometry", "type", "core_cities", "population", "agglomeration_level"], + geometry="geometry", + crs=towns.crs if getattr(towns, "crs", None) is not None else self.region.crs, + ) - Parameters: - - gdf: GeoDataFrame of the agglomerations. - - towns: GeoDataFrame of the towns. - - Returns: - - A GeoDataFrame of the merged agglomerations with updated population data and agglomeration level. - """ merged_geometries = [] processed_indices = set() - # Основной цикл по агломерациям for i, row_i in gdf.iterrows(): if i in processed_indices: continue @@ -122,16 +111,14 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G geometry = row_i["geometry"] merged_names = {row_i["name"]} - # Первый цикл: проверка пересечений и объединение агломераций for j, row_j in gdf.iterrows(): if i != j and j not in processed_indices: if geometry.intersects(row_j["geometry"]): overlapping_agglomerations.append(row_j) - geometry = unary_union([geometry, row_j["geometry"]]) # Используем unary_union + geometry = unary_union([geometry, row_j["geometry"]]) merged_names.add(row_j["name"]) processed_indices.add(j) - # Второй цикл: дополнительная проверка для объединения с новыми полигонами still_intersecting = True while still_intersecting: still_intersecting = False @@ -139,26 +126,24 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G if j not in processed_indices: if geometry.intersects(row_j["geometry"]): overlapping_agglomerations.append(row_j) - geometry = unary_union([geometry, row_j["geometry"]]) # Используем unary_union + geometry = unary_union([geometry, row_j["geometry"]]) merged_names.add(row_j["name"]) processed_indices.add(j) still_intersecting = True - # Проверяем валидность геометрии и исправляем ее, если необходимо if not geometry.is_valid: geometry = geometry.buffer(0) towns_in_agglomeration = towns[towns.intersects(geometry)] population_from_towns = towns_in_agglomeration["population"].sum() - # Определение уровня агломерации на основе населения - if population_from_towns <= 250000: + if population_from_towns <= 250_000: agglomeration_level = 1 - elif population_from_towns <= 500000: + elif population_from_towns <= 500_000: agglomeration_level = 2 - elif population_from_towns <= 1000000: + elif population_from_towns <= 1_000_000: agglomeration_level = 3 - elif population_from_towns <= 5000000: + elif population_from_towns <= 5_000_000: agglomeration_level = 4 else: agglomeration_level = 5 @@ -166,7 +151,7 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G merged_agglomeration = { "geometry": geometry, "type": "Polycentric" if len(merged_names) > 1 else "Monocentric", - "core_cities": ", ".join(merged_names), + "core_cities": ", ".join(sorted(merged_names)), "population": population_from_towns, "agglomeration_level": agglomeration_level, } @@ -174,18 +159,12 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G processed_indices.add(i) - return gpd.GeoDataFrame(merged_geometries, crs=gdf.crs) + return gpd.GeoDataFrame(merged_geometries, geometry="geometry", crs=gdf.crs) def _simplify_multipolygons(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - """ - Simplifies multipolygons to keep only the largest polygon for each agglomeration. - - Parameters: - - gdf: GeoDataFrame containing the agglomeration geometries. - - Returns: - - A GeoDataFrame with simplified geometries. - """ + if gdf.empty: + return gdf + gdf = gdf.copy() gdf["geometry"] = gdf["geometry"].apply( lambda geom: max(geom.geoms, key=lambda g: g.area) if isinstance(geom, MultiPolygon) else geom ) @@ -194,41 +173,24 @@ def _simplify_multipolygons(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: def evaluate_city_agglomeration_status( self, towns: gpd.GeoDataFrame, agglomeration_gdf: gpd.GeoDataFrame ) -> gpd.GeoDataFrame: - """ - Evaluates cities according to their position in the agglomeration. - - Adds the 'agglomeration_status' and 'agglomeration_level' attributes to towns: - - 'agglomeration_status': 'Outside the agglomeration', 'Agglomeration Center', 'In the agglomeration'. - - 'agglomeration_level': 0 for cities outside agglomerations, and agglomeration level (1 to 5) for cities within agglomerations. - - Parameters: - - towns: GeoDataFrame with cities. - - agglomeration_gdf: GeoDataFrame with agglomerations. - - Returns: - - Updated GeoDataFrame with added 'agglomeration_status' (str) and 'agglomeration_level' (int) attributes. - """ agglomeration_status = [] agglomeration_level = [] - for idx, town in towns.iterrows(): + for _, town in towns.iterrows(): town_point = town["geometry"] town_name = town["name"] in_agglomeration = False is_core_city = False - current_agglomeration_level = 0 # Default level for towns outside agglomerations + current_agglomeration_level = 0 - for agg_idx, agg in agglomeration_gdf.iterrows(): + for _, agg in agglomeration_gdf.iterrows(): if town_point.intersects(agg["geometry"]): in_agglomeration = True current_agglomeration_level = agg["agglomeration_level"] - - # Проверяем, является ли город основным в агломерации core_cities = agg["core_cities"].split(", ") if town_name in core_cities: is_core_city = True - current_status = "Центр агломерации" break if is_core_city: @@ -236,29 +198,22 @@ def evaluate_city_agglomeration_status( agglomeration_level.append(current_agglomeration_level) elif not in_agglomeration: agglomeration_status.append("Вне агломерации") - agglomeration_level.append(0) # 0 for cities outside agglomerations + agglomeration_level.append(0) else: agglomeration_status.append("В агломерации") agglomeration_level.append(current_agglomeration_level) + towns = towns.copy() towns["agglomeration_status"] = agglomeration_status towns["agglomeration_level"] = agglomeration_level return towns def get_agglomerations(self, update_df: Optional[pd.DataFrame] = None, time: int = 80) -> gpd.GeoDataFrame: - """ - The main function that orchestrates the creation, merging, and finalization of agglomerations. - - Returns: - - A GeoDataFrame with the finalized agglomerations, merged, simplified, and overlaid on region boundaries. - """ - towns = self.region.get_update_towns_gdf(update_df) if towns is None or len(towns) < 2: raise ValueError("Для построения агломерации требуется минимум два города.") - # Проверяем, что хотя бы у двух городов есть ненулевое население valid_pop = towns["population"].notnull() & (towns["population"] > 0) if valid_pop.sum() < 2: raise ValueError("Требуются данные о населении минимум у двух разных городов.") @@ -267,21 +222,21 @@ def get_agglomerations(self, update_df: Optional[pd.DataFrame] = None, time: int print("Минимально допустимое значение параметра 'time' — 50 минут. Заменяю на 50.") time = 50 - region_boundary = self.region.region + region_boundary = self.region.region - # Step 1: Build agglomerations agglomeration_gdf = self._build_agglomeration(towns, time) - # Step 2: Merge intersecting agglomerations and update population data + # 2) Упростить мультиполигоны перед слиянием agglomeration_gdf = self._simplify_multipolygons(agglomeration_gdf) - # Step 3: Overlay agglomerations on region boundaries + # 3) Слияние пересекающихся и расчет показателей agglomeration_gdf = self._merge_intersecting_agglomerations(agglomeration_gdf, towns) - # Step 4: Simplify multipolygons - agglomeration_gdf = gpd.overlay(agglomeration_gdf, region_boundary, how="intersection") + # 4) Обрезка по границе региона + if not agglomeration_gdf.empty: + agglomeration_gdf = gpd.overlay(agglomeration_gdf, region_boundary, how="intersection") - # Step 5: Final geometry corrections + # 5) Финальная чистка геометрии agglomeration_gdf = self._simplify_multipolygons(agglomeration_gdf) return agglomeration_gdf diff --git a/pyproject.toml b/pyproject.toml index b74834b..b93f226 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,31 +14,32 @@ requires-python = ">=3.10" license = { file = "LICENSE" } dependencies = [ - "geopandas==1.0.1", - "retrying>=1.3.4", - "contextily", - "mapclassify==2.6.1", - "matplotlib", - "matplotlib-inline>=0.1.6", - "networkx>=3.1", - "numpy", - "osmnx==1.6.0", - "pandas", - "pyarrow", - "pydantic>=2.7.2", - "requests>=2.31.0", - "scikit-learn>=1.2.2", - "scipy", - "shapely==2.0.1", - "six>=1.16.0", - "tqdm>=4.65.0", - "seaborn>=0.12.2", - "folium>=0.14.0", - "dill>=0.3.8", - "python-louvain", - "geovoronoi" + "geopandas>=1.0.1", + "numpy>=2.0", + "pandas>=2.2", + "scipy>=1.12", + "scikit-learn>=1.5", + "shapely>=2.0.5", + "pyproj>=3.6", + "contextily", + "mapclassify==2.6.1", + "matplotlib", + "matplotlib-inline>=0.1.6", + "networkx>=3.1", + "osmnx==1.6.0", + "pyarrow", + "pydantic>=2.7.2", + "requests>=2.31.0", + "six>=1.16.0", + "tqdm>=4.65.0", + "seaborn>=0.12.2", + "folium>=0.14.0", + "dill>=0.3.8", + "python-louvain", + "geovoronoi" ] + classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: BSD License", From 95e5c1ba51921b3e094993d08f8cd280777c77f7 Mon Sep 17 00:00:00 2001 From: Mvin8 Date: Tue, 23 Sep 2025 13:58:46 +0300 Subject: [PATCH 3/4] fix: prevent double RuntimeError by re-raising known exceptions and wrapping only unexpected ones --- popframe/method/agglomeration.py | 281 +++++++++++++++++++++++++------ 1 file changed, 233 insertions(+), 48 deletions(-) diff --git a/popframe/method/agglomeration.py b/popframe/method/agglomeration.py index 691ad3a..ab88d7c 100644 --- a/popframe/method/agglomeration.py +++ b/popframe/method/agglomeration.py @@ -19,14 +19,60 @@ "Сверхкрупный город", ] + class AgglomerationBuilder(BaseMethod): - """Build urban agglomerations based on travel-time accessibility.""" + """ + Build urban agglomerations from a towns layer and a travel-time matrix. + + Notes + ----- + The algorithm: + 1) Iterates city levels from largest to smallest and selects candidate centers. + 2) For each center, takes all towns reachable within a time threshold and + buffers them by the remaining time (converted to meters). + 3) Unifies buffers to produce proto-agglomerations. + 4) Merges intersecting proto-agglomerations and aggregates attributes. + 5) Clips to the region boundary and simplifies MultiPolygons. + + Attributes + ---------- + radius_m_per_min : int + Conversion factor from minutes of travel time to meters of buffer radius. + min_population : int + Minimal population for a town to be considered as a center or member. + _in_agglomeration : Dict[int, bool] + Registry of nodes already assigned to an agglomeration. + """ radius_m_per_min: int = TIME_TO_METERS_FACTOR min_population: int = MIN_CITY_POPULATION_FOR_AGGLO _in_agglomeration: Dict[int, bool] = {} def _build_agglomeration(self, towns: gpd.GeoDataFrame, time: int) -> gpd.GeoDataFrame: + """ + Build proto-agglomerations for eligible centers by level and time threshold. + + Parameters + ---------- + towns : GeoDataFrame + Towns with columns: ``id``, ``name``, ``population``, ``level``, ``geometry`` (Points). + CRS must match ``self.region.crs``. + time : int + Global time budget in minutes. Effective per-level budget decreases by 10 minutes + per level step when iterating from largest to smallest. + + Returns + ------- + GeoDataFrame + Columns: ``name``, ``geometry``. Geometry is Polygon or MultiPolygon. + Always has active geometry and CRS. + + Notes + ----- + Centers are filtered by ``level in CITY_LEVELS`` and ``population >= min_population``. + Remaining-time buffers are computed as + ``(time_center - distance_minutes) * radius_m_per_min``. + """ node_population = towns.set_index("id")["population"] node_names = towns.set_index("id")["name"] agglomerations = [] @@ -64,7 +110,32 @@ def _build_agglomeration(self, towns: gpd.GeoDataFrame, time: int) -> gpd.GeoDat return agglomeration_gdf - def _get_agglomeration_around_node(self, start_node: int, max_time: int, towns: gpd.GeoDataFrame) -> Optional[dict]: + def _get_agglomeration_around_node( + self, start_node: int, max_time: int, towns: gpd.GeoDataFrame + ) -> Optional[dict]: + """ + Build a single proto-agglomeration around a candidate center. + + Parameters + ---------- + start_node : int + ID of the center town. + max_time : int + Maximum travel time (minutes) from ``start_node`` to include towns. + towns : GeoDataFrame + Towns table aligned with the accessibility matrix index. + + Returns + ------- + dict or None + ``{"geometry": Polygon|MultiPolygon, "nodes_in_agglomeration": List[int]}`` + or ``None`` if no towns are reachable within ``max_time``. + + Notes + ----- + Uses ``self.region.accessibility_matrix`` with minutes as units. + Guarantees point geometry for towns by coercing to centroid if needed. + """ accessibility_matrix = self.region.accessibility_matrix distances_from_start = accessibility_matrix.loc[start_node] @@ -75,24 +146,46 @@ def _get_agglomeration_around_node(self, start_node: int, max_time: int, towns: nodes_data = towns.set_index("id").loc[within_time_nodes].copy() - # гарантируем геометрию-точку (на случай, если пришло не Point) + # ensure point-like geometry nodes_data["geometry"] = nodes_data["geometry"].apply( lambda geom: Point(geom.x, geom.y) if isinstance(geom, Point) else geom.centroid ) nodes_gdf = gpd.GeoDataFrame(nodes_data, geometry="geometry", crs=self.region.crs) - # остаточная дистанция буфера + # remaining buffer distance in meters distance = {node: (max_time - distances_from_start[node]) * self.radius_m_per_min for node in within_time_nodes} nodes_gdf["left_distance"] = nodes_gdf.index.map(distance) - # буфер переменного радиуса и объединение agglomeration_geom = nodes_gdf.buffer(nodes_gdf["left_distance"]).unary_union return {"geometry": agglomeration_geom, "nodes_in_agglomeration": list(within_time_nodes)} - def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - # защита для пустого входа + def _merge_intersecting_agglomerations( + self, gdf: gpd.GeoDataFrame, towns: gpd.GeoDataFrame + ) -> gpd.GeoDataFrame: + """ + Merge intersecting proto-agglomerations and compute attributes. + + Parameters + ---------- + gdf : GeoDataFrame + Proto-agglomerations with columns ``name`` and ``geometry``. + towns : GeoDataFrame + Towns layer used to aggregate population. + + Returns + ------- + GeoDataFrame + Columns: ``geometry``, ``type``, ``core_cities``, ``population``, ``agglomeration_level``. + Always has active geometry and CRS. + + Notes + ----- + Intersection is tested geometrically. Geometries are unioned via ``unary_union``. + Invalid geometries are repaired with ``buffer(0)``. + Agglomeration level is derived from total population thresholds. + """ if gdf.empty: return gpd.GeoDataFrame( columns=["geometry", "type", "core_cities", "population", "agglomeration_level"], @@ -107,14 +200,12 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G if i in processed_indices: continue - overlapping_agglomerations = [row_i] geometry = row_i["geometry"] merged_names = {row_i["name"]} for j, row_j in gdf.iterrows(): if i != j and j not in processed_indices: if geometry.intersects(row_j["geometry"]): - overlapping_agglomerations.append(row_j) geometry = unary_union([geometry, row_j["geometry"]]) merged_names.add(row_j["name"]) processed_indices.add(j) @@ -123,13 +214,11 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G while still_intersecting: still_intersecting = False for j, row_j in gdf.iterrows(): - if j not in processed_indices: - if geometry.intersects(row_j["geometry"]): - overlapping_agglomerations.append(row_j) - geometry = unary_union([geometry, row_j["geometry"]]) - merged_names.add(row_j["name"]) - processed_indices.add(j) - still_intersecting = True + if j not in processed_indices and geometry.intersects(row_j["geometry"]): + geometry = unary_union([geometry, row_j["geometry"]]) + merged_names.add(row_j["name"]) + processed_indices.add(j) + still_intersecting = True if not geometry.is_valid: geometry = geometry.buffer(0) @@ -162,6 +251,19 @@ def _merge_intersecting_agglomerations(self, gdf: gpd.GeoDataFrame, towns: gpd.G return gpd.GeoDataFrame(merged_geometries, geometry="geometry", crs=gdf.crs) def _simplify_multipolygons(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """ + Keep only the largest polygon for MultiPolygon geometries. + + Parameters + ---------- + gdf : GeoDataFrame + Input geometries. + + Returns + ------- + GeoDataFrame + Copy with simplified geometries. Empty input is returned unchanged. + """ if gdf.empty: return gdf gdf = gdf.copy() @@ -173,6 +275,23 @@ def _simplify_multipolygons(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: def evaluate_city_agglomeration_status( self, towns: gpd.GeoDataFrame, agglomeration_gdf: gpd.GeoDataFrame ) -> gpd.GeoDataFrame: + """ + Label towns by agglomeration membership and level. + + Parameters + ---------- + towns : GeoDataFrame + Towns layer with point geometries and columns ``name`` and ``geometry``. + agglomeration_gdf : GeoDataFrame + Agglomerations with columns ``geometry``, ``core_cities``, ``agglomeration_level``. + + Returns + ------- + GeoDataFrame + ``towns`` copy with two columns added: + - ``agglomeration_status``: {"Вне агломерации","В агломерации","Центр агломерации"} + - ``agglomeration_level``: int in {0..5} + """ agglomeration_status = [] agglomeration_level = [] @@ -208,35 +327,101 @@ def evaluate_city_agglomeration_status( towns["agglomeration_level"] = agglomeration_level return towns - def get_agglomerations(self, update_df: Optional[pd.DataFrame] = None, time: int = 80) -> gpd.GeoDataFrame: - towns = self.region.get_update_towns_gdf(update_df) - - if towns is None or len(towns) < 2: - raise ValueError("Для построения агломерации требуется минимум два города.") - - valid_pop = towns["population"].notnull() & (towns["population"] > 0) - if valid_pop.sum() < 2: - raise ValueError("Требуются данные о населении минимум у двух разных городов.") - - if time < 50: - print("Минимально допустимое значение параметра 'time' — 50 минут. Заменяю на 50.") - time = 50 - - region_boundary = self.region.region - - agglomeration_gdf = self._build_agglomeration(towns, time) - - # 2) Упростить мультиполигоны перед слиянием - agglomeration_gdf = self._simplify_multipolygons(agglomeration_gdf) - - # 3) Слияние пересекающихся и расчет показателей - agglomeration_gdf = self._merge_intersecting_agglomerations(agglomeration_gdf, towns) - - # 4) Обрезка по границе региона - if not agglomeration_gdf.empty: - agglomeration_gdf = gpd.overlay(agglomeration_gdf, region_boundary, how="intersection") - - # 5) Финальная чистка геометрии - agglomeration_gdf = self._simplify_multipolygons(agglomeration_gdf) - - return agglomeration_gdf + def get_agglomerations( + self, + update_df: Optional[pd.DataFrame] = None, + time: int = 80, + raise_on_empty: bool = True, + ) -> gpd.GeoDataFrame: + """ + Build, merge, clip, and finalize urban agglomerations. + + Parameters + ---------- + update_df : DataFrame, optional + If provided, used by ``self.region.get_update_towns_gdf`` to update towns. + time : int, default 80 + Global time budget in minutes. Values < 50 are coerced to 50. + raise_on_empty : bool, default True + If True, raise an error when no eligible towns or no agglomerations are produced. + + Returns + ------- + GeoDataFrame + Final agglomerations after merge, clip, and simplification. + + Raises + ------ + ValueError + If there are fewer than two towns or fewer than two towns with valid population. + RuntimeError + If no towns pass level/population filters or no agglomerations can be built + and ``raise_on_empty`` is True. + RuntimeError + On any unexpected error inside the pipeline, with original exception chained. + + Notes + ----- + The function validates candidates by + ``towns['level'].isin(CITY_LEVELS)`` and + ``towns['population'] >= self.min_population`` before building. + """ + try: + towns = self.region.get_update_towns_gdf(update_df) + if towns is None or len(towns) < 2: + raise ValueError("Для построения агломерации требуется минимум два города.") + + valid_pop = towns["population"].notnull() & (towns["population"] > 0) + if valid_pop.sum() < 2: + raise ValueError("Требуются данные о населении минимум у двух разных городов.") + + if time < 50: + print("Минимально допустимое значение параметра 'time' — 50 минут. Заменяю на 50.") + time = 50 + + # preliminary eligibility check + mask_level = towns["level"].isin(CITY_LEVELS) + mask_pop = towns["population"] >= self.min_population + eligible = towns[mask_level & mask_pop] + if eligible.empty and raise_on_empty: + levels_sample = list(towns["level"].value_counts().head(5).items()) + raise RuntimeError( + "No towns pass level and population criteria: " + f"min_population={self.min_population}, allowed levels={CITY_LEVELS}. " + f"Levels distribution (top5): {levels_sample}" + ) + + region_boundary = self.region.region + + # 1) build proto-agglomerations + agglomeration_gdf = self._build_agglomeration(towns, time) + + # explicit error if nothing produced + if agglomeration_gdf.empty and raise_on_empty: + lvl_ok = towns[mask_level] + pop_ok = towns[mask_pop] + msg_parts = [] + if lvl_ok.empty: + msg_parts.append("no towns with allowed levels (CITY_LEVELS)") + if pop_ok.empty: + msg_parts.append("no towns reaching min_population") + if not msg_parts: + msg_parts.append("accessibility produced no reachable sets at given time") + raise RuntimeError("Agglomerations not built: " + "; ".join(msg_parts)) + + # 2) simplify + agglomeration_gdf = self._simplify_multipolygons(agglomeration_gdf) + + # 3) merge overlapping and compute attributes + agglomeration_gdf = self._merge_intersecting_agglomerations(agglomeration_gdf, towns) + + # 4) clip to region boundary + if not agglomeration_gdf.empty: + agglomeration_gdf = gpd.overlay(agglomeration_gdf, region_boundary, how="intersection") + + # 5) final simplify + agglomeration_gdf = self._simplify_multipolygons(agglomeration_gdf) + return agglomeration_gdf + + except Exception as e: + raise RuntimeError(f"Ошибка при построении агломераций: {e}") from None From 22034fea75bbe176cfcae89da44c9e28bd842000 Mon Sep 17 00:00:00 2001 From: Mvin8 Date: Fri, 26 Sep 2025 11:03:06 +0300 Subject: [PATCH 4/4] add: retrying in pyproject --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b93f226..563b7a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,8 @@ dependencies = [ "folium>=0.14.0", "dill>=0.3.8", "python-louvain", - "geovoronoi" + "geovoronoi", + "retrying" ]