6 changes: 6 additions & 0 deletions data/src/main.py
@@ -39,6 +39,7 @@
tree_canopy,
unsafe_buildings,
vacant_properties,
recent_activity,
)
from new_etl.database import to_postgis_with_schema

@@ -76,6 +77,7 @@
tactical_urbanism,
conservatorship,
park_priority,
recent_activity,
]

print("Loading OPA properties dataset.")
@@ -108,7 +110,11 @@
"total_due",
"num_years_owed",
"permit_count",
"days_since_permit",
"days_since_business_license",
"days_since_appeal",
]

dataset.gdf[numeric_columns] = dataset.gdf[numeric_columns].apply(
pd.to_numeric, errors="coerce"
)
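
For context on the errors="coerce" choice above: values that cannot be parsed as numbers become NaN instead of raising, so the new days_since_* columns survive stray strings in the source data. A minimal illustration on toy data (not from this pipeline):

import pandas as pd

s = pd.Series(["12", None, "n/a"])
print(pd.to_numeric(s, errors="coerce"))
# 0    12.0
# 1     NaN
# 2     NaN
# dtype: float64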
31 changes: 31 additions & 0 deletions data/src/new_etl/constants/services.py
@@ -29,6 +29,10 @@
"https://services.arcgis.com/fLeGjb7u4uXqeF9q/ArcGIS/rest/services/PPR_Properties/FeatureServer/0"
]

PWD_PARCELS_TO_LOAD = [
"https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/PWD_PARCELS/FeatureServer/0"
]

one_year_ago = (datetime.datetime.now() - datetime.timedelta(days=365)).strftime(
"%Y-%m-%d"
)
@@ -98,3 +102,30 @@
DOR_PARCELS_URL = (
"https://opendata.arcgis.com/datasets/1c57dd1b3ff84449a4b0e3fb29d3cafd_0.geojson"
)

ACTIVITY_QUERIES = {
"latest_permit_date": """
SELECT DISTINCT ON (opa_account_num)
opa_account_num,
permitissuedate AS latest_permit_date
FROM permits
WHERE opa_account_num IS NOT NULL
ORDER BY opa_account_num, permitissuedate DESC
""",
"latest_business_license_date": """
SELECT DISTINCT ON (opa_account_num)
opa_account_num,
mostrecentissuedate AS latest_business_license_date
FROM business_licenses
WHERE opa_account_num IS NOT NULL
ORDER BY opa_account_num, mostrecentissuedate DESC
""",
"latest_appeal_date": """
SELECT DISTINCT ON (opa_account_num)
opa_account_num,
scheduleddate AS latest_appeal_date
FROM appeals
WHERE opa_account_num IS NOT NULL
ORDER BY opa_account_num, scheduleddate DESC
""",
}
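
Each of these queries relies on Postgres's DISTINCT ON, which keeps exactly one row per opa_account_num; combined with ORDER BY ... DESC on the date column, that row is the most recent one. A minimal sketch of running one of them against the Carto SQL endpoint used by recent_activity.py below (error handling omitted; assumes the endpoint and response shape shown in this PR):

import pandas as pd
import requests

from new_etl.constants.services import ACTIVITY_QUERIES

# Carto's SQL API returns JSON with the result set under "rows".
response = requests.get(
    "https://phl.carto.com/api/v2/sql",
    params={"q": ACTIVITY_QUERIES["latest_permit_date"]},
    timeout=60,
)
response.raise_for_status()
permits = pd.DataFrame(response.json().get("rows", []))
# One row per opa_account_num, carrying its most recent permitissuedate.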
30 changes: 16 additions & 14 deletions data/src/new_etl/data_utils/__init__.py
@@ -6,25 +6,26 @@
from .tree_canopy import tree_canopy
from .nbhoods import nbhoods
from .gun_crimes import gun_crimes
from .drug_crimes import drug_crimes # Add missing import
from .drug_crimes import drug_crimes
from .delinquencies import delinquencies
from .opa_properties import opa_properties
from .vacant_properties import vacant_properties
from .priority_level import priority_level
from .access_process import access_process
from .contig_neighbors import contig_neighbors # Add missing import
from .dev_probability import dev_probability # Add missing import
from .negligent_devs import negligent_devs # Add missing import
from .pwd_parcels import pwd_parcels # Add missing import
from .unsafe_buildings import unsafe_buildings # Add missing import
from .imm_dang_buildings import imm_dang_buildings # Add missing import
from .tactical_urbanism import tactical_urbanism # Add missing import
from .conservatorship import conservatorship # Add missing import
from .owner_type import owner_type # Add missing import
from .community_gardens import community_gardens # Add missing import
from .park_priority import park_priority # Add missing import
from .ppr_properties import ppr_properties # Add missing import
from .contig_neighbors import contig_neighbors
from .dev_probability import dev_probability
from .negligent_devs import negligent_devs
from .pwd_parcels import pwd_parcels
from .unsafe_buildings import unsafe_buildings
from .imm_dang_buildings import imm_dang_buildings
from .tactical_urbanism import tactical_urbanism
from .conservatorship import conservatorship
from .owner_type import owner_type
from .community_gardens import community_gardens
from .park_priority import park_priority
from .ppr_properties import ppr_properties
from .council_dists import council_dists
from .recent_activity import recent_activity

__all__ = [
"city_owned_properties",
@@ -35,7 +36,7 @@
"tree_canopy",
"nbhoods",
"gun_crimes",
"drug_crimes", # Ensure completeness
"drug_crimes",
"delinquencies",
"opa_properties",
"vacant_properties",
@@ -54,4 +55,5 @@
"park_priority",
"ppr_properties",
"council_dists",
"recent_activity",
]
62 changes: 62 additions & 0 deletions data/src/new_etl/data_utils/recent_activity.py
@@ -0,0 +1,62 @@
import pandas as pd
import requests
from datetime import datetime, timezone

from ..classes.featurelayer import FeatureLayer
from ..metadata.metadata_utils import provide_metadata
from ..constants.services import ACTIVITY_QUERIES


def fetch_recent_activity(query: str) -> pd.DataFrame:
    """Run a SQL query against the Carto API and return the result rows as a DataFrame."""
    response = requests.get(
        "https://phl.carto.com/api/v2/sql", params={"q": query}, timeout=60
    )
Review comment from @adamzev (Contributor), May 10, 2025:

    We should already have a class capable of grabbing a Carto query and converting the results to a df. Is there a reason it didn't work for this case?

    I'd like to keep using that class rather than duplicating the functionality.

response.raise_for_status()
data = response.json().get("rows", [])
return pd.DataFrame(data)
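
A minimal sketch of the refactor suggested in the review comment above. The helper name and import path are assumptions for illustration only; the PR does not show what the existing Carto class's API looks like:

# Hypothetical: assumes the shared helper the reviewer mentions exposes a
# query-to-DataFrame function; the real name and location may differ.
from ..classes.carto import carto_sql_to_df  # assumed import, not in this PR


def fetch_recent_activity(query: str) -> pd.DataFrame:
    # Delegate the HTTP request and JSON parsing to the shared helper
    # instead of re-implementing the Carto request here.
    return carto_sql_to_df(query)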


@provide_metadata()
def recent_activity(primary_featurelayer: FeatureLayer) -> FeatureLayer:
result_gdf = primary_featurelayer.gdf.copy()

for col_name, query in ACTIVITY_QUERIES.items():
try:
df = fetch_recent_activity(query)
if df.empty:
print("⚠️ No results found")
result_gdf[col_name] = pd.NaT
continue

result_gdf = result_gdf.merge(
df, how="left", left_on="opa_id", right_on="opa_account_num"
)
result_gdf.drop(columns=["opa_account_num"], inplace=True, errors="ignore")
print(f"📊 {result_gdf[col_name].isna().sum()} null values after merge")
except Exception as e:
print(f"❌ Error: {str(e)}")
result_gdf[col_name] = pd.NaT

current_date = datetime.now(timezone.utc)
date_columns = [
"latest_permit_date",
"latest_business_license_date",
"latest_appeal_date",
]

for date_col in date_columns:
activity_type = date_col.replace("latest_", "").replace("_date", "")
days_col = f"days_since_{activity_type}"
has_col = f"has_{activity_type}_record"

if date_col in result_gdf.columns:
result_gdf[has_col] = ~result_gdf[date_col].isna()
            if result_gdf[date_col].dtype == "object":
                # Parse as UTC so the subtraction against the tz-aware
                # current_date below stays valid.
                result_gdf[date_col] = pd.to_datetime(
                    result_gdf[date_col], errors="coerce", utc=True
                )
result_gdf[days_col] = (current_date - result_gdf[date_col]).dt.days.fillna(
9999
)

primary_featurelayer.gdf = result_gdf

return primary_featurelayer
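
To make the date arithmetic at the end of recent_activity concrete, here is a self-contained sketch of the same days-since pattern on toy data, with 9999 as the sentinel for properties that have no recorded activity (parsing with utc=True keeps the subtraction against the tz-aware timestamp valid):

import pandas as pd
from datetime import datetime, timezone

df = pd.DataFrame({"latest_permit_date": ["2025-04-01T00:00:00Z", None]})
df["latest_permit_date"] = pd.to_datetime(
    df["latest_permit_date"], errors="coerce", utc=True
)
df["has_permit_record"] = ~df["latest_permit_date"].isna()
now = datetime.now(timezone.utc)
df["days_since_permit"] = (now - df["latest_permit_date"]).dt.days.fillna(9999)
# Row 0: a real day count; row 1: has_permit_record is False, days 9999.0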