55import numpy as np
66import pandas as pd
77from housing_data .build_data_utils import impute_2025_population
8- from housing_data .data_loading_helpers import get_path , get_url_text
98
109
11- def _get_places_crosswalk_df (data_path : Optional [Path ] = None ) -> pd .DataFrame :
12- df = pd .read_fwf (
13- get_path (
14- "https://www2.census.gov/geo/tiger/PREVGENZ/pl/us_places.txt" , data_path
15- )
16- )
10+ def _get_places_crosswalk_df (data_path : Path ) -> pd .DataFrame :
11+ df = pd .read_fwf (data_path / "us_places.txt" )
1712
1813 df ["State Code" ] = df ["CENSUS" ] // 10000
1914 df ["Place Code" ] = df ["CENSUS" ] % 10000
@@ -79,7 +74,7 @@ def get_unincorporated_places_populations_1980() -> pd.DataFrame:
7974 return remainder_df
8075
8176
82- def get_place_populations_1980 (data_path : Optional [ Path ] ) -> pd .DataFrame :
77+ def get_place_populations_1980 (data_path : Path ) -> pd .DataFrame :
8378 # Assuming this is run from `python/`
8479 # For the header row, use the nice descriptive names that IPUMS provides rather than the code names
8580 df = pd .read_csv ("../raw_data/nhgis0015_ds104_1980_place_070.csv" , header = 1 )
@@ -147,12 +142,8 @@ def get_place_populations_1980(data_path: Optional[Path]) -> pd.DataFrame:
147142 return df
148143
149144
150- def _load_raw_place_populations_1990s (data_path : Optional [Path ]) -> pd .DataFrame :
151- tables = get_url_text (
152- "https://www2.census.gov/programs-surveys/popest/tables/1990-2000/"
153- "2000-subcounties-evaluation-estimates/sc2000f_us.txt" ,
154- data_path ,
155- ).split ("\f " )
145+ def _load_raw_place_populations_1990s (data_path : Path ) -> pd .DataFrame :
146+ tables = (data_path / "sc2000f_us.txt" ).read_text ().split ("\f " )
156147
157148 common_cols = [
158149 "Block" ,
@@ -278,7 +269,7 @@ def remove_duplicate_cities(df: pd.DataFrame) -> pd.DataFrame:
278269 return df [~ place_state_tuples .isin (dupe_cities )]
279270
280271
281- def get_place_populations_1990s (data_path : Optional [ Path ] ) -> pd .DataFrame :
272+ def get_place_populations_1990s (data_path : Path ) -> pd .DataFrame :
282273 combined_df = _load_raw_place_populations_1990s (data_path )
283274
284275 city_rows = (
@@ -396,14 +387,8 @@ def _melt_df(
396387 )
397388
398389
399- def get_place_populations_2000s (data_path : Optional [Path ]) -> pd .DataFrame :
400- df = pd .read_csv (
401- get_path (
402- "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/cities/sub-est00int.csv" ,
403- data_path ,
404- ),
405- encoding = "latin_1" ,
406- )
390+ def get_place_populations_2000s (data_path : Path ) -> pd .DataFrame :
391+ df = pd .read_csv (data_path / "sub-est00int.csv" , encoding = "latin_1" )
407392 return _melt_df (
408393 df ,
409394 years = list (range (2000 , 2011 )),
@@ -412,26 +397,14 @@ def get_place_populations_2000s(data_path: Optional[Path]) -> pd.DataFrame:
412397 )
413398
414399
415- def get_place_populations_2010s (data_path : Optional [Path ]) -> pd .DataFrame :
416- df = pd .read_csv (
417- get_path (
418- "https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/cities/SUB-EST2020_ALL.csv" ,
419- data_path ,
420- ),
421- encoding = "latin_1" ,
422- )
400+ def get_place_populations_2010s (data_path : Path ) -> pd .DataFrame :
401+ df = pd .read_csv (data_path / "SUB-EST2020_ALL.csv" , encoding = "latin_1" )
423402
424403 return _melt_df (df , years = list (range (2010 , 2021 )))
425404
426405
427- def get_place_populations_2020s (data_path : Optional [Path ]) -> pd .DataFrame :
428- df = pd .read_csv (
429- get_path (
430- "https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/cities/sub-est2024.csv" ,
431- data_path ,
432- ),
433- encoding = "latin_1" ,
434- )
406+ def get_place_populations_2020s (data_path : Path ) -> pd .DataFrame :
407+ df = pd .read_csv (data_path / "sub-est2024.csv" , encoding = "latin_1" )
435408 df = _melt_df (df , years = list (range (2020 , 2025 )))
436409 df = impute_2025_population (df )
437410 return df
@@ -482,7 +455,7 @@ def interpolate_1980s_populations(
482455 return interp_df
483456
484457
485- def get_place_population_estimates (data_path : Optional [ Path ] = None ) -> pd .DataFrame :
458+ def get_place_population_estimates (data_path : Path ) -> pd .DataFrame :
486459 """
487460 Returns a DataFrame with the columns:
488461 - state_code (int)
0 commit comments