Skip to content

Commit acb9d0d

Browse files
authored
Remove unnecessary get_path helper (#102)
Since #80, we always load the data from a local dir. So this helper is unnecessary.
1 parent ffe65e4 commit acb9d0d

File tree

5 files changed: +19 / -70 lines changed

python/housing_data/build_places.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def get_name_spelling(places_df: pd.DataFrame) -> pd.Series:
293293

294294

295295
def load_places(
296-
data_repo_path: Optional[Path], counties_population_df: pd.DataFrame = None
296+
data_repo_path: Path, counties_population_df: pd.DataFrame = None
297297
) -> tuple[pd.DataFrame, pd.DataFrame]:
298298
raw_places_df = pd.concat(
299299
[
@@ -310,7 +310,7 @@ def load_places(
310310
raw_places_df.to_parquet(PUBLIC_DIR / "places_annual_without_population.parquet")
311311

312312
place_populations_df = place_population.get_place_population_estimates(
313-
data_path=data_repo_path / PLACE_POPULATION_DIR if data_repo_path else None
313+
data_path=data_repo_path / PLACE_POPULATION_DIR
314314
)
315315
place_populations_df = fix_nyc_boroughs_population(
316316
place_populations_df, counties_population_df

python/housing_data/county_population.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import pandas as pd
55
import us
66
from housing_data.build_data_utils import impute_2025_population
7-
from housing_data.data_loading_helpers import get_url_text
87
from housing_data.fips_crosswalk import load_fips_crosswalk
98

109

@@ -119,11 +118,7 @@ def get_county_fips_crosswalk(data_repo_path: Path) -> pd.DataFrame:
119118

120119

121120
def get_county_populations_1990s(data_path: Path) -> pd.DataFrame:
122-
table_text = get_url_text(
123-
"https://www2.census.gov/programs-surveys/popest/tables/1990-2000/counties/totals/99c8_00.txt",
124-
data_path,
125-
encoding="latin_1",
126-
)
121+
table_text = (data_path / "99c8_00.txt").read_text(encoding="latin_1")
127122

128123
table_text = table_text[: table_text.index("Block 2")].strip()
129124

python/housing_data/data_loading_helpers.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,3 @@ def get_url_text(
3434
else:
3535
web_url = os.path.join(web_prefix, common_path)
3636
return requests.get(web_url).text
37-
38-
39-
def get_path(url: str, data_path: Optional[Path]) -> str:
40-
if data_path is not None:
41-
return str(Path(data_path, Path(url).name))
42-
else:
43-
return url

python/housing_data/place_population.py

Lines changed: 13 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,10 @@
55
import numpy as np
66
import pandas as pd
77
from housing_data.build_data_utils import impute_2025_population
8-
from housing_data.data_loading_helpers import get_path, get_url_text
98

109

11-
def _get_places_crosswalk_df(data_path: Optional[Path] = None) -> pd.DataFrame:
12-
df = pd.read_fwf(
13-
get_path(
14-
"https://www2.census.gov/geo/tiger/PREVGENZ/pl/us_places.txt", data_path
15-
)
16-
)
10+
def _get_places_crosswalk_df(data_path: Path) -> pd.DataFrame:
11+
df = pd.read_fwf(data_path / "us_places.txt")
1712

1813
df["State Code"] = df["CENSUS"] // 10000
1914
df["Place Code"] = df["CENSUS"] % 10000
@@ -79,7 +74,7 @@ def get_unincorporated_places_populations_1980() -> pd.DataFrame:
7974
return remainder_df
8075

8176

82-
def get_place_populations_1980(data_path: Optional[Path]) -> pd.DataFrame:
77+
def get_place_populations_1980(data_path: Path) -> pd.DataFrame:
8378
# Assuming this is run from `python/`
8479
# For the header row, use the nice descriptive names that IPUMS provides rather than the code names
8580
df = pd.read_csv("../raw_data/nhgis0015_ds104_1980_place_070.csv", header=1)
@@ -147,12 +142,8 @@ def get_place_populations_1980(data_path: Optional[Path]) -> pd.DataFrame:
147142
return df
148143

149144

150-
def _load_raw_place_populations_1990s(data_path: Optional[Path]) -> pd.DataFrame:
151-
tables = get_url_text(
152-
"https://www2.census.gov/programs-surveys/popest/tables/1990-2000/"
153-
"2000-subcounties-evaluation-estimates/sc2000f_us.txt",
154-
data_path,
155-
).split("\f")
145+
def _load_raw_place_populations_1990s(data_path: Path) -> pd.DataFrame:
146+
tables = (data_path / "sc2000f_us.txt").read_text().split("\f")
156147

157148
common_cols = [
158149
"Block",
@@ -278,7 +269,7 @@ def remove_duplicate_cities(df: pd.DataFrame) -> pd.DataFrame:
278269
return df[~place_state_tuples.isin(dupe_cities)]
279270

280271

281-
def get_place_populations_1990s(data_path: Optional[Path]) -> pd.DataFrame:
272+
def get_place_populations_1990s(data_path: Path) -> pd.DataFrame:
282273
combined_df = _load_raw_place_populations_1990s(data_path)
283274

284275
city_rows = (
@@ -396,14 +387,8 @@ def _melt_df(
396387
)
397388

398389

399-
def get_place_populations_2000s(data_path: Optional[Path]) -> pd.DataFrame:
400-
df = pd.read_csv(
401-
get_path(
402-
"https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/cities/sub-est00int.csv",
403-
data_path,
404-
),
405-
encoding="latin_1",
406-
)
390+
def get_place_populations_2000s(data_path: Path) -> pd.DataFrame:
391+
df = pd.read_csv(data_path / "sub-est00int.csv", encoding="latin_1")
407392
return _melt_df(
408393
df,
409394
years=list(range(2000, 2011)),
@@ -412,26 +397,14 @@ def get_place_populations_2000s(data_path: Optional[Path]) -> pd.DataFrame:
412397
)
413398

414399

415-
def get_place_populations_2010s(data_path: Optional[Path]) -> pd.DataFrame:
416-
df = pd.read_csv(
417-
get_path(
418-
"https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/cities/SUB-EST2020_ALL.csv",
419-
data_path,
420-
),
421-
encoding="latin_1",
422-
)
400+
def get_place_populations_2010s(data_path: Path) -> pd.DataFrame:
401+
df = pd.read_csv(data_path / "SUB-EST2020_ALL.csv", encoding="latin_1")
423402

424403
return _melt_df(df, years=list(range(2010, 2021)))
425404

426405

427-
def get_place_populations_2020s(data_path: Optional[Path]) -> pd.DataFrame:
428-
df = pd.read_csv(
429-
get_path(
430-
"https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/cities/sub-est2024.csv",
431-
data_path,
432-
),
433-
encoding="latin_1",
434-
)
406+
def get_place_populations_2020s(data_path: Path) -> pd.DataFrame:
407+
df = pd.read_csv(data_path / "sub-est2024.csv", encoding="latin_1")
435408
df = _melt_df(df, years=list(range(2020, 2025)))
436409
df = impute_2025_population(df)
437410
return df
@@ -482,7 +455,7 @@ def interpolate_1980s_populations(
482455
return interp_df
483456

484457

485-
def get_place_population_estimates(data_path: Optional[Path] = None) -> pd.DataFrame:
458+
def get_place_population_estimates(data_path: Path) -> pd.DataFrame:
486459
"""
487460
Returns a DataFrame with the columns:
488461
- state_code (int)

python/housing_data/state_population.py

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import pandas as pd
55
import us
66
from housing_data.build_data_utils import impute_2025_population
7-
from housing_data.data_loading_helpers import get_path, get_url_text
87

98
DIVISIONS = {
109
"New England": [
@@ -75,10 +74,7 @@ def _line_to_cols(row: str) -> list[str]:
7574

7675

7776
def get_state_populations_1980s(data_path: Path) -> pd.DataFrame:
78-
states_80s_text = get_url_text(
79-
"https://www2.census.gov/programs-surveys/popest/tables/1980-1990/state/asrh/st8090ts.txt",
80-
data_path,
81-
)
77+
states_80s_text = (data_path / "st8090ts.txt").read_text()
8278
handle = StringIO(states_80s_text)
8379

8480
for _ in range(10):
@@ -168,10 +164,7 @@ def get_state_populations_1990s(data_path: Path) -> pd.DataFrame:
168164

169165
def get_state_populations_2000s(data_path: Path) -> pd.DataFrame:
170166
df = pd.read_excel(
171-
get_path(
172-
"https://www2.census.gov/programs-surveys/popest/tables/2000-2010/intercensal/state/st-est00int-01.xls",
173-
data_path,
174-
),
167+
data_path / "st-est00int-01.xls",
175168
skiprows=3,
176169
skipfooter=8,
177170
)
@@ -206,12 +199,7 @@ def _melt_df(df: pd.DataFrame, years: list[int]) -> pd.DataFrame:
206199

207200

208201
def get_state_populations_2010s(data_path: Path) -> pd.DataFrame:
209-
df = pd.read_csv(
210-
get_path(
211-
"https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/state/totals/nst-est2020-alldata.csv",
212-
data_path,
213-
)
214-
)
202+
df = pd.read_csv(data_path / "nst-est2020-alldata.csv")
215203

216204
return _melt_df(df, list(range(2010, 2020)))
217205

0 commit comments

Comments (0)