Skip to content

Commit 4a34a08

Browse files
authored
Merge pull request #631 from CodeForPhilly/630-sanitize-phone-numbers
sanitize numbers
2 parents 02dee13 + b9a6b02 commit 4a34a08

File tree

3 files changed

+43
-22
lines changed

3 files changed

+43
-22
lines changed

src/server/models.py

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from sqlalchemy.dialects.postgresql import JSONB, insert
2121
from sqlalchemy.ext.declarative import declarative_base
2222
from sqlalchemy.sql.functions import coalesce
23+
from utils import standardize_phone_number
2324

2425
Base = declarative_base()
2526

@@ -92,21 +93,6 @@ def dedup_consecutive(table, unique_id, id, order_by, dedup_on):
9293
return delete(table).where(unique_id == to_delete.c[0])
9394

9495

95-
def normalize_phone_number(number):
    """Reduce a raw phone value to a bare 10-digit string.

    The value is stringified, separator characters are stripped, a single
    leading "1" is dropped, and the remainder is accepted only when it is
    exactly 10 digits.

    Args:
        number: The raw phone value. Falsy values and anything whose string
            form is "nan" are treated as missing.

    Returns:
        The 10-digit string, or None when the value is missing or does not
        reduce to exactly 10 digits.
    """
    # Missing values: falsy input, or a value that stringifies to "nan".
    if not number or str(number) == "nan":
        return None

    # NOTE: inside the character class, " -." is a range (space through
    # period), so more than the literal space, hyphen and dot is stripped.
    digits = re.sub("[() -.+]", "", str(number))

    # Drop one leading "1"; slicing keeps this safe on an empty string.
    if digits[:1] == "1":
        digits = digits[1:]

    if len(digits) == 10 and digits.isdigit():
        return digits

    return None
108-
109-
11096
class PdpContacts(Base):
11197
__tablename__ = "pdp_contacts"
11298
__table_args__ = (
@@ -173,8 +159,10 @@ def insert_from_file_df(cls, df, conn):
173159
df = df[column_translation.keys()]
174160
df = df.rename(columns=column_translation)
175161

176-
df["phone"] = df["phone"].apply(normalize_phone_number)
177-
df["mobile"] = df["mobile"].apply(normalize_phone_number)
162+
phone_numbers = [standardize_phone_number(phone) for phone in df["phone"]]
163+
mobile_numbers = [standardize_phone_number(phone) for phone in df["mobile"]]
164+
df["phone"] = phone_numbers
165+
df["mobile"] = mobile_numbers
178166

179167
dedup_on = [col for col in cls.__table__.columns if col.name in df.columns]
180168
df["created_date"] = datetime.datetime.utcnow()
@@ -237,7 +225,8 @@ def insert_from_df(cls, df, conn):
237225
df = df[column_translation.keys()]
238226
df = df.rename(columns=column_translation)
239227

240-
df["phone"] = df["phone"].apply(normalize_phone_number)
228+
phone_numbers = [standardize_phone_number(phone) for phone in df["phone"]]
229+
df["phone"] = phone_numbers
241230

242231
dedup_on = [col for col in cls.__table__.columns if col.name in df.columns]
243232
df["created_date"] = datetime.datetime.utcnow()

src/server/utils.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import re
2+
3+
def standardize_phone_number(phone):
    """Standardize a phone number to a bare 10-digit string.

    Args:
        phone: The phone number to standardize. Normally a str, but None,
            NaN and numeric values are tolerated as well, since callers
            pass raw DataFrame / spreadsheet cell values straight through.

    Returns:
        str | None: The last 10 digits of the number, or None when the
        value is missing or contains fewer than 10 digits.
    """
    if phone is None:
        return None

    if isinstance(phone, float):
        # NaN is the only float that does not compare equal to itself.
        if phone != phone:
            return None
        # 2155551234.0 -> 2155551234, so the trailing ".0" does not
        # contribute a spurious extra digit once stringified.
        if phone.is_integer():
            phone = int(phone)

    # Remove all non-numeric characters. str() keeps re.sub from raising
    # TypeError on non-string input such as ints.
    digits = re.sub(r"\D", "", str(phone))

    # Fewer than 10 digits cannot be a complete number.
    if len(digits) < 10:
        return None

    # Exactly 10 digits is returned as is; anything longer keeps only the
    # last 10 digits (this drops a leading country code such as "1").
    # NOTE(review): trailing extension digits would also be kept in place of
    # the leading digits - confirm that is acceptable for the source data.
    return digits[-10:]

src/server/volgistics_importer.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
import re
21
from flask.globals import current_app
32
from datetime import datetime, timedelta
43
from openpyxl import load_workbook
54
from jellyfish import jaro_similarity
65

76
from config import engine
7+
from utils import standardize_phone_number
88

99
import structlog
1010

@@ -178,6 +178,10 @@ def volgistics_people_import(workbook):
178178
col_email = col['Email']
179179
time_stamp = datetime.utcnow()
180180

181+
home_phone = standardize_phone_number(r[col_home])
182+
work_phone = standardize_phone_number(r[col_work])
183+
cell_phone = standardize_phone_number(r[col_cell])
184+
181185
try:
182186
for r in ws.iter_rows(min_row=2, max_col=42,values_only=True):
183187
insert_list.append(
@@ -194,9 +198,9 @@ def volgistics_people_import(workbook):
194198
"state": r[col_state],
195199
"zip": r[col_zip],
196200
"all_phone_numbers": r[col_all_phones],
197-
"home": r[col_home],
198-
"work": r[col_work],
199-
"cell": r[col_cell],
201+
"home": home_phone,
202+
"work": work_phone,
203+
"cell": cell_phone,
200204
"email": r[col_email],
201205
"created_date" : time_stamp
202206
}

0 commit comments

Comments
 (0)