
Commit 3867a39

applying pep8 style on yapf
1 parent 7d68fc4 commit 3867a39

21 files changed (+1658 / -1568 lines)

.flake8

Lines changed: 1 addition & 0 deletions
@@ -1,2 +1,3 @@
 [flake8]
+max-line-length = 120
 ignore = E501,E126,W503

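For reference, the ignored codes are pycodestyle's E501 (line too long), E126 (continuation line over-indented for hanging indent), and W503 (line break before binary operator). Below is a minimal, hypothetical snippet (not part of this commit) showing a wrap that W503 would otherwise flag but that this configuration accepts:

    # hypothetical example: with W503 ignored, breaking before the binary
    # operator is permitted, in line with current PEP 8 guidance
    total = (subtotal
             + shipping
             + tax)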
.style.yapf

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+[style]
+BASED_ON_STYLE = pep8
+SPACES_BEFORE_COMMENT = 2
+COLUMN_LIMIT = 120

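As a rough sketch of how this style file is typically consumed (an assumption, not something shown in this commit), yapf can be pointed at it from the command line or programmatically:

    # minimal sketch, assuming yapf is installed; FormatCode returns a
    # (formatted_source, changed) tuple and accepts a path to a style file
    from yapf.yapflib.yapf_api import FormatCode

    source = "def add( a,b ):\n    return a+b\n"
    formatted, changed = FormatCode(source, style_config=".style.yapf")
    print(formatted)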
awswrangler/athena.py

Lines changed: 22 additions & 23 deletions
@@ -16,17 +16,18 @@ class Athena:
     def __init__(self, session):
         self._session = session
         self._client_athena = session.boto3_session.client(
-            service_name="athena", config=session.botocore_config)
+            service_name="athena", config=session.botocore_config
+        )
 
     def get_query_columns_metadata(self, query_execution_id):
         response = self._client_athena.get_query_results(
-            QueryExecutionId=query_execution_id, MaxResults=1)
+            QueryExecutionId=query_execution_id, MaxResults=1
+        )
         col_info = response["ResultSet"]["ResultSetMetadata"]["ColumnInfo"]
         return {x["Name"]: x["Type"] for x in col_info}
 
     def get_query_dtype(self, query_execution_id):
-        cols_metadata = self.get_query_columns_metadata(
-            query_execution_id=query_execution_id)
+        cols_metadata = self.get_query_columns_metadata(query_execution_id=query_execution_id)
         logger.debug(f"cols_metadata: {cols_metadata}")
         dtype = {}
         parse_timestamps = []
@@ -53,10 +54,11 @@ def create_athena_bucket(self):
 
         :return: Bucket s3 path (E.g. s3://aws-athena-query-results-ACCOUNT-REGION/)
         """
-        account_id = (self._session.boto3_session.client(
-            service_name="sts",
-            config=self._session.botocore_config).get_caller_identity().get(
-                "Account"))
+        account_id = (
+            self._session.boto3_session.client(
+                service_name="sts", config=self._session.botocore_config
+            ).get_caller_identity().get("Account")
+        )
         session_region = self._session.boto3_session.region_name
         s3_output = f"s3://aws-athena-query-results-{account_id}-{session_region}/"
         s3_resource = self._session.boto3_session.resource("s3")
@@ -82,7 +84,8 @@ def run_query(self, query, database, s3_output=None, workgroup=None):
             QueryString=query,
             QueryExecutionContext={"Database": database},
             ResultConfiguration={"OutputLocation": s3_output},
-            WorkGroup=workgroup)
+            WorkGroup=workgroup
+        )
         return response["QueryExecutionId"]
 
     def wait_query(self, query_execution_id):
@@ -93,24 +96,20 @@ def wait_query(self, query_execution_id):
         :return: Query response
         """
         final_states = ["FAILED", "SUCCEEDED", "CANCELLED"]
-        response = self._client_athena.get_query_execution(
-            QueryExecutionId=query_execution_id)
+        response = self._client_athena.get_query_execution(QueryExecutionId=query_execution_id)
         state = response["QueryExecution"]["Status"]["State"]
         while state not in final_states:
             sleep(QUERY_WAIT_POLLING_DELAY)
-            response = self._client_athena.get_query_execution(
-                QueryExecutionId=query_execution_id)
+            response = self._client_athena.get_query_execution(QueryExecutionId=query_execution_id)
             state = response["QueryExecution"]["Status"]["State"]
             logger.debug(f"state: {state}")
             logger.debug(
                 f"StateChangeReason: {response['QueryExecution']['Status'].get('StateChangeReason')}"
             )
         if state == "FAILED":
-            raise QueryFailed(
-                response["QueryExecution"]["Status"].get("StateChangeReason"))
+            raise QueryFailed(response["QueryExecution"]["Status"].get("StateChangeReason"))
         elif state == "CANCELLED":
-            raise QueryCancelled(
-                response["QueryExecution"]["Status"].get("StateChangeReason"))
+            raise QueryCancelled(response["QueryExecution"]["Status"].get("StateChangeReason"))
         return response
 
     def repair_table(self, database, table, s3_output=None, workgroup=None):
@@ -130,17 +129,17 @@ def repair_table(self, database, table, s3_output=None, workgroup=None):
         :return: Query execution ID
         """
         query = f"MSCK REPAIR TABLE {table};"
-        query_id = self.run_query(query=query,
-                                  database=database,
-                                  s3_output=s3_output,
-                                  workgroup=workgroup)
+        query_id = self.run_query(
+            query=query, database=database, s3_output=s3_output, workgroup=workgroup
+        )
         self.wait_query(query_execution_id=query_id)
         return query_id
 
     @staticmethod
     def _normalize_name(name):
-        name = "".join(c for c in unicodedata.normalize("NFD", name)
-                       if unicodedata.category(c) != "Mn")
+        name = "".join(
+            c for c in unicodedata.normalize("NFD", name) if unicodedata.category(c) != "Mn"
+        )
         name = name.replace(" ", "_")
         name = name.replace("-", "_")
         name = name.replace(".", "_")

awswrangler/cloudwatchlogs.py

Lines changed: 25 additions & 18 deletions
@@ -13,14 +13,17 @@ class CloudWatchLogs:
     def __init__(self, session):
         self._session = session
         self._client_logs = session.boto3_session.client(
-            service_name="logs", config=session.botocore_config)
+            service_name="logs", config=session.botocore_config
+        )
 
-    def start_query(self,
-                    query,
-                    log_group_names,
-                    start_time=datetime(year=1970, month=1, day=1),
-                    end_time=datetime.utcnow(),
-                    limit=None):
+    def start_query(
+        self,
+        query,
+        log_group_names,
+        start_time=datetime(year=1970, month=1, day=1),
+        end_time=datetime.utcnow(),
+        limit=None
+    ):
         """
         Run a query against AWS CloudWatchLogs Insights and wait the results
         https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/CWL_QuerySyntax.html
@@ -69,12 +72,14 @@ def wait_query(self, query_id):
             raise QueryCancelled(f"query ID: {query_id}")
         return response
 
-    def query(self,
-              query,
-              log_group_names,
-              start_time=datetime(year=1970, month=1, day=1),
-              end_time=datetime.utcnow(),
-              limit=None):
+    def query(
+        self,
+        query,
+        log_group_names,
+        start_time=datetime(year=1970, month=1, day=1),
+        end_time=datetime.utcnow(),
+        limit=None
+    ):
         """
         Run a query against AWS CloudWatchLogs Insights and wait the results
         https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/CWL_QuerySyntax.html
@@ -86,10 +91,12 @@ def query(self,
         :param limit: The maximum number of log events to return in the query.
         :return: Results
         """
-        query_id = self.start_query(query=query,
-                                    log_group_names=log_group_names,
-                                    start_time=start_time,
-                                    end_time=end_time,
-                                    limit=limit)
+        query_id = self.start_query(
+            query=query,
+            log_group_names=log_group_names,
+            start_time=start_time,
+            end_time=end_time,
+            limit=limit
+        )
         response = self.wait_query(query_id=query_id)
         return response["results"]

awswrangler/data_types.py

Lines changed: 10 additions & 18 deletions
@@ -163,8 +163,7 @@ def pyarrow2athena(dtype: pa.types) -> str:
     elif dtype_str.startswith("list"):
         return f"array<{pyarrow2athena(dtype.value_type)}>"
     elif dtype_str == "null":
-        raise UndetectedType(
-            "We can't infer the data type from an entire null object column")
+        raise UndetectedType("We can't infer the data type from an entire null object column")
     else:
         raise UnsupportedType(f"Unsupported Pyarrow type: {dtype}")
 
@@ -283,8 +282,7 @@ def spark2redshift(dtype: str) -> str:
         raise UnsupportedType("Unsupported Spark type: " + dtype)
 
 
-def convert_schema(func: Callable,
-                   schema: List[Tuple[str, str]]) -> Dict[str, str]:
+def convert_schema(func: Callable, schema: List[Tuple[str, str]]) -> Dict[str, str]:
     """
     Convert schema in the format of {"col name": "bigint", "col2 name": "int"}
     applying some data types conversion function (e.g. spark2redshift)
@@ -296,10 +294,9 @@ def convert_schema(func: Callable,
     return {name: func(dtype) for name, dtype in schema}
 
 
-def extract_pyarrow_schema_from_pandas(dataframe: pd.DataFrame,
-                                       preserve_index: bool,
-                                       indexes_position: str = "right"
-                                       ) -> List[Tuple[str, str]]:
+def extract_pyarrow_schema_from_pandas(
+    dataframe: pd.DataFrame, preserve_index: bool, indexes_position: str = "right"
+) -> List[Tuple[str, str]]:
     """
     Extract the related Pyarrow schema from any Pandas DataFrame
 
@@ -323,8 +320,7 @@ def extract_pyarrow_schema_from_pandas(dataframe: pd.DataFrame,
 
     # Filling cols_dtypes and indexes
    indexes = []
-    for field in pa.Schema.from_pandas(df=dataframe[cols],
-                                       preserve_index=preserve_index):
+    for field in pa.Schema.from_pandas(df=dataframe[cols], preserve_index=preserve_index):
         name = str(field.name)
         dtype = field.type
         cols_dtypes[name] = dtype
@@ -333,15 +329,11 @@ def extract_pyarrow_schema_from_pandas(dataframe: pd.DataFrame,
 
     # Filling schema
     if indexes_position == "right":
-        schema = [(name, cols_dtypes[name])
-                  for name in dataframe.columns]  # adding columns
-        schema += [(name, cols_dtypes[name])
-                   for name in indexes]  # adding indexes
+        schema = [(name, cols_dtypes[name]) for name in dataframe.columns]  # adding columns
+        schema += [(name, cols_dtypes[name]) for name in indexes]  # adding indexes
     elif indexes_position == "left":
-        schema = [(name, cols_dtypes[name])
-                  for name in indexes]  # adding indexes
-        schema += [(name, cols_dtypes[name])
-                   for name in dataframe.columns]  # adding columns
+        schema = [(name, cols_dtypes[name]) for name in indexes]  # adding indexes
+        schema += [(name, cols_dtypes[name]) for name in dataframe.columns]  # adding columns
     else:
         raise ValueError(f"indexes_position must be \"right\" or \"left\"")
 
