From 35b067c6d60b62debdc60ac789e9c75c7414afef Mon Sep 17 00:00:00 2001 From: Ben Tanzer Date: Tue, 2 Sep 2025 16:17:39 -0700 Subject: [PATCH 1/9] Update AWS parser for HTML emails --- circuit_maintenance_parser/parsers/aws.py | 73 ++++++++++++++++------- 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index 8ff24f78..ce0a8e5d 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -62,29 +62,62 @@ def parse_text(self, text): """ data = {"circuits": []} impact = Impact.OUTAGE - maintenace_id = "" + maintenance_id = "" status = Status.CONFIRMED - for line in text.splitlines(): - if "planned maintenance" in line.lower() or "maintenance has been scheduled" in line.lower(): - data["summary"] = line - search = re.search( - r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})", - line, - ) - if search: - data["start"] = self.dt2ts(parser.parse(search.group(1))) - data["end"] = self.dt2ts(parser.parse(search.group(2))) - maintenace_id += str(data["start"]) - maintenace_id += str(data["end"]) - if "may become unavailable" in line.lower(): - impact = Impact.OUTAGE - elif "has been cancelled" in line.lower(): - status = Status.CANCELLED - elif re.match(r"[a-z]{5}-[a-z0-9]{8}", line): - maintenace_id += line + if re.search(r'', text, re.IGNORECASE): + soup = bs4.BeautifulSoup(text, 'html.parser') + clean_string = soup.get_text() + clean_string = re.sub('=20', '', clean_string) + clean_list = clean_string.splitlines() + cleaner_list = [] + for line in clean_list: + newline = line.strip() + if newline != "": + cleaner_list.append(newline) + sumstart = cleaner_list.index('Hello,') + try: + sumend = cleaner_list.index('[1] https://aws.amazon.com/support') + except ValueError: + sumend = len(cleaner_list) + summary = "" + for line in cleaner_list[sumstart:sumend]: + summary+=f"{line}\n" + if "may become unavailable" in summary.lower(): + impact = Impact.OUTAGE + elif "has been cancelled" in summary.lower(): + status = Status.CANCELLED + start_time = cleaner_list[cleaner_list.index('Start time')+1] + end_time = cleaner_list[cleaner_list.index('End time')+1] + data["start"] = self.dt2ts(parser.parse(start_time)) + data["end"] = self.dt2ts(parser.parse(end_time)) + data["summary"] = summary + for line in summary.splitlines(): + line = line.strip() + maintenance_id += line data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) + maintenance_id += str(data["start"]) + maintenance_id += str(data["end"]) + else: + for line in text.splitlines(): + if "planned maintenance" in line.lower() or "maintenance has been scheduled" in line.lower(): + data["summary"] = line + search = re.search( + r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})", + line, + ) + if search: + data["start"] = self.dt2ts(parser.parse(search.group(1))) + data["end"] = self.dt2ts(parser.parse(search.group(2))) + maintenance_id += str(data["start"]) + maintenance_id += str(data["end"]) + if "may become unavailable" in line.lower(): + impact = Impact.OUTAGE + elif "has been cancelled" in line.lower(): + status = Status.CANCELLED + maintenance_id += line + data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) # No maintenance ID found in emails, so a hash value is being generated using the start, # end and IDs of all circuits in the notification. - data["maintenance_id"] = hashlib.sha256(maintenace_id.encode("utf-8")).hexdigest() # nosec + data["maintenance_id"] = hashlib.sha256(maintenance_id.encode("utf-8")).hexdigest() # nosec data["status"] = status return [data] From fd8dfea2793c142d586b3de67d6927e4a1724d8a Mon Sep 17 00:00:00 2001 From: Ben Tanzer Date: Wed, 3 Sep 2025 10:32:08 -0700 Subject: [PATCH 2/9] reformat with ruff --- circuit_maintenance_parser/parsers/aws.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index ce0a8e5d..4ce02aed 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -64,30 +64,30 @@ def parse_text(self, text): impact = Impact.OUTAGE maintenance_id = "" status = Status.CONFIRMED - if re.search(r'', text, re.IGNORECASE): - soup = bs4.BeautifulSoup(text, 'html.parser') + if re.search(r"", text, re.IGNORECASE): + soup = bs4.BeautifulSoup(text, "html.parser") clean_string = soup.get_text() - clean_string = re.sub('=20', '', clean_string) + clean_string = re.sub("=20", "", clean_string) clean_list = clean_string.splitlines() cleaner_list = [] for line in clean_list: newline = line.strip() if newline != "": cleaner_list.append(newline) - sumstart = cleaner_list.index('Hello,') + sumstart = cleaner_list.index("Hello,") try: - sumend = cleaner_list.index('[1] https://aws.amazon.com/support') + sumend = cleaner_list.index("[1] https://aws.amazon.com/support") except ValueError: sumend = len(cleaner_list) summary = "" for line in cleaner_list[sumstart:sumend]: - summary+=f"{line}\n" + summary += f"{line}\n" if "may become unavailable" in summary.lower(): impact = Impact.OUTAGE elif "has been cancelled" in summary.lower(): status = Status.CANCELLED - start_time = cleaner_list[cleaner_list.index('Start time')+1] - end_time = cleaner_list[cleaner_list.index('End time')+1] + start_time = cleaner_list[cleaner_list.index("Start time") + 1] + end_time = cleaner_list[cleaner_list.index("End time") + 1] data["start"] = self.dt2ts(parser.parse(start_time)) data["end"] = self.dt2ts(parser.parse(end_time)) data["summary"] = summary From b13b8f32757ffbce8cd764b914e8af921311c6cc Mon Sep 17 00:00:00 2001 From: Ben Tanzer Date: Thu, 4 Sep 2025 12:17:03 -0700 Subject: [PATCH 3/9] Updating and breaking method into smaller ones --- circuit_maintenance_parser/parsers/aws.py | 1306 ++++++++++++++++++++- 1 file changed, 1249 insertions(+), 57 deletions(-) diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index 4ce02aed..72c6c468 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -8,7 +8,13 @@ import bs4 # type: ignore from dateutil import parser -from circuit_maintenance_parser.parser import CircuitImpact, EmailSubjectParser, Impact, Status, Text +from circuit_maintenance_parser.parser import ( + CircuitImpact, + EmailSubjectParser, + Impact, + Status, + Text, +) # pylint: disable=too-many-nested-blocks, too-many-branches @@ -39,8 +45,8 @@ def get_text_hook(raw): soup = bs4.BeautifulSoup(quopri.decodestring(raw), features="lxml") return soup.text - def parse_text(self, text): - """Parse text. + def parse_plaintext(self, text, data): + r"""Parse text. Example: Hello, @@ -59,65 +65,1251 @@ def parse_text(self, text): This maintenance is scheduled to avoid disrupting redundant connections at = the same time. + ALTERNATE: + Planned maintenance has been scheduled on an AWS Direct Connect endpoint in= + EdgeConnex, Hillsboro, OR. During this maintenance window, your AWS Direct= + Connect services associated with this event may become unavailable.\n\nThi= + s maintenance is scheduled to avoid disrupting redundant connections at the= + same time.\n\nIf you encounter any problems with your connection after the= + end of this maintenance window, please contact AWS Support(1).\n\n(1) http= + s://aws.amazon.com/support. For more details, please see https://phd.aws.am= + azon.com/phd/home?region=3Dus-west-2#/dashboard/open-issues + + Region: us-west-2 + Account Id: 11111111111 + + Affected Resources: + dxvif-fffg1111 + dxcon-fh700000 + dxlag-fh847853 + dxvif-fg000000 + dxvif-f0000000 + dxvif-ffx17y56 + + Start Time: Wed, 3 Sep 2025 09:00:00 GMT + End Time: Wed, 3 Sep 2025 13:00:00 GMT + """ + impact = Impact.OUTAGE + for line in text.splitlines(): + if ( + "planned maintenance" in line.lower() + or "maintenance has been scheduled" in line.lower() + ): + data["summary"] = line + search = re.search( + r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})", + line, + ) + if search: + data["start"] = self.dt2ts(parser.parse(search.group(1))) + data["end"] = self.dt2ts(parser.parse(search.group(2))) + starttimesearch = re.search( + r"^Start Time:\s*([A-Za-z]{3}, \d{1,2} [A-Za-z]{3} \d{4} \d{2}:\d{2}:\d{2} GMT)$", + line, + ) + endtimesearch = re.search( + r"^End Time:\s*([A-Za-z]{3}, \d{1,2} [A-Za-z]{3} \d{4} \d{2}:\d{2}:\d{2} GMT)$", + line, + ) + if starttimesearch: + data["start"] = self.dt2ts(parser.parse(starttimesearch.group(1))) + if endtimesearch: + data["end"] = self.dt2ts(parser.parse(endtimesearch.group(1))) + if "has been cancelled" in line.lower(): + data["status"] = Status.CANCELLED + if re.match(r"[a-z]{5}-[a-z0-9]{8}", line): + data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) + return data + + def parse_html(self, text, data): + """Parses AWS HTML notifications. + + Wrapper method to deal with both html and plaintext emails. + + Args: + text (str): email text. + data (dict): the dictionary structure started in wrapper method. + + Returns: + data (dict): dictionary structure with maintenance details + Example: + + + + + =20 + + + + + + + + + + =20 + + + + + + =20 + =20 + + + =20 + =20 + + + =20 + + + =20 + =20 +
+ =20 + =20 + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 + + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ + =20 +
+ =20 + + + =20 + + + + =20 + +
+ =20 +

AWS Health= + Event

+ =20 +
+ =20 +
+ =20 + + =20 +
+ =20 + + + =20 + + + + =20 + +
+ =20 + + + + + + +
+ =20 + + =20 +
+ =20 +
+ =20 +
+ =20 + +
+ =20 + +
+ =20 +
+ =20 + =20 + + + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + + + + =20 + + + + =20 + +
+ =20 + + =20 +
+ =20 +
AWS Direct Connect Planned Maintenance Notification [AWS Account: 11= + 1111111111]
+ =20 +
+ =20 + + + + + + +
+ + View details in service console + +
+ =20 +
+ =20 +
+ Hello, +
+
Planned maintenance has been scheduled on an AWS Direct Connect endpo= + int in Sonda Quilicura, Santiago from Tue, 16 Sep 2025 01:00:00 GMT to Tue,= + 16 Sep 2025 05:00:00 GMT for 4 hours. During this maintenance window, your= + AWS Direct Connect services listed below may become unavailable. +
+
dxcon-abc12345 +
dxvif-1234hfjd +
dxlag-fge1111 +
dxcon-ffucore +
dxcon-fg885ug +
+
This maintenance is scheduled to avoid disrupting redundant connectio= + ns at the same time. +
+
If you encounter any problems with your connection after the end of t= + his maintenance window, please contact AWS Support[1]. +
+
[1] https://aws.amazon.com/support +
+
. +
+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + + + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + +
+ =20 +

+

+ =20 + + =20 + =20 +
+ =20 +
Message metadata
+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + + =20 + + =20 + =20 +
+ =20 + + + + + + +
+ + + + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + + + + =20 + + + + =20 + +
+ =20 +

Affected account

+

111111111111

+ =20 +
+ =20 +

Event type code

+

AWS_DIRECTCONNECT_MAINTENANCE_SCHEDULED

+ =20 +
+ =20 +

Event region

+

sa-east-1

+ =20 +
+ =20 +

End time

+

Tue, 16 Sep 2025 05:00:00 GMT

+ =20 +
+ =20 +
+ =20 + + + + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + + + + =20 + +
+ =20 +

Service

+

DIRECTCONNECT

+ =20 +
+ =20 +

Event type category

+

scheduledChange

+ =20 +
+ =20 +

Start time

+

Tue, 16 Sep 2025 01:00:00 GMT

+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + +
+ =20 +
+ =20 + =20 + + =20 + =20 + =20 + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + +
+ =20 +
AWS managed notification details
+ =20 +
+ =20 +
AWS Health: Operations events notifications are generated by AWS and= + sent to selected account contacts. You can add additional delivery channel= + s for these notifications using AWS managed notifica= + tions subscriptions.
+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + + =20 + =20 +
+ =20 + + + + + + +
+ + + + =20 +
+ =20 + + + =20 + + + + =20 + +
+ =20 +

Event type

+

AWS Health Event

+ =20 +
+ =20 +
+ =20 + + + + =20 +
+ =20 + + + =20 + + + + =20 + +
+ =20 +

Category

+

Operations

+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + + + + =20 + +
+ =20 +

+

+ =20 + + =20 + =20 +
+ =20 +
Thank you,
+ =20 +
+ =20 +
Amazon Web Services
+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + + + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ =20 + + + =20 + + + + =20 + +
+ =20 +
You are receiving this email because NOTICYEMAIL@COMPANYNAEME.com is sub= + scribed to AWS Health: Operations events notifications. If you do not wis= + h to receive emails for these AWS managed notifications, you may unsubscribe. If you believe you've received this email by = + error or are experiencing issues managing email subscription, please contact us. +

+ Amazon Web Services, Inc. is a subsidiary of Amazon.com, Inc. AMA= + ZON WEB SERVICES AWS, and related logos are trademarks of Amazon Web Servic= + es, Inc. or its affiliates. +

+ This message was produced and distributed by Amazon Web Services,= + Inc. or its affiliates 410 Terry Ave. North, Seattle, WA = + 98109. +

+ =C2=A9 2025, Amazon Web Services, Inc. or its af= + filiates. All rights reserved. Read our Privacy Notice. + =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + +
+ =20 +
+ =20 + =20 + + =20 + =20 +
+ =20 + 3D"" + + + =20 """ - data = {"circuits": []} impact = Impact.OUTAGE + soup = bs4.BeautifulSoup(text, "html.parser") + clean_string = soup.get_text() + clean_string = re.sub("=20", "", clean_string) + clean_list = clean_string.splitlines() + cleaner_list = [] + for line in clean_list: + newline = line.strip() + if newline != "": + cleaner_list.append(newline) + sumstart = cleaner_list.index("Hello,") + try: + sumend = cleaner_list.index("[1] https://aws.amazon.com/support") + except ValueError: + sumend = len(cleaner_list) + summary = "" + for line in cleaner_list[sumstart:sumend]: + summary += f"{line}\n" + if "has been cancelled" in summary.lower(): + data["status"] = Status.CANCELLED + start_time = cleaner_list[cleaner_list.index("Start time") + 1] + end_time = cleaner_list[cleaner_list.index("End time") + 1] + data["start"] = self.dt2ts(parser.parse(start_time)) + data["end"] = self.dt2ts(parser.parse(end_time)) + data["summary"] = summary + for line in summary.splitlines(): + line = line.strip() + if re.match(r"[a-z]{5}-[a-z0-9]{8}", line): + data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) + return data + + def parse_text(self, text): + """Parses AWS notifications. + + Wrapper method to deal with both html and plaintext emails. + + Args: + text (str): email text. + + Returns: + data (dict): dictionary structure with maintenance details + """ + data = { + "circuits": [], + "status": Status.CONFIRMED, + } maintenance_id = "" - status = Status.CONFIRMED if re.search(r"", text, re.IGNORECASE): - soup = bs4.BeautifulSoup(text, "html.parser") - clean_string = soup.get_text() - clean_string = re.sub("=20", "", clean_string) - clean_list = clean_string.splitlines() - cleaner_list = [] - for line in clean_list: - newline = line.strip() - if newline != "": - cleaner_list.append(newline) - sumstart = cleaner_list.index("Hello,") - try: - sumend = cleaner_list.index("[1] https://aws.amazon.com/support") - except ValueError: - sumend = len(cleaner_list) - summary = "" - for line in cleaner_list[sumstart:sumend]: - summary += f"{line}\n" - if "may become unavailable" in summary.lower(): - impact = Impact.OUTAGE - elif "has been cancelled" in summary.lower(): - status = Status.CANCELLED - start_time = cleaner_list[cleaner_list.index("Start time") + 1] - end_time = cleaner_list[cleaner_list.index("End time") + 1] - data["start"] = self.dt2ts(parser.parse(start_time)) - data["end"] = self.dt2ts(parser.parse(end_time)) - data["summary"] = summary - for line in summary.splitlines(): - line = line.strip() - maintenance_id += line - data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) - maintenance_id += str(data["start"]) - maintenance_id += str(data["end"]) + data = self.parse_html(text, data) else: - for line in text.splitlines(): - if "planned maintenance" in line.lower() or "maintenance has been scheduled" in line.lower(): - data["summary"] = line - search = re.search( - r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})", - line, - ) - if search: - data["start"] = self.dt2ts(parser.parse(search.group(1))) - data["end"] = self.dt2ts(parser.parse(search.group(2))) - maintenance_id += str(data["start"]) - maintenance_id += str(data["end"]) - if "may become unavailable" in line.lower(): - impact = Impact.OUTAGE - elif "has been cancelled" in line.lower(): - status = Status.CANCELLED - maintenance_id += line - data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) + data = self.parse_plaintext(text, data) # No maintenance ID found in emails, so a hash value is being generated using the start, # end and IDs of all circuits in the notification. - data["maintenance_id"] = hashlib.sha256(maintenance_id.encode("utf-8")).hexdigest() # nosec - data["status"] = status + for circuit in data["circuits"]: + maintenance_id += circuit.circuit_id + maintenance_id += str(data["start"]) + maintenance_id += str(data["end"]) + data["maintenance_id"] = hashlib.sha256( + maintenance_id.encode("utf-8") + ).hexdigest() # nosec return [data] From c60250bbed0ec4323554cbc7ed6f04e07db47493 Mon Sep 17 00:00:00 2001 From: Ben Tanzer Date: Thu, 4 Sep 2025 12:17:42 -0700 Subject: [PATCH 4/9] ruff formatting change --- circuit_maintenance_parser/parsers/aws.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index 72c6c468..3ea29362 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -91,10 +91,7 @@ def parse_plaintext(self, text, data): """ impact = Impact.OUTAGE for line in text.splitlines(): - if ( - "planned maintenance" in line.lower() - or "maintenance has been scheduled" in line.lower() - ): + if "planned maintenance" in line.lower() or "maintenance has been scheduled" in line.lower(): data["summary"] = line search = re.search( r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})", @@ -1309,7 +1306,5 @@ def parse_text(self, text): maintenance_id += circuit.circuit_id maintenance_id += str(data["start"]) maintenance_id += str(data["end"]) - data["maintenance_id"] = hashlib.sha256( - maintenance_id.encode("utf-8") - ).hexdigest() # nosec + data["maintenance_id"] = hashlib.sha256(maintenance_id.encode("utf-8")).hexdigest() # nosec return [data] From 81af9d394d6325efd9d78b6c2fc9e7d1aa44f5c9 Mon Sep 17 00:00:00 2001 From: Ben Tanzer Date: Fri, 12 Sep 2025 11:15:41 -0700 Subject: [PATCH 5/9] Introducing tests --- circuit_maintenance_parser/parsers/aws.py | 17 +- tests/unit/data/aws/aws1_result.json | 2 +- .../data/aws/aws1_text_parser_result.json | 2 +- tests/unit/data/aws/aws2_result.json | 2 +- .../data/aws/aws2_text_parser_result.json | 2 +- tests/unit/data/aws/aws3_result.json | 2 +- .../data/aws/aws3_text_parser_result.json | 2 +- tests/unit/data/aws/aws4_html.eml | 1307 +++++++++++++++++ .../data/aws/aws4_html_parser_result.json | 31 + tests/unit/data/aws/aws4_html_result.json | 0 tests/unit/test_e2e.py | 9 + tests/unit/test_parsers.py | 5 + 12 files changed, 1369 insertions(+), 12 deletions(-) create mode 100644 tests/unit/data/aws/aws4_html.eml create mode 100644 tests/unit/data/aws/aws4_html_parser_result.json create mode 100644 tests/unit/data/aws/aws4_html_result.json diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index 3ea29362..27abd2b5 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -91,7 +91,10 @@ def parse_plaintext(self, text, data): """ impact = Impact.OUTAGE for line in text.splitlines(): - if "planned maintenance" in line.lower() or "maintenance has been scheduled" in line.lower(): + if ( + "planned maintenance" in line.lower() + or "maintenance has been scheduled" in line.lower() + ): data["summary"] = line search = re.search( r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})", @@ -1253,6 +1256,7 @@ def parse_html(self, text, data): soup = bs4.BeautifulSoup(text, "html.parser") clean_string = soup.get_text() clean_string = re.sub("=20", "", clean_string) + clean_string = re.sub("=", "", clean_string) clean_list = clean_string.splitlines() cleaner_list = [] for line in clean_list: @@ -1264,9 +1268,8 @@ def parse_html(self, text, data): sumend = cleaner_list.index("[1] https://aws.amazon.com/support") except ValueError: sumend = len(cleaner_list) - summary = "" - for line in cleaner_list[sumstart:sumend]: - summary += f"{line}\n" + newline = " " + summary = newline.join(cleaner_list[sumstart:sumend]) if "has been cancelled" in summary.lower(): data["status"] = Status.CANCELLED start_time = cleaner_list[cleaner_list.index("Start time") + 1] @@ -1274,7 +1277,7 @@ def parse_html(self, text, data): data["start"] = self.dt2ts(parser.parse(start_time)) data["end"] = self.dt2ts(parser.parse(end_time)) data["summary"] = summary - for line in summary.splitlines(): + for line in cleaner_list[sumstart:sumend]: line = line.strip() if re.match(r"[a-z]{5}-[a-z0-9]{8}", line): data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) @@ -1306,5 +1309,7 @@ def parse_text(self, text): maintenance_id += circuit.circuit_id maintenance_id += str(data["start"]) maintenance_id += str(data["end"]) - data["maintenance_id"] = hashlib.sha256(maintenance_id.encode("utf-8")).hexdigest() # nosec + data["maintenance_id"] = hashlib.sha256( + maintenance_id.encode("utf-8") + ).hexdigest() # nosec return [data] diff --git a/tests/unit/data/aws/aws1_result.json b/tests/unit/data/aws/aws1_result.json index 5f0ab788..e11511db 100644 --- a/tests/unit/data/aws/aws1_result.json +++ b/tests/unit/data/aws/aws1_result.json @@ -28,7 +28,7 @@ } ], "end": 1621519200, - "maintenance_id": "a81ca7da4301432e1542f67d34414c71e7ccb22d233a6d1b0a70a722b386f368", + "maintenance_id": "b57f17ce354c1e0c8563f9cc0a75ac0c81f6672ffa6606a31a13ff4de2b383ae", "organizer": "aws-account-notifications@amazon.com", "provider": "aws", "sequence": 1, diff --git a/tests/unit/data/aws/aws1_text_parser_result.json b/tests/unit/data/aws/aws1_text_parser_result.json index a7ef74f2..1818a8a6 100644 --- a/tests/unit/data/aws/aws1_text_parser_result.json +++ b/tests/unit/data/aws/aws1_text_parser_result.json @@ -27,7 +27,7 @@ } ], "end": 1621519200, - "maintenance_id": "a81ca7da4301432e1542f67d34414c71e7ccb22d233a6d1b0a70a722b386f368", + "maintenance_id": "b57f17ce354c1e0c8563f9cc0a75ac0c81f6672ffa6606a31a13ff4de2b383ae", "start": 1621497600, "status": "CONFIRMED", "summary": "Planned maintenance has been scheduled on an AWS Direct Connect router in A Block, New York, NY from Thu, 20 May 2021 08:00:00 GMT to Thu, 20 May 2021 14:00:00 GMT for 6 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable." diff --git a/tests/unit/data/aws/aws2_result.json b/tests/unit/data/aws/aws2_result.json index c798b050..fec63ae1 100644 --- a/tests/unit/data/aws/aws2_result.json +++ b/tests/unit/data/aws/aws2_result.json @@ -28,7 +28,7 @@ } ], "end": 1631584920, - "maintenance_id": "6a01cfd83c1249d5f9a9c97de5232501794f37a8c15d433d09c029229f25659d", + "maintenance_id": "db4caacefd12cfa63678029e330bff2dd7a964b98f730b04ee099d76751f0f3d", "organizer": "aws-account-notifications@amazon.com", "provider": "aws", "sequence": 1, diff --git a/tests/unit/data/aws/aws2_text_parser_result.json b/tests/unit/data/aws/aws2_text_parser_result.json index 695a1e05..2e211362 100644 --- a/tests/unit/data/aws/aws2_text_parser_result.json +++ b/tests/unit/data/aws/aws2_text_parser_result.json @@ -27,7 +27,7 @@ } ], "end": 1631584920, - "maintenance_id": "6a01cfd83c1249d5f9a9c97de5232501794f37a8c15d433d09c029229f25659d", + "maintenance_id": "db4caacefd12cfa63678029e330bff2dd7a964b98f730b04ee099d76751f0f3d", "start": 1631559720, "status": "CANCELLED", "summary": "We would like to inform you that the planned maintenance that was scheduled for AWS Direct Connect endpoint in Equinix SG2, Singapore, SGP from Mon, 13 Sep 2021 19:02:00 GMT to Tue, 14 Sep 2021 02:02:00 GMT has been cancelled. Please find below your AWS Direct Connect services that would have been affected by this planned maintenance." diff --git a/tests/unit/data/aws/aws3_result.json b/tests/unit/data/aws/aws3_result.json index 8578011c..7dca673c 100644 --- a/tests/unit/data/aws/aws3_result.json +++ b/tests/unit/data/aws/aws3_result.json @@ -16,7 +16,7 @@ } ], "end": 1703055600, - "maintenance_id": "065ad66cb4e401827f675a56799be7ceb9cd66fc180ee5878c54533f7f196b52", + "maintenance_id": "387c99e743b96cf135b8e97d56c2dbbc02a0acda4eaf9637bcfeb23a7ebc96e2", "organizer": "aws-account-notifications@amazon.com", "provider": "aws", "sequence": 1, diff --git a/tests/unit/data/aws/aws3_text_parser_result.json b/tests/unit/data/aws/aws3_text_parser_result.json index b3637eb5..ef15fb13 100644 --- a/tests/unit/data/aws/aws3_text_parser_result.json +++ b/tests/unit/data/aws/aws3_text_parser_result.json @@ -15,7 +15,7 @@ } ], "end": 1703055600, - "maintenance_id": "065ad66cb4e401827f675a56799be7ceb9cd66fc180ee5878c54533f7f196b52", + "maintenance_id": "387c99e743b96cf135b8e97d56c2dbbc02a0acda4eaf9637bcfeb23a7ebc96e2", "start": 1703041200, "status": "CONFIRMED", "summary": "Emergency maintenance has been scheduled on an AWS Direct Connect endpoint in Datacenter Foo3, Anywhere, USA from Wed, 20 Dec 2023 03:00:00 GMT to Wed, 20 Dec 2023 07:00:00 GMT for 4 hours. This maintenance will cause a disruption to the following Direct Connect connections you own:" diff --git a/tests/unit/data/aws/aws4_html.eml b/tests/unit/data/aws/aws4_html.eml new file mode 100644 index 00000000..6c111ba3 --- /dev/null +++ b/tests/unit/data/aws/aws4_html.eml @@ -0,0 +1,1307 @@ +Delivered-To: myname@COMPANYNAEME.com +Received: by 2002:a05:6402:381b:b0:61c:6287:7445 with SMTP id es27csp1474854edb; + Mon, 1 Sep 2025 17:13:18 -0700 (PDT) +X-Forwarded-Encrypted: i=4; AJvYcCVBOolu7HaJFXTiqz1J48lVTgXDgopItvcQsOG2NB5wEqWouzUM2GStoNexTDpd40N/BZHLhuXr@COMPANYNAEME.com +X-Received: by 2002:a05:6214:478e:b0:714:36b2:a268 with SMTP id 6a1803df08f44-71436b2ab2bmr94495816d6.50.1756771998475; + Mon, 01 Sep 2025 17:13:18 -0700 (PDT) +ARC-Seal: i=3; a=rsa-sha256; t=1756771998; cv=pass; + d=google.com; s=arc-20240605; + b=cPktlDuAiZspHLxRKm2ek/0olYkdB9rb6Q6NLNs16eyf+/aqSw4yyHzYNlh1fakYJY + eRdaYLJJVgfcA39qk1+uJBahPstDabysoWPJEYckCj9fy66c7epvsZ0tD87pIiQFR6ZY + 4YzCz67UaKIQlyKhttx8HZCEE3o2UL5EcmyV/Lf9BN+qSdneFaEg7UQ3odO3aUuuqWS8 + q5276sShGB+lkMZGW2/3jPSHCrcWD+hn8I/4LywE23GuroyyZl64AtZ20uRvg8xtZtLk + CgHPdbruml/BUGNG3vhJNK+V/6EC7oZvpKv0n86jFbd2DawEbx68m5q3q2cSTazH3BMu + 5t7Q== +ARC-Message-Signature: i=3; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605; + h=list-unsubscribe:list-archive:list-help:list-post:list-id + :mailing-list:precedence:reply-to:feedback-id:message-id + :content-transfer-encoding:mime-version:subject:to:from:date + :dkim-signature; + bh=5fB5zaNSSsmYxdy6RGsUUC3cK6VOosKb5grQz+R6QnM=; + fh=ACW0q7uUsk6zSaemq5vmZ2K0/v+Ovs8/oM2clrq7jUg=; + b=lZIvr2XCiKiFFwfi2GjvuF0PqxUNgjtVWz6vhsvt48978zFVT6UhIq/SIMnwQmZJKg + Dw6rJgpZ89nNbLvWYQz7rkjfDhsfMW5VWQXLHJCtnC8od30uxy7n4e93zde9MFCRdSyI + AfMWG8kmonKiN7gRAoyZRnXarZBsayemJFrw9Oh6vakoyv3K0E6E76BGeFmlUHEQmXeu + kiged6FF13GnHvzXormmxvQSiuDNXdqrCi3YNt0nCTW/5ks/Fn6rSQgg/8RFO3P1OscL + KxTcnhkGwoJaFFWKs+5SBpvpvvFd4wZKNFy335J+6pjzh3nOonBY9PQtNLWsIhrNrJF5 + x1uQ==; + dara=google.com +ARC-Authentication-Results: i=3; mx.google.com; + dkim=pass header.i=@COMPANYNAEME.com header.s=COMPANYNAEME header.b=U7QrCQWA; + arc=pass (i=2 spf=pass spfdomain=user-notifications.email.aws.com dkim=pass dkdomain=aws.com dkim=pass dkdomain=amazonses.com dmarc=pass fromdomain=aws.com); + spf=pass (google.com: domain of TEAMEMAIL+bncbcqibrfyzuhrbhpn3dcqmgqewcpexxy@COMPANYNAEME.com designates 209.85.220.69 as permitted sender) smtp.mailfrom=TEAMEMAIL+bncBCQIBRFYZUHRBHPN3DCQMGQEWCPEXXY@COMPANYNAEME.com; + dmarc=pass (p=QUARANTINE sp=QUARANTINE dis=NONE) header.from=COMPANYNAEME.com; + dara=pass header.i=@COMPANYNAEME.com +Return-Path: +Received: from mail-sor-f69.google.com (mail-sor-f69.google.com. [209.85.220.69]) + by mx.google.com with SMTPS id 6a1803df08f44-720d0b71610sor2315776d6.5.2025.09.01.17.13.18 + for + (Google Transport Security); + Mon, 01 Sep 2025 17:13:18 -0700 (PDT) +Received-SPF: pass (google.com: domain of TEAMEMAIL+bncbcqibrfyzuhrbhpn3dcqmgqewcpexxy@COMPANYNAEME.com designates 209.85.220.69 as permitted sender) client-ip=209.85.220.69; +Authentication-Results: mx.google.com; + dkim=pass header.i=@COMPANYNAEME.com header.s=COMPANYNAEME header.b=U7QrCQWA; + arc=pass (i=2 spf=pass spfdomain=user-notifications.email.aws.com dkim=pass dkdomain=aws.com dkim=pass dkdomain=amazonses.com dmarc=pass fromdomain=aws.com); + spf=pass (google.com: domain of TEAMEMAIL+bncbcqibrfyzuhrbhpn3dcqmgqewcpexxy@COMPANYNAEME.com designates 209.85.220.69 as permitted sender) smtp.mailfrom=TEAMEMAIL+bncBCQIBRFYZUHRBHPN3DCQMGQEWCPEXXY@COMPANYNAEME.com; + dmarc=pass (p=QUARANTINE sp=QUARANTINE dis=NONE) header.from=COMPANYNAEME.com; + dara=pass header.i=@COMPANYNAEME.com +ARC-Seal: i=2; a=rsa-sha256; t=1756771998; cv=pass; + d=google.com; s=arc-20240605; + b=cDKoJ/tE2k6YHOAEUDlTHWeL8oCgeuXKh4rvA318C26ci46kwGv26XThi1vbXH2pOH + RlDIRDW8amHKoBudi3rLmqGj96OH3QAa5LJQ2cwuoOh5LBfJLiiOFOlDssXcwa2fD3ei + bIowzRKxE4HVxH3e/hdGvRkTnkP7Gxeo0dMYjLhGKU0+Nj7d/ItRff+G+LDa1NIn0Y91 + IPwNyuf2JGf7uAVQ7Hx6UqHlLHFjFxjUgMxhzh+Th6hOkV2SMUh67RJCpu1tkcwHbFsM + GAw6257NVJ+41RRWgCdYztBpavYafXeIfFIJajqD6c5wWQxtp2NovviTyq++99JZf0zg + 0YWA== +ARC-Message-Signature: i=2; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605; + h=list-unsubscribe:list-archive:list-help:list-post:list-id + :mailing-list:precedence:reply-to:feedback-id:message-id + :content-transfer-encoding:mime-version:subject:to:from:date + :dkim-signature; + bh=5fB5zaNSSsmYxdy6RGsUUC3cK6VOosKb5grQz+R6QnM=; + fh=ACW0q7uUsk6zSaemq5vmZ2K0/v+Ovs8/oM2clrq7jUg=; + b=dwWXm0KP1cKr3QBB2Xb47LbYAYnzAJoCUxOVxmlnc0yyqUaLpVqnngYh/VN9lPStMN + TY2MInn773OUAfoy2x2g7hw29R0puqfWCu+rd/nDUc+03qWbBtCu27wnRRurK6neoyKO + bxIDcVANk/eivcrIqttooDAFjxNswGzdzW2HmaZWVZKiyU+Bu3dmZROc3ONDC+ffkcgA + he+qfk9prDHIfsecFCcfevdWkPv6wf3nL6YWnF8ibWpOvVvf/rsqqdFna6tZfHaXCJF3 + Nq4fOQ0iVsRoxQFDgP9j5onlv2KP8dZfU9KTcJ2A5kzBuWMTt6ZcXVbpiw8z4gfXNdru + xm7Q==; + darn=COMPANYNAEME.com +ARC-Authentication-Results: i=2; mx.google.com; + dkim=pass header.i=@aws.com header.s=3qt4u7sb66lrl6cbnrt63xxwvmgzws7o header.b=LbK6bRHB; + dkim=pass header.i=@amazonses.com header.s=6gbrjpgwjskckoa6a5zn6fwqkn67xbtw header.b=QOxhL6Ey; + spf=pass (google.com: domain of 0100019907c54f20-82a2d9c4-36e4-4098-9721-f6725fd3359e-000000@user-notifications.email.aws.com designates 54.240.9.75 as permitted sender) smtp.mailfrom=0100019907c54f20-82a2d9c4-36e4-4098-9721-f6725fd3359e-000000@user-notifications.email.aws.com; + dmarc=pass (p=QUARANTINE sp=QUARANTINE dis=NONE) header.from=aws.com +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=COMPANYNAEME.com; s=COMPANYNAEME; t=1756771998; x=1757376798; darn=COMPANYNAEME.com; + h=list-unsubscribe:list-archive:list-help:list-post:list-id + :mailing-list:precedence:reply-to:x-original-authentication-results + :x-original-sender:feedback-id:message-id:content-transfer-encoding + :mime-version:subject:to:from:date:from:to:cc:subject:date + :message-id:reply-to; + bh=5fB5zaNSSsmYxdy6RGsUUC3cK6VOosKb5grQz+R6QnM=; + b=U7QrCQWAkaiUXtXZHHbShEPbYMRLJpOtawy6A0ypd7R/i4dzduYwNRK9+FJpECsZbr + M+oqGmedX18wH/+Hhz2SPV6E5rYMllDXMBE1LrIcGzO3Zt+xhKCUjRmWasW6tGHx0K54 + 1TpEXU4OmP9SOl1T2ShuxQN+tp/qkJXQ3a9M0= +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20230601; t=1756771998; x=1757376798; + h=x-spam-checked-in-group:list-unsubscribe:list-archive:list-help + :list-post:x-spam-checked-in-group:list-id:mailing-list:precedence + :reply-to:x-original-authentication-results:x-original-sender + :feedback-id:message-id:content-transfer-encoding:mime-version + :subject:to:from:date:x-beenthere:x-beenthere:x-gm-message-state + :from:to:cc:subject:date:message-id:reply-to; + bh=5fB5zaNSSsmYxdy6RGsUUC3cK6VOosKb5grQz+R6QnM=; + b=uNDaTBfJ+dLX4EjR0d1zGqtKcY5FP9WbctuOGIggJ9BBoQEi02NpuMVYA1AMZpkuQ9 + ovyGydEjnyHyG2Ki3DJAD01ZfTtStIPvry3GrZ3dgorAfNmH6SH2cOlucxL4S3RXhNAk + /EodtQCWUAI1rJ5wNSgUL7FbqIhgEU7Yw30mOxpS9LxCLDN6eZMVYirXG12JYJhjuZub + r4Kfe97op7N6PR29Cc/0G4tuEcYmz6ii06RuVL241+rgYi/Piu46zPUYgx5M8WueqlCU + ffZgw3GSYIfZ6+M510HwjLbQAx7GxKP8CNfmNy7nF7GiwHAjJdHHWATDDHYjOoXPOwN9 + R80g== +X-Forwarded-Encrypted: i=2; AJvYcCXLyP+hH/ZwXLxI3BcU3H6KcqJZXCDiTqtb+hnCRfNYaf4x/Tc3gowAsk/CxkmgZSmTzNf5Pine@COMPANYNAEME.com +X-Gm-Message-State: AOJu0Ywoc8aQ4YWO+kn2e4ExY73hrgafOrp3bulPEbmOagzXQ8Xr+N73 + rYTz0p9qAPV1XzH9A9NaPjovOOVBaK2ygWPZwF8PfVqMNvyHy+n8tFThSZOgyBCtrifn4A== +X-Google-Smtp-Source: AGHT+IGJ829/4N4yioCblBW1awLp69wCnWB8lTZTKu3h721wL+PlFYLeTzr6FseENO6TIvBBK8064g== +X-Received: by 2002:a05:6214:230e:b0:70e:b49e:6a8f with SMTP id 6a1803df08f44-70fac8abb74mr123301966d6.45.1756771997654; + Mon, 01 Sep 2025 17:13:17 -0700 (PDT) +X-BeenThere: TEAMEMAIL@COMPANYNAEME.com; h=AZMbMZfEvZ3FaiiEceFCh+MoExR9vriSDUIlzoBgfR8tGu7/Xw== +Received: by 2002:a05:6214:301b:b0:70d:e7ba:ea21 with SMTP id + 6a1803df08f44-70df04b1e47ls76294416d6.1.-pod-prod-09-us; Mon, 01 Sep 2025 + 17:13:16 -0700 (PDT) +X-Forwarded-Encrypted: i=2; AJvYcCUSVDy3sVMA/cdfqKOPePF/Rehwvd8xgP8EHSnlX8NgiE5fyWWZuBDURS2SDJfLhUob4Lkvo30=@COMPANYNAEME.com +X-Received: by 2002:a05:6214:400c:b0:70d:ec97:c4d0 with SMTP id 6a1803df08f44-70fac870228mr110591326d6.41.1756771996823; + Mon, 01 Sep 2025 17:13:16 -0700 (PDT) +X-BeenThere: NOTICYEMAIL@COMPANYNAEME.com; h=AZMbMZf30FJt2RAG+QTsSjkRa6O7S+XYvsIdtXD4IvmfaKE4Sw== +Received: by 2002:a05:6214:258a:b0:707:4335:5f7 with SMTP id + 6a1803df08f44-70df00b19f5ls45007866d6.0.-pod-prod-09-us; Mon, 01 Sep 2025 + 17:13:16 -0700 (PDT) +X-Received: by 2002:ad4:5c64:0:b0:70d:f0ff:342d with SMTP id 6a1803df08f44-70fac6f6f89mr120542266d6.14.1756771996007; + Mon, 01 Sep 2025 17:13:16 -0700 (PDT) +ARC-Seal: i=1; a=rsa-sha256; t=1756771995; cv=none; + d=google.com; s=arc-20240605; + b=BiLU5KCzF0UI1ZbSDSDzPKcVqUe0s9+kXw5WT4HV/dEw7C8OneAWAG2WfFV5cbZrzK + u5/vIED6A2MbxetjxOHje8uGE6rHVGXYsY6Dek82+CGkoKzZ/Yt1asBI/pgdv00kzK37 + MzKkZMoNV0ZyGQXFL1+xtphIg6z/GbwUm2qda9/e7t3OXOmeB/VJQchl6oefVGkvRyzn + eF4Qc6brZDW+GufUcupAwwVj2nleoHeR3akYlJg0pLsa+2Emv2gX4piwoeB2s+OLOd8y + sSZc11hS7k+aiXX8qM9kHi6Z1dcR566YAkrwPjgQy3MDs/xWDDADuVZZ7Ovi733cKxUr + HnYQ== +ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605; + h=feedback-id:message-id:content-transfer-encoding:mime-version + :subject:to:from:date:dkim-signature:dkim-signature; + bh=5fB5zaNSSsmYxdy6RGsUUC3cK6VOosKb5grQz+R6QnM=; + fh=/BZp/CoWS56RYHJk/Nq7+rYFbsXKZMYT25P0V/4q4R8=; + b=a3P3BQ1VySb3LriRmN4i+PgpmUDJaz72+xPPC++9PfUKDkEWVOJIuJPd0hQIUs6JzG + 5xnJAL+W0fL3KNeI0KSD1D5HdZxZpLrBor+C8PSyWyYXSixxVjt/M3RHGg44tfuLFDs8 + lgLK2HujqboMvMw6K+fjnkVriMbbdFkzpl0a8zM6HwKg2t0nlixIfpYSWWNjr5QtofKD + cSYb3drYkDurPlmUpcGX0kFiraKmtfjXXhWBzuhgsEP9DcgaExflqHl1epVGZDHaSwEo + BJyN6VLHtffBP/rvgYuQRByK/iZAZnW/jj70UvM9JzjeS7XTBJ7XxbNYII4vC57qSqKK + fWEg==; + dara=google.com +ARC-Authentication-Results: i=1; mx.google.com; + dkim=pass header.i=@aws.com header.s=3qt4u7sb66lrl6cbnrt63xxwvmgzws7o header.b=LbK6bRHB; + dkim=pass header.i=@amazonses.com header.s=6gbrjpgwjskckoa6a5zn6fwqkn67xbtw header.b=QOxhL6Ey; + spf=pass (google.com: domain of 0100019907c54f20-82a2d9c4-36e4-4098-9721-f6725fd3359e-000000@user-notifications.email.aws.com designates 54.240.9.75 as permitted sender) smtp.mailfrom=0100019907c54f20-82a2d9c4-36e4-4098-9721-f6725fd3359e-000000@user-notifications.email.aws.com; + dmarc=pass (p=QUARANTINE sp=QUARANTINE dis=NONE) header.from=aws.com +Received: from a9-75.smtp-out.amazonses.com (a9-75.smtp-out.amazonses.com. [54.240.9.75]) + by mx.google.com with ESMTPS id 6a1803df08f44-720b500a9eesi1481246d6.471.2025.09.01.17.13.15 + for + (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128); + Mon, 01 Sep 2025 17:13:15 -0700 (PDT) +Received-SPF: pass (google.com: domain of 0100019907c54f20-82a2d9c4-36e4-4098-9721-f6725fd3359e-000000@user-notifications.email.aws.com designates 54.240.9.75 as permitted sender) client-ip=54.240.9.75; +Date: Tue, 2 Sep 2025 00:13:15 +0000 +From: "'Amazon Web Services' via TEAM NAME Notices" +To: NOTICYEMAIL@COMPANYNAEME.com +Subject: [NRE] [NOTICYEMAIL] AWS Direct Connect Planned Maintenance + Notification [AWS Account: 111111111111] +MIME-Version: 1.0 +Content-Type: text/html; charset="UTF-8" +Content-Transfer-Encoding: quoted-printable +X-NOTIFICATIONS-NOTIFICATION-EVENT-ARN: arn:aws:notifications::111111111111:managed-notification-configuration/category/AWS-Health/sub-category/Operations/event/a01k43vr7zdhqc4pfztnf2f6ar2 +X-NOTIFICATIONS-EMAIL-CONTACT-ID: arn:aws:notifications-contacts::111111111111:emailcontact/a01k3m4t2hrnt7zwa7r390frs00 +X-NOTIFICATIONS-CLIENT-REQUEST-ID: arn:aws:silverwire:us-east-1:silvermine:AWS_DIRECTCONNECT_MAINTENANCE_SCHEDULED/AWS_DIRECTCONNECT_MAINTENANCE_SCHEDULED_84BDD6BC9D0B29720DC58CBCFB104FBB/f21310c2736a8d55e44b66eb05a95b687299f5096ddcb41a3924bdaad0db550f/UNO-43ce03cc902a71e3c4bc8c35bf83e9c4 +Message-ID: <0100019907c54f20-82a2d9c4-36e4-4098-9721-f6725fd3359e-000000@email.amazonses.com> +Feedback-ID: ::1.us-east-1.YXthQgddzDZw994cvMHZS9b4qW/Udf7tCUgnW4fvLGk=:AmazonSES +X-SES-Outgoing: 2025.09.02-54.240.9.75 +X-Original-Sender: health@aws.com +X-Original-Authentication-Results: mx.google.com; dkim=pass + header.i=@aws.com header.s=3qt4u7sb66lrl6cbnrt63xxwvmgzws7o + header.b=LbK6bRHB; dkim=pass header.i=@amazonses.com + header.s=6gbrjpgwjskckoa6a5zn6fwqkn67xbtw header.b=QOxhL6Ey; spf=pass + (google.com: domain of 0100019907c54f20-82a2d9c4-36e4-4098-9721-f6725fd3359e-000000@user-notifications.email.aws.com + designates 54.240.9.75 as permitted sender) smtp.mailfrom=0100019907c54f20-82a2d9c4-36e4-4098-9721-f6725fd3359e-000000@user-notifications.email.aws.com; + dmarc=pass (p=QUARANTINE sp=QUARANTINE dis=NONE) header.from=aws.com +X-Original-From: Amazon Web Services +Reply-To: Amazon Web Services +Precedence: list +Mailing-list: list TEAMEMAIL@COMPANYNAEME.com; contact TEAMEMAIL+owners@COMPANYNAEME.com +List-ID: +X-Spam-Checked-In-Group: NOTICYEMAIL@COMPANYNAEME.com +X-Google-Group-Id: 936915414963 +List-Post: , +List-Help: , + +List-Archive: +List-Unsubscribe: , + +X-Spam-Checked-In-Group: TEAMEMAIL@COMPANYNAEME.com + + + + + + =20 + + + + + + + + + + =20 + + + + + + =20 + =20 + + + =20 + =20 + + + =20 + + + =20 + =20 +
+ =20 + =20 + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 + + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ + =20 +
+ =20 + + + =20 + + + + =20 + +
+ =20 +

AWS Health= + Event

+ =20 +
+ =20 +
+ =20 + + =20 +
+ =20 + + + =20 + + + + =20 + +
+ =20 + + + + + + +
+ =20 + + =20 +
+ =20 +
+ =20 +
+ =20 + +
+ =20 + +
+ =20 +
+ =20 + =20 + + + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + + + + =20 + + + + =20 + +
+ =20 + + =20 +
+ =20 +
AWS Direct Connect Planned Maintenance Notification [AWS Account: 11= +1111111111]
+ =20 +
+ =20 + + + + + + +
+ + View details in service console + +
+ =20 +
+ =20 +
+ Hello, +
+
Planned maintenance has been scheduled on an AWS Direct Connect endpo= +int in Sonda Quilicura, Santiago from Tue, 16 Sep 2025 01:00:00 GMT to Tue,= + 16 Sep 2025 05:00:00 GMT for 4 hours. During this maintenance window, your= + AWS Direct Connect services listed below may become unavailable. +
+
dxcon-abc12345 +
dxvif-1234hfjd +
dxlag-fge11110 +
dxcon-ffucoreh +
dxcon-fg885ug5 +
+
This maintenance is scheduled to avoid disrupting redundant connectio= +ns at the same time. +
+
If you encounter any problems with your connection after the end of t= +his maintenance window, please contact AWS Support[1]. +
+
[1] https://aws.amazon.com/support +
+
. +
+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + + + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + +
+ =20 +

+

+ =20 + + =20 + =20 +
+ =20 +
Message metadata
+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + + =20 + + =20 + =20 +
+ =20 + + + + + + +
+ + + + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + + + + =20 + + + + =20 + +
+ =20 +

Affected account

+

111111111111

+ =20 +
+ =20 +

Event type code

+

AWS_DIRECTCONNECT_MAINTENANCE_SCHEDULED

+ =20 +
+ =20 +

Event region

+

sa-east-1

+ =20 +
+ =20 +

End time

+

Tue, 16 Sep 2025 05:00:00 GMT

+ =20 +
+ =20 +
+ =20 + + + + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + + + + =20 + +
+ =20 +

Service

+

DIRECTCONNECT

+ =20 +
+ =20 +

Event type category

+

scheduledChange

+ =20 +
+ =20 +

Start time

+

Tue, 16 Sep 2025 01:00:00 GMT

+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + +
+ =20 +
+ =20 + =20 + + =20 + =20 + =20 + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + +
+ =20 +
AWS managed notification details
+ =20 +
+ =20 +
AWS Health: Operations events notifications are generated by AWS and= + sent to selected account contacts. You can add additional delivery channel= +s for these notifications using AWS managed notifica= +tions subscriptions.
+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + + =20 + =20 +
+ =20 + + + + + + +
+ + + + =20 +
+ =20 + + + =20 + + + + =20 + +
+ =20 +

Event type

+

AWS Health Event

+ =20 +
+ =20 +
+ =20 + + + + =20 +
+ =20 + + + =20 + + + + =20 + +
+ =20 +

Category

+

Operations

+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ =20 + + + =20 + + + + =20 + + + + =20 + + + + =20 + +
+ =20 +

+

+ =20 + + =20 + =20 +
+ =20 +
Thank you,
+ =20 +
+ =20 +
Amazon Web Services
+ =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + + + + =20 + =20 +
+ =20 + + + + + + +
+ + =20 +
+ =20 + + + =20 + + + + =20 + +
+ =20 +
You are receiving this email because NOTICYEMAIL@COMPANYNAEME.com is sub= +scribed to AWS Health: Operations events notifications. If you do not wis= +h to receive emails for these AWS managed notifications, you may unsubscribe. If you believe you've received this email by = +error or are experiencing issues managing email subscription, please contact us. +

+ Amazon Web Services, Inc. is a subsidiary of Amazon.com, Inc. AMA= +ZON WEB SERVICES AWS, and related logos are trademarks of Amazon Web Servic= +es, Inc. or its affiliates. +

+ This message was produced and distributed by Amazon Web Services,= + Inc. or its affiliates 410 Terry Ave. North, Seattle, WA = +98109. +

+ =C2=A9 2025, Amazon Web Services, Inc. or its af= +filiates. All rights reserved. Read our Privacy Notice. + =20 +
+ =20 +
+ =20 + +
+ =20 +
+ =20 + =20 + +
+ =20 +
+ =20 + =20 + + =20 + =20 +
+ =20 + 3D"" + + + =20 + + diff --git a/tests/unit/data/aws/aws4_html_parser_result.json b/tests/unit/data/aws/aws4_html_parser_result.json new file mode 100644 index 00000000..5c60e35f --- /dev/null +++ b/tests/unit/data/aws/aws4_html_parser_result.json @@ -0,0 +1,31 @@ + [ + { + "circuits": [ + { + "circuit_id": "dxcon-abc12345", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxvif-1234hfjd", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxlag-fge11110", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxcon-ffucoreh", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxcon-fg885ug5", + "impact": "OUTAGE" + } + ], + "end": 1757998800, + "maintenance_id": "369a433f3071c0837b379bc170d410e35a7a733d61c2efaf51bd95a3a4073d71", + "start": 1757984400, + "status": "CONFIRMED", + "summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Sonda Quilicura, Santiago from Tue, 16 Sep 2025 01:00:00 GMT to Tue, 16 Sep 2025 05:00:00 GMT for 4 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable." + } + ] \ No newline at end of file diff --git a/tests/unit/data/aws/aws4_html_result.json b/tests/unit/data/aws/aws4_html_result.json new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/test_e2e.py b/tests/unit/test_e2e.py index a61ca933..a5dd9068 100644 --- a/tests/unit/test_e2e.py +++ b/tests/unit/test_e2e.py @@ -159,6 +159,15 @@ Path(dir_path, "data", "aws", "aws3_result.json"), ], ), + ( + AWS, + [ + ("email", Path(dir_path, "data", "aws", "aws4_html.eml")), + ], + [ + Path(dir_path, "data", "aws", "aws4_html_result.json"), + ], + ), # BSO ( BSO, diff --git a/tests/unit/test_parsers.py b/tests/unit/test_parsers.py index 524f281e..7a70c0fd 100644 --- a/tests/unit/test_parsers.py +++ b/tests/unit/test_parsers.py @@ -170,6 +170,11 @@ def default(self, o): Path(dir_path, "data", "aws", "aws3.eml"), Path(dir_path, "data", "aws", "aws3_text_parser_result.json"), ), + ( + TextParserAWS1, + Path(dir_path, "data", "aws", "aws4_html.eml"), + Path(dir_path, "data", "aws", "aws4_html_parser_result.json"), + ), # BSO ( HtmlParserBSO1, From 9bc27e47b5c373d1484dc7c86ef4745d7b22916c Mon Sep 17 00:00:00 2001 From: Ben Tanzer Date: Mon, 15 Sep 2025 11:10:32 -0700 Subject: [PATCH 6/9] move html to own parser --- .../aws/aws4_html_result.json => aws-test.py | 0 circuit_maintenance_parser/parsers/aws.py | 65 ++++++++----------- circuit_maintenance_parser/provider.py | 3 +- .../unit/data/aws/{aws4_html.eml => aws4.eml} | 0 tests/unit/data/aws/aws4_result.json | 35 ++++++++++ tests/unit/test_e2e.py | 4 +- tests/unit/test_parsers.py | 2 +- 7 files changed, 68 insertions(+), 41 deletions(-) rename tests/unit/data/aws/aws4_html_result.json => aws-test.py (100%) rename tests/unit/data/aws/{aws4_html.eml => aws4.eml} (100%) create mode 100644 tests/unit/data/aws/aws4_result.json diff --git a/tests/unit/data/aws/aws4_html_result.json b/aws-test.py similarity index 100% rename from tests/unit/data/aws/aws4_html_result.json rename to aws-test.py diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index 27abd2b5..a7b8eece 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -8,13 +8,7 @@ import bs4 # type: ignore from dateutil import parser -from circuit_maintenance_parser.parser import ( - CircuitImpact, - EmailSubjectParser, - Impact, - Status, - Text, -) +from circuit_maintenance_parser.parser import CircuitImpact, EmailSubjectParser, Html, Impact, Status, Text # pylint: disable=too-many-nested-blocks, too-many-branches @@ -45,7 +39,7 @@ def get_text_hook(raw): soup = bs4.BeautifulSoup(quopri.decodestring(raw), features="lxml") return soup.text - def parse_plaintext(self, text, data): + def parse_text(self, text): r"""Parse text. Example: @@ -89,6 +83,11 @@ def parse_plaintext(self, text, data): Start Time: Wed, 3 Sep 2025 09:00:00 GMT End Time: Wed, 3 Sep 2025 13:00:00 GMT """ + data = { + "circuits": [], + "status": Status.CONFIRMED, + } + maintenance_id = "" impact = Impact.OUTAGE for line in text.splitlines(): if ( @@ -119,16 +118,27 @@ def parse_plaintext(self, text, data): data["status"] = Status.CANCELLED if re.match(r"[a-z]{5}-[a-z0-9]{8}", line): data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) - return data + # No maintenance ID found in emails, so a hash value is being generated using the start, + # end and IDs of all circuits in the notification. + for circuit in data["circuits"]: + maintenance_id += circuit.circuit_id + maintenance_id += str(data["start"]) + maintenance_id += str(data["end"]) + data["maintenance_id"] = hashlib.sha256( + maintenance_id.encode("utf-8") + ).hexdigest() # nosec + return [data] + +class HtmlParserAWS1(Html): + """Notifications Parser for AWS HTML Emails.""" - def parse_html(self, text, data): + def parse_html(self, soup): """Parses AWS HTML notifications. Wrapper method to deal with both html and plaintext emails. Args: - text (str): email text. - data (dict): the dictionary structure started in wrapper method. + soup (str): email text. Returns: data (dict): dictionary structure with maintenance details @@ -1252,8 +1262,13 @@ def parse_html(self, text, data): =20 """ + data = { + "circuits": [], + "status": Status.CONFIRMED, + } + maintenance_id = "" impact = Impact.OUTAGE - soup = bs4.BeautifulSoup(text, "html.parser") + soup = bs4.BeautifulSoup(soup, "html.parser") clean_string = soup.get_text() clean_string = re.sub("=20", "", clean_string) clean_string = re.sub("=", "", clean_string) @@ -1281,30 +1296,6 @@ def parse_html(self, text, data): line = line.strip() if re.match(r"[a-z]{5}-[a-z0-9]{8}", line): data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) - return data - - def parse_text(self, text): - """Parses AWS notifications. - - Wrapper method to deal with both html and plaintext emails. - - Args: - text (str): email text. - - Returns: - data (dict): dictionary structure with maintenance details - """ - data = { - "circuits": [], - "status": Status.CONFIRMED, - } - maintenance_id = "" - if re.search(r"", text, re.IGNORECASE): - data = self.parse_html(text, data) - else: - data = self.parse_plaintext(text, data) - # No maintenance ID found in emails, so a hash value is being generated using the start, - # end and IDs of all circuits in the notification. for circuit in data["circuits"]: maintenance_id += circuit.circuit_id maintenance_id += str(data["start"]) diff --git a/circuit_maintenance_parser/provider.py b/circuit_maintenance_parser/provider.py index e5ef0bb9..4bec9815 100644 --- a/circuit_maintenance_parser/provider.py +++ b/circuit_maintenance_parser/provider.py @@ -17,7 +17,7 @@ from circuit_maintenance_parser.parsers.apple import SubjectParserApple, TextParserApple from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1 from circuit_maintenance_parser.parsers.att import HtmlParserATT1, XlsxParserATT1 -from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1 +from circuit_maintenance_parser.parsers.aws import HtmlParserAWS1, SubjectParserAWS1, TextParserAWS1 from circuit_maintenance_parser.parsers.bso import HtmlParserBSO1 from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1, SubjectParserCogent1, TextParserCogent1 from circuit_maintenance_parser.parsers.colt import CsvParserColt1, SubjectParserColt1, SubjectParserColt2 @@ -251,6 +251,7 @@ class AWS(GenericProvider): _processors: List[GenericProcessor] = PrivateAttr( [ + CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserAWS1]), CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1, SubjectParserAWS1]), ] ) diff --git a/tests/unit/data/aws/aws4_html.eml b/tests/unit/data/aws/aws4.eml similarity index 100% rename from tests/unit/data/aws/aws4_html.eml rename to tests/unit/data/aws/aws4.eml diff --git a/tests/unit/data/aws/aws4_result.json b/tests/unit/data/aws/aws4_result.json new file mode 100644 index 00000000..c7ad67e9 --- /dev/null +++ b/tests/unit/data/aws/aws4_result.json @@ -0,0 +1,35 @@ +[ + { + "account": "111111111111", + "circuits": [ + { + "circuit_id": "dxcon-abc12345", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxvif-1234hfjd", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxlag-fge11110", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxcon-ffucoreh", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxcon-fg885ug5", + "impact": "OUTAGE" + } + ], + "end": 1757998800, + "maintenance_id": "369a433f3071c0837b379bc170d410e35a7a733d61c2efaf51bd95a3a4073d71", + "organizer": "aws-account-notifications@amazon.com", + "provider": "aws", + "sequence": 1, + "start": 1757984400, + "status": "CONFIRMED", + "summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Sonda Quilicura, Santiago from Tue, 16 Sep 2025 01:00:00 GMT to Tue, 16 Sep 2025 05:00:00 GMT for 4 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable." + } +] \ No newline at end of file diff --git a/tests/unit/test_e2e.py b/tests/unit/test_e2e.py index a5dd9068..135ebd51 100644 --- a/tests/unit/test_e2e.py +++ b/tests/unit/test_e2e.py @@ -162,10 +162,10 @@ ( AWS, [ - ("email", Path(dir_path, "data", "aws", "aws4_html.eml")), + ("html", Path(dir_path, "data", "aws", "aws4.eml")), ], [ - Path(dir_path, "data", "aws", "aws4_html_result.json"), + Path(dir_path, "data", "aws", "aws4_result.json"), ], ), # BSO diff --git a/tests/unit/test_parsers.py b/tests/unit/test_parsers.py index 7a70c0fd..e33d62ae 100644 --- a/tests/unit/test_parsers.py +++ b/tests/unit/test_parsers.py @@ -172,7 +172,7 @@ def default(self, o): ), ( TextParserAWS1, - Path(dir_path, "data", "aws", "aws4_html.eml"), + Path(dir_path, "data", "aws", "aws4.eml"), Path(dir_path, "data", "aws", "aws4_html_parser_result.json"), ), # BSO From e2d81d612e0d3384be59eba68cd3925b7cec6f5c Mon Sep 17 00:00:00 2001 From: Ben Tanzer Date: Mon, 15 Sep 2025 13:20:37 -0700 Subject: [PATCH 7/9] Include email timestamp and move aws html parsing to own method --- aws-test.py | 0 circuit_maintenance_parser/parsers/aws.py | 33 ++++----- circuit_maintenance_parser/provider.py | 2 +- .../data/aws/aws4_html_parser_result.json | 4 +- tests/unit/data/aws/aws4_result.json | 71 ++++++++++--------- tests/unit/test_parsers.py | 4 +- 6 files changed, 57 insertions(+), 57 deletions(-) delete mode 100644 aws-test.py diff --git a/aws-test.py b/aws-test.py deleted file mode 100644 index e69de29b..00000000 diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index a7b8eece..5667dcc9 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -8,7 +8,14 @@ import bs4 # type: ignore from dateutil import parser -from circuit_maintenance_parser.parser import CircuitImpact, EmailSubjectParser, Html, Impact, Status, Text +from circuit_maintenance_parser.parser import ( + CircuitImpact, + EmailSubjectParser, + Html, + Impact, + Status, + Text, +) # pylint: disable=too-many-nested-blocks, too-many-branches @@ -90,10 +97,7 @@ def parse_text(self, text): maintenance_id = "" impact = Impact.OUTAGE for line in text.splitlines(): - if ( - "planned maintenance" in line.lower() - or "maintenance has been scheduled" in line.lower() - ): + if "planned maintenance" in line.lower() or "maintenance has been scheduled" in line.lower(): data["summary"] = line search = re.search( r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})", @@ -124,11 +128,10 @@ def parse_text(self, text): maintenance_id += circuit.circuit_id maintenance_id += str(data["start"]) maintenance_id += str(data["end"]) - data["maintenance_id"] = hashlib.sha256( - maintenance_id.encode("utf-8") - ).hexdigest() # nosec + data["maintenance_id"] = hashlib.sha256(maintenance_id.encode("utf-8")).hexdigest() # nosec return [data] + class HtmlParserAWS1(Html): """Notifications Parser for AWS HTML Emails.""" @@ -1262,22 +1265,17 @@ def parse_html(self, soup): =20 """ - data = { - "circuits": [], - "status": Status.CONFIRMED, - } + data = {"circuits": [], "status": Status.CONFIRMED, "stamp": 0} maintenance_id = "" impact = Impact.OUTAGE - soup = bs4.BeautifulSoup(soup, "html.parser") clean_string = soup.get_text() - clean_string = re.sub("=20", "", clean_string) - clean_string = re.sub("=", "", clean_string) clean_list = clean_string.splitlines() cleaner_list = [] for line in clean_list: newline = line.strip() if newline != "": cleaner_list.append(newline) + data["stamp"] = self.dt2ts(parser.parse(cleaner_list[2])) sumstart = cleaner_list.index("Hello,") try: sumend = cleaner_list.index("[1] https://aws.amazon.com/support") @@ -1292,6 +1290,7 @@ def parse_html(self, soup): data["start"] = self.dt2ts(parser.parse(start_time)) data["end"] = self.dt2ts(parser.parse(end_time)) data["summary"] = summary + data["account"] = cleaner_list[cleaner_list.index("Affected account") + 1] for line in cleaner_list[sumstart:sumend]: line = line.strip() if re.match(r"[a-z]{5}-[a-z0-9]{8}", line): @@ -1300,7 +1299,5 @@ def parse_html(self, soup): maintenance_id += circuit.circuit_id maintenance_id += str(data["start"]) maintenance_id += str(data["end"]) - data["maintenance_id"] = hashlib.sha256( - maintenance_id.encode("utf-8") - ).hexdigest() # nosec + data["maintenance_id"] = hashlib.sha256(maintenance_id.encode("utf-8")).hexdigest() # nosec return [data] diff --git a/circuit_maintenance_parser/provider.py b/circuit_maintenance_parser/provider.py index 4bec9815..f0b66cb7 100644 --- a/circuit_maintenance_parser/provider.py +++ b/circuit_maintenance_parser/provider.py @@ -251,7 +251,7 @@ class AWS(GenericProvider): _processors: List[GenericProcessor] = PrivateAttr( [ - CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserAWS1]), + CombinedProcessor(data_parsers=[HtmlParserAWS1]), CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1, SubjectParserAWS1]), ] ) diff --git a/tests/unit/data/aws/aws4_html_parser_result.json b/tests/unit/data/aws/aws4_html_parser_result.json index 5c60e35f..a44fc2ac 100644 --- a/tests/unit/data/aws/aws4_html_parser_result.json +++ b/tests/unit/data/aws/aws4_html_parser_result.json @@ -1,5 +1,6 @@ [ { + "account": "111111111111", "circuits": [ { "circuit_id": "dxcon-abc12345", @@ -25,7 +26,8 @@ "end": 1757998800, "maintenance_id": "369a433f3071c0837b379bc170d410e35a7a733d61c2efaf51bd95a3a4073d71", "start": 1757984400, + "stamp": 1756771998, "status": "CONFIRMED", - "summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Sonda Quilicura, Santiago from Tue, 16 Sep 2025 01:00:00 GMT to Tue, 16 Sep 2025 05:00:00 GMT for 4 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable." + "summary": "Hello, Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Sonda Quilicura, Santiago from Tue, 16 Sep 2025 01:00:00 GMT to Tue, 16 Sep 2025 05:00:00 GMT for 4 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable. dxcon-abc12345 dxvif-1234hfjd dxlag-fge11110 dxcon-ffucoreh dxcon-fg885ug5 This maintenance is scheduled to avoid disrupting redundant connections at the same time. If you encounter any problems with your connection after the end of this maintenance window, please contact AWS Support[1]." } ] \ No newline at end of file diff --git a/tests/unit/data/aws/aws4_result.json b/tests/unit/data/aws/aws4_result.json index c7ad67e9..553c9a82 100644 --- a/tests/unit/data/aws/aws4_result.json +++ b/tests/unit/data/aws/aws4_result.json @@ -1,35 +1,36 @@ -[ - { - "account": "111111111111", - "circuits": [ - { - "circuit_id": "dxcon-abc12345", - "impact": "OUTAGE" - }, - { - "circuit_id": "dxvif-1234hfjd", - "impact": "OUTAGE" - }, - { - "circuit_id": "dxlag-fge11110", - "impact": "OUTAGE" - }, - { - "circuit_id": "dxcon-ffucoreh", - "impact": "OUTAGE" - }, - { - "circuit_id": "dxcon-fg885ug5", - "impact": "OUTAGE" - } - ], - "end": 1757998800, - "maintenance_id": "369a433f3071c0837b379bc170d410e35a7a733d61c2efaf51bd95a3a4073d71", - "organizer": "aws-account-notifications@amazon.com", - "provider": "aws", - "sequence": 1, - "start": 1757984400, - "status": "CONFIRMED", - "summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Sonda Quilicura, Santiago from Tue, 16 Sep 2025 01:00:00 GMT to Tue, 16 Sep 2025 05:00:00 GMT for 4 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable." - } -] \ No newline at end of file + [ + { + "account": "111111111111", + "circuits": [ + { + "circuit_id": "dxcon-abc12345", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxvif-1234hfjd", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxlag-fge11110", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxcon-ffucoreh", + "impact": "OUTAGE" + }, + { + "circuit_id": "dxcon-fg885ug5", + "impact": "OUTAGE" + } + ], + "end": 1757998800, + "maintenance_id": "369a433f3071c0837b379bc170d410e35a7a733d61c2efaf51bd95a3a4073d71", + "start": 1757984400, + "organizer": "aws-account-notifications@amazon.com", + "provider": "aws", + "sequence": 1, + "stamp": 1756771998, + "status": "CONFIRMED", + "summary": "Hello, Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Sonda Quilicura, Santiago from Tue, 16 Sep 2025 01:00:00 GMT to Tue, 16 Sep 2025 05:00:00 GMT for 4 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable. dxcon-abc12345 dxvif-1234hfjd dxlag-fge11110 dxcon-ffucoreh dxcon-fg885ug5 This maintenance is scheduled to avoid disrupting redundant connections at the same time. If you encounter any problems with your connection after the end of this maintenance window, please contact AWS Support[1]." + } + ] \ No newline at end of file diff --git a/tests/unit/test_parsers.py b/tests/unit/test_parsers.py index e33d62ae..cb8f1c40 100644 --- a/tests/unit/test_parsers.py +++ b/tests/unit/test_parsers.py @@ -11,7 +11,7 @@ from circuit_maintenance_parser.parsers.apple import TextParserApple from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1 from circuit_maintenance_parser.parsers.att import HtmlParserATT1, XlsxParserATT1 -from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1 +from circuit_maintenance_parser.parsers.aws import HtmlParserAWS1, SubjectParserAWS1, TextParserAWS1 from circuit_maintenance_parser.parsers.bso import HtmlParserBSO1 from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1 from circuit_maintenance_parser.parsers.colt import CsvParserColt1, SubjectParserColt1, SubjectParserColt2 @@ -171,7 +171,7 @@ def default(self, o): Path(dir_path, "data", "aws", "aws3_text_parser_result.json"), ), ( - TextParserAWS1, + HtmlParserAWS1, Path(dir_path, "data", "aws", "aws4.eml"), Path(dir_path, "data", "aws", "aws4_html_parser_result.json"), ), From b1bab681dea2e3fb3126b424029674764a350c05 Mon Sep 17 00:00:00 2001 From: beetanz Date: Mon, 29 Sep 2025 10:43:27 -0700 Subject: [PATCH 8/9] Update circuit_maintenance_parser/parsers/aws.py Co-authored-by: Josh VanDeraa --- circuit_maintenance_parser/parsers/aws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index 5667dcc9..6052e846 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -47,7 +47,7 @@ def get_text_hook(raw): return soup.text def parse_text(self, text): - r"""Parse text. + """Parse text. Example: Hello, From 2fc954b1ad159d8ab842f7f24b06499ec3d25ba2 Mon Sep 17 00:00:00 2001 From: Ben Tanzer Date: Mon, 13 Oct 2025 09:31:58 -0700 Subject: [PATCH 9/9] add to changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fb6e7b9..2862d773 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +### Added +- [#330](https://github.com/networktocode/circuit-maintenance-parser/pull/330) - Add parsers for AWS HTML emails + # Changelog ## v2.8.0 - 2025-06-06