Skip to content

Commit 3b4e1d6

Browse files
committed
Merge remote-tracking branch 'origin/develop' into release-v2.7.0
2 parents d9197d3 + 0bf7922 commit 3b4e1d6

36 files changed

+2067
-837
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ jobs:
113113
strategy:
114114
fail-fast: true
115115
matrix:
116-
python-version: ["3.8", "3.9", "3.10", "3.11"]
116+
python-version: ["3.9", "3.10", "3.11", "3.12"]
117117
pydantic: ["2.x"]
118118
include:
119119
- python-version: "3.11"

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,12 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using
6363
- EXA (formerly GTT) (\*)
6464
- NTT
6565
- PacketFabric
66+
- PCCW
6667
- Telstra (\*)
6768

6869
#### Supported providers based on other parsers
6970

71+
- Apple
7072
- AWS
7173
- AquaComms
7274
- BSO
@@ -82,6 +84,7 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using
8284
- Megaport
8385
- Momentum
8486
- Netflix (AS2906 only)
87+
- PCCW
8588
- Seaborn
8689
- Sparkle
8790
- Tata

circuit_maintenance_parser/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
HGC,
1313
NTT,
1414
AquaComms,
15+
Apple,
1516
Arelion,
1617
Cogent,
1718
Colt,
@@ -26,6 +27,7 @@
2627
Momentum,
2728
Netflix,
2829
PacketFabric,
30+
PCCW,
2931
Seaborn,
3032
Sparkle,
3133
Tata,
@@ -39,6 +41,7 @@
3941

4042
SUPPORTED_PROVIDERS = (
4143
GenericProvider,
44+
Apple,
4245
AquaComms,
4346
Arelion,
4447
AWS,
@@ -58,6 +61,7 @@
5861
Netflix,
5962
NTT,
6063
PacketFabric,
64+
PCCW,
6165
Seaborn,
6266
Sparkle,
6367
Tata,

circuit_maintenance_parser/parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class Parser(BaseModel):
4343
def get_data_types(cls) -> List[str]:
4444
"""Return the expected data type."""
4545
try:
46-
return cls._data_types.get_default()
46+
return cls._data_types.get_default() # type: ignore[attr-defined]
4747
except AttributeError:
4848
# TODO: This exception handling is required for Pydantic 1.x compatibility. To be removed when the dependency is deprecated.
4949
return cls()._data_types
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""Apple peering parser."""
2+
import email
3+
import re
4+
5+
from datetime import datetime, timezone
6+
from typing import Dict, List
7+
8+
from circuit_maintenance_parser.output import Impact, Status
9+
from circuit_maintenance_parser.parser import EmailSubjectParser, Text, CircuitImpact
10+
11+
12+
class SubjectParserApple(EmailSubjectParser):
    """Subject parser for Apple notifications."""

    def parse_subject(self, subject: str) -> List[Dict]:
        """Expose the email subject, unmodified, as the maintenance summary.

        Args:
            subject (str): Subject line of the notification email.

        Returns:
            List[Dict]: Single-element list whose dict carries the ``summary`` attribute.
        """
        attributes = {"summary": subject}
        return [attributes]
25+
26+
27+
class TextParserApple(Text):
    """Parse the plaintext content of an Apple peering maintenance notification."""

    def parse_text(self, text: str) -> List[Dict]:
        """Extract maintenance attributes from an Apple notification email.

        Args:
            text (str): Plaintext body of the notification.

        Returns:
            List[Dict]: Single-element list of attributes for a Maintenance object.

        Raises:
            ValueError: If a required field (peer AS, ticket number, start or end
                time) is missing from ``text`` or a timestamp cannot be parsed.
        """
        circuits = self._circuits(text)
        maintenance_id = self._maintenance_id(text)
        # The start time is also used as the stamp; parse it once and reuse it.
        start = self._start_time(text)
        data = {
            "circuits": circuits,
            "maintenance_id": maintenance_id,
            "start": start,
            "stamp": start,
            "end": self._end_time(text),
            "status": Status.CONFIRMED,  # Have yet to see anything but confirmation.
            "organizer": "peering-noc@group.apple.com",
            "provider": "apple",
            "account": "Customer info unavailable",
        }
        return [data]

    @staticmethod
    def _require_match(pattern: str, text: str, field: str) -> "re.Match":
        """Return the first match of ``pattern`` in ``text`` or fail with a clear error.

        Raises:
            ValueError: If ``pattern`` does not occur in ``text``.
        """
        match = re.search(pattern, text)
        if match is None:
            raise ValueError(f"Apple notification is missing the {field} field")
        return match

    def _circuits(self, text):
        """Build the impacted circuit from the ``Peer AS`` number found in ``text``."""
        # ``\d+`` (not ``\d*``) so that a "Peer AS: " with no number is rejected
        # instead of producing the meaningless circuit id "AS".
        match = self._require_match(r"Peer AS: (\d+)", text, "peer AS")
        return [CircuitImpact(circuit_id=f"AS{match.group(1)}", impact=Impact.OUTAGE)]

    def _maintenance_id(self, text):
        """Return the Apple ticket number (``CHG`` followed by digits) found in ``text``."""
        # Apple ticket numbers always start with "CHG". Requiring at least one
        # digit avoids matching a bare "CHG" that is not a ticket number.
        match = self._require_match(r"CHG(\d+)", text, "maintenance id")
        return match.group(0)

    def _get_time(self, pattern, text):
        """Return the epoch timestamp captured by group 1 of ``pattern`` in ``text``.

        Apple sends timestamps as RFC2822 for the US but a custom format
        (``YYYY-MM-DD(Day) HH:MM TZ``) for EU datacenters; both are attempted.

        Raises:
            ValueError: If the field is absent or matches neither format.
        """
        # Imported here because a bare ``import email`` does not guarantee that
        # the ``email.utils`` submodule has been loaded.
        from email.utils import mktime_tz, parsedate_tz

        raw = self._require_match(pattern, text, "timestamp").group(1).strip()
        try:
            # Try EU timestamp.
            # NOTE(review): the parsed zone name is discarded and UTC is forced,
            # as in the original code - confirm Apple only ever sends UTC/GMT here.
            parsed = datetime.strptime(raw, "%Y-%m-%d(%a) %H:%M %Z")
        except ValueError:
            # Try RFC2822 - US timestamp. ``parsedate_tz`` signals failure with None.
            time_tuple = parsedate_tz(raw)
            if time_tuple is None:
                raise ValueError(f"Unrecognized Apple timestamp: {raw!r}") from None
            return mktime_tz(time_tuple)
        return int(parsed.replace(tzinfo=timezone.utc).timestamp())

    def _start_time(self, text):
        """Return the maintenance start time as an epoch timestamp."""
        pattern = r"Start Time: ([a-zA-Z0-9 :()-]*)"
        return self._get_time(pattern, text)

    def _end_time(self, text):
        """Return the maintenance end time as an epoch timestamp."""
        pattern = r"End Time: ([a-zA-Z0-9 :()-]*)"
        return self._get_time(pattern, text)
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
"""Circuit maintenance parser for PCCW Email notifications."""
2+
import re
3+
from typing import List, Dict, Tuple, Any, ClassVar
4+
from datetime import datetime
5+
6+
from bs4.element import ResultSet # type: ignore
7+
from circuit_maintenance_parser.output import Status
8+
from circuit_maintenance_parser.parser import Html, EmailSubjectParser
9+
10+
11+
class HtmlParserPCCW(Html):
    """Custom Parser for HTML portion of PCCW circuit maintenance notifications."""

    DATE_TIME_FORMAT: ClassVar[str] = "%d/%m/%Y %H:%M:%S"
    PROVIDER: ClassVar[str] = "PCCW Global"

    def parse_html(self, soup: ResultSet) -> List[Dict]:
        """Parse PCCW circuit maintenance email.

        Args:
            soup: BeautifulSoup ResultSet containing the email HTML content

        Returns:
            List containing a dictionary with parsed maintenance data
            (``circuits``, ``provider``, ``account``, ``start`` and ``end``).

        Raises:
            ValueError: If the customer name or the maintenance window cannot be
                located, or a timestamp does not follow ``DATE_TIME_FORMAT``.
        """
        data: Dict[str, Any] = {
            "circuits": [],
            "provider": self.PROVIDER,
            "account": self._extract_account(soup),
        }
        start_time, end_time = self._extract_maintenance_window(soup)
        data["start"] = self.dt2ts(start_time)
        data["end"] = self.dt2ts(end_time)

        return [data]

    @staticmethod
    def _find_field(soup: ResultSet, label: str) -> str:
        """Return the first text node containing ``label`` (case-insensitive).

        Raises:
            ValueError: If no text node contains ``label``.
        """
        field = soup.find(string=re.compile(label, re.IGNORECASE))
        if field is None:
            raise ValueError(f"PCCW notification is missing the '{label}' field")
        return field

    def _extract_account(self, soup: ResultSet) -> str:
        """Extract customer account from soup."""
        customer_field = self._find_field(soup, "Customer Name :")
        # Split on the first colon only so that a customer name which itself
        # contains a colon is not truncated.
        return customer_field.split(":", 1)[1].strip()

    def _extract_maintenance_window(self, soup: ResultSet) -> Tuple[datetime, datetime]:
        """Extract start and end times from maintenance window.

        The two timestamps follow the ``Date Time :`` label, separated by
        ``to`` (or ``-``), with the end optionally followed by ``GMT``.
        """
        datetime_field = self._find_field(soup, "Date Time :")
        # Only look at what follows the label, so that a stray "-" or "to" in
        # front of it cannot shift the position of the timestamps.
        window = datetime_field.lower().partition("date time :")[2]
        time_parts = window.replace("to", "-").replace("gmt", "-").split("-")
        if len(time_parts) < 2:
            raise ValueError(f"Unable to parse PCCW maintenance window: {datetime_field!r}")
        start_time = datetime.strptime(time_parts[0].strip(), self.DATE_TIME_FORMAT)
        end_time = datetime.strptime(time_parts[1].strip(), self.DATE_TIME_FORMAT)
        return start_time, end_time
51+
52+
53+
class SubjectParserPCCW(EmailSubjectParser):
    """Custom Parser for Email subject of PCCW circuit maintenance notifications.

    The maintenance ID, status and summary are all derived from the subject line.
    """

    # Completion notifications are the only ones sent without an ICal attachment;
    # planned outage, urgent maintenance, amendment and cancellation notifications
    # all carry one. Hence the status reported by this parser is COMPLETED.
    DEFAULT_STATUS: ClassVar[Status] = Status.COMPLETED

    def parse_subject(self, subject: str) -> List[Dict]:
        """Parse PCCW circuit maintenance email subject.

        Args:
            subject: Email subject string to parse

        Returns:
            Single-element list whose dictionary holds:
            - maintenance_id: text after the last "-" of the subject
            - status: the class default (COMPLETED)
            - summary: the subject, stripped and with newlines removed
        """
        maintenance_id = self._extract_maintenance_id(subject)
        summary = self._clean_summary(subject)
        parsed: Dict[str, Any] = {
            "maintenance_id": maintenance_id,
            "status": self.DEFAULT_STATUS,
            "summary": summary,
        }
        return [parsed]

    def _extract_maintenance_id(self, subject: str) -> str:
        """Return the text following the last "-" of the subject, stripped."""
        trailing_segment = subject.rsplit("-", 1)[-1]
        return trailing_segment.strip()

    def _clean_summary(self, subject: str) -> str:
        """Strip surrounding whitespace and drop embedded newlines."""
        trimmed = subject.strip()
        return "".join(trimmed.split("\n"))

circuit_maintenance_parser/parsers/tata.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,20 +35,15 @@ def parse_html(self, soup: ResultSet) -> List[Dict]:
3535
)
3636
elif prev_lower in ("activity window (gmt)", "revised activity window (gmt)"):
3737
start_end = curr.split("to")
38-
data["start"] = self._parse_time(start_end[0])
39-
data["end"] = self._parse_time(start_end[1])
38+
data["start"] = self.dt2ts(datetime.strptime(start_end[0].strip(), "%Y-%m-%d %H:%M:%S %Z"))
39+
data["end"] = self.dt2ts(datetime.strptime(start_end[1].strip(), "%Y-%m-%d %H:%M:%S %Z"))
4040
elif "extended up to time window" in prev_lower:
4141
if "gmt" in curr.lower():
42-
data["end"] = self._parse_time(curr)
42+
data["end"] = self.dt2ts(datetime.strptime(curr, "%Y-%m-%d %H:%M:%S %Z"))
4343
prev = span.text.strip()
4444

4545
return [data]
4646

47-
@staticmethod
48-
def _parse_time(string: str) -> int:
49-
"""Convert YYYY-MM-DD HH:MM:SS GMT to epoch."""
50-
return int((datetime.strptime(string.strip(), "%Y-%m-%d %H:%M:%S GMT") - datetime(1970, 1, 1)).total_seconds())
51-
5247

5348
class SubjectParserTata(EmailSubjectParser):
5449
"""Custom Parser for Email subject of Tata circuit maintenance notifications."""

circuit_maintenance_parser/provider.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from circuit_maintenance_parser.errors import ProcessorError, ProviderError
1515
from circuit_maintenance_parser.output import Maintenance
1616
from circuit_maintenance_parser.parser import EmailDateParser, ICal
17+
18+
from circuit_maintenance_parser.parsers.apple import SubjectParserApple, TextParserApple
1719
from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1
1820
from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1
1921
from circuit_maintenance_parser.parsers.bso import HtmlParserBSO1
@@ -30,6 +32,7 @@
3032
from circuit_maintenance_parser.parsers.momentum import HtmlParserMomentum1, SubjectParserMomentum1
3133
from circuit_maintenance_parser.parsers.netflix import TextParserNetflix1
3234
from circuit_maintenance_parser.parsers.openai import OpenAIParser
35+
from circuit_maintenance_parser.parsers.pccw import HtmlParserPCCW, SubjectParserPCCW
3336
from circuit_maintenance_parser.parsers.seaborn import (
3437
HtmlParserSeaborn1,
3538
HtmlParserSeaborn2,
@@ -204,6 +207,15 @@ def get_provider_type(cls) -> str:
204207
####################
205208

206209

210+
class Apple(GenericProvider):
    """Apple provider custom class.

    Notifications are handled by a single combined processor that merges the
    output of the plaintext body parser and the subject parser.
    """

    # Declared through PrivateAttr, like the other providers in this module.
    _processors: List[GenericProcessor] = PrivateAttr(
        [
            CombinedProcessor(data_parsers=[TextParserApple, SubjectParserApple]),
        ]
    )
    _default_organizer = PrivateAttr("peering-noc@group.apple.com")
217+
218+
207219
class AquaComms(GenericProvider):
208220
"""AquaComms provider custom class."""
209221

@@ -406,6 +418,29 @@ class PacketFabric(GenericProvider):
406418
_default_organizer = PrivateAttr("support@packetfabric.com")
407419

408420

421+
class PCCW(GenericProvider):
    """PCCW provider custom class.

    Two processors are declared: one for the ICal data and one that combines
    the HTML body, the subject and the email date.
    """

    # NOTE(review): presumably a notification is only processed when one of its
    # data parts matches these entries (ICal content, or a "Completion" subject)
    # - confirm against GenericProvider's filter handling.
    _include_filter = PrivateAttr(
        {
            "Icalendar": ["BEGIN"],
            "ical": ["BEGIN"],
            EMAIL_HEADER_SUBJECT: [
                "Completion - Planned Outage Notification",
                "Completion - Urgent Maintenance Notification",
            ],
        }
    )

    _processors: List[GenericProcessor] = PrivateAttr(
        [
            SimpleProcessor(data_parsers=[ICal]),
            CombinedProcessor(data_parsers=[HtmlParserPCCW, SubjectParserPCCW, EmailDateParser]),
        ]
    )
    # Declared through PrivateAttr for consistency with the attributes above
    # and with the other providers in this module.
    _default_organizer = PrivateAttr("mailto:gsoc-planned-event@pccwglobal.com")
442+
443+
409444
class Seaborn(GenericProvider):
410445
"""Seaborn provider custom class."""
411446

0 commit comments

Comments
 (0)