Merge pull request #299 from networktocode/release-v2.7.0

Release v2.7.0
networktocode · Jan 7, 2025 · 4f4a64f · 4f4a64f
2 parents 30acf33 + 2078966
commit 4f4a64f
Show file tree

Hide file tree

Showing 70 changed files with 3,557 additions and 841 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -113,7 +113,7 @@ jobs:
     strategy:
       fail-fast: true
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
         pydantic: ["2.x"]
         include:
           - python-version: "3.11"

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,23 @@
 # Changelog
 
+## v2.7.0 - 2025-01-10
+
+### Added
+
+- [#303](https://github.com/networktocode/circuit-maintenance-parser/pull/303) - Add new parser for Apple
+- [#302](https://github.com/networktocode/circuit-maintenance-parser/pull/302) - Add support for Python 3.12
+- [#301](https://github.com/networktocode/circuit-maintenance-parser/pull/301) - Add new parser for PCCW
+- [#297](https://github.com/networktocode/circuit-maintenance-parser/pull/297) - Add new parser for Tata Communications
+
+### Changed
+
+- [#302](https://github.com/networktocode/circuit-maintenance-parser/pull/302) - Drop support for Python 3.8
+- [#291](https://github.com/networktocode/circuit-maintenance-parser/pull/291) - Update Windstream Parser for new emails
+
+### Dependencies
+
+- [#295](https://github.com/networktocode/circuit-maintenance-parser/pull/295) - Remove pydantic dotenv extra
+
 ## v2.6.1 - 2024-06-04
 
 ### Fixed

diff --git a/README.md b/README.md
@@ -63,10 +63,12 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using
 - EXA (formerly GTT) (\*)
 - NTT
 - PacketFabric
+- PCCW
 - Telstra (\*)
 
 #### Supported providers based on other parsers
 
+- Apple
 - AWS
 - AquaComms
 - BSO
@@ -82,8 +84,10 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using
 - Megaport
 - Momentum
 - Netflix (AS2906 only)
+- PCCW
 - Seaborn
 - Sparkle
+- Tata
 - Telstra (\*)
 - Turkcell
 - Verizon

diff --git a/circuit_maintenance_parser/__init__.py b/circuit_maintenance_parser/__init__.py
@@ -12,6 +12,7 @@
     HGC,
     NTT,
     AquaComms,
+    Apple,
     Arelion,
     Cogent,
     Colt,
@@ -26,8 +27,10 @@
     Momentum,
     Netflix,
     PacketFabric,
+    PCCW,
     Seaborn,
     Sparkle,
+    Tata,
     Telia,
     Telstra,
     Turkcell,
@@ -38,6 +41,7 @@
 
 SUPPORTED_PROVIDERS = (
     GenericProvider,
+    Apple,
     AquaComms,
     Arelion,
     AWS,
@@ -57,8 +61,10 @@
     Netflix,
     NTT,
     PacketFabric,
+    PCCW,
     Seaborn,
     Sparkle,
+    Tata,
     Telia,
     Telstra,
     Turkcell,

diff --git a/circuit_maintenance_parser/parser.py b/circuit_maintenance_parser/parser.py
@@ -43,7 +43,7 @@ class Parser(BaseModel):
     def get_data_types(cls) -> List[str]:
         """Return the expected data type."""
         try:
-            return cls._data_types.get_default()
+            return cls._data_types.get_default()  # type: ignore[attr-defined]
         except AttributeError:
             # TODO: This exception handling is required for Pydantic 1.x compatibility. To be removed when the dependency is deprecated.
             return cls()._data_types

diff --git a/circuit_maintenance_parser/parsers/apple.py b/circuit_maintenance_parser/parsers/apple.py
@@ -0,0 +1,88 @@
+"""Apple peering parser."""
+import email
+import re
+
+from datetime import datetime, timezone
+from typing import Dict, List
+
+from circuit_maintenance_parser.output import Impact, Status
+from circuit_maintenance_parser.parser import EmailSubjectParser, Text, CircuitImpact
+
+
+class SubjectParserApple(EmailSubjectParser):
+    """Subject parser for Apple notification."""
+
+    def parse_subject(self, subject: str) -> List[Dict]:
+        """Use the subject of the email as summary.
+
+        Args:
+            subject (str): Message subject
+
+        Returns:
+            List[Dict]: List of attributes for Maintenance object
+        """
+        return [{"summary": subject}]
+
+
+class TextParserApple(Text):
+    """Parse the plaintext content of an Apple notification.
+
+    Subclass of the `Text` parser; `parse_text` receives the plaintext
+    message body and returns the extracted maintenance attributes.
+    """
+
+    def parse_text(self, text: str) -> List[Dict]:
+        """Extract attributes from an Apple notification email.
+
+        Args:
+            text (str): plaintext message
+
+        Returns:
+            List[Dict]: List of attributes for Maintenance object
+        """
+        data = {
+            "circuits": self._circuits(text),
+            "maintenance_id": self._maintenance_id(text),
+            "start": self._start_time(text),
+            "stamp": self._start_time(text),
+            "end": self._end_time(text),
+            "status": Status.CONFIRMED,  # Have yet to see anything but confirmation.
+            "organizer": "[email protected]",
+            "provider": "apple",
+            "account": "Customer info unavailable",
+        }
+        return [data]
+
+    def _circuits(self, text):
+        pattern = r"Peer AS: (\d*)"
+        match = re.search(pattern, text)
+        return [CircuitImpact(circuit_id=f"AS{match.group(1)}", impact=Impact.OUTAGE)]
+
+    def _maintenance_id(self, text):
+        # Apple ticket numbers always start with "CHG".
+        pattern = r"CHG(\d*)"
+        match = re.search(pattern, text)
+        return match.group(0)
+
+    def _get_time(self, pattern, text):
+        # Apple sends timestamps as RFC2822 for the US
+        # but a custom format for EU datacenters.
+        match = re.search(pattern, text)
+        try:
+            # Try EU timestamp
+            return int(
+                datetime.strptime(match.group(1), "%Y-%m-%d(%a) %H:%M %Z").replace(tzinfo=timezone.utc).timestamp()
+            )
+        except ValueError:
+            # Try RFC2822 - US timestamp
+            rfc2822 = match.group(1)
+            time_tuple = email.utils.parsedate_tz(rfc2822)
+            return email.utils.mktime_tz(time_tuple)
+
+    def _start_time(self, text):
+        pattern = "Start Time: ([a-zA-Z0-9 :()-]*)"
+        return self._get_time(pattern, text)
+
+    def _end_time(self, text):
+        pattern = "End Time: ([a-zA-Z0-9 :()-]*)"
+        return self._get_time(pattern, text)
diff --git a/circuit_maintenance_parser/parsers/pccw.py b/circuit_maintenance_parser/parsers/pccw.py
@@ -0,0 +1,89 @@
+"""Circuit maintenance parser for PCCW Email notifications."""
+import re
+from typing import List, Dict, Tuple, Any, ClassVar
+from datetime import datetime
+
+from bs4.element import ResultSet  # type: ignore
+from circuit_maintenance_parser.output import Status
+from circuit_maintenance_parser.parser import Html, EmailSubjectParser
+
+
+class HtmlParserPCCW(Html):
+    """Custom Parser for HTML portion of PCCW circuit maintenance notifications."""
+
+    DATE_TIME_FORMAT: ClassVar[str] = "%d/%m/%Y %H:%M:%S"
+    PROVIDER: ClassVar[str] = "PCCW Global"
+
+    def parse_html(self, soup: ResultSet) -> List[Dict]:
+        """Parse PCCW circuit maintenance email.
+
+        Args:
+            soup: BeautifulSoup ResultSet containing the email HTML content
+
+        Returns:
+            List containing a dictionary with parsed maintenance data
+        """
+        data: Dict[str, Any] = {
+            "circuits": [],
+            "provider": self.PROVIDER,
+            "account": self._extract_account(soup),
+        }
+        start_time, end_time = self._extract_maintenance_window(soup)
+        data["start"] = self.dt2ts(start_time)
+        data["end"] = self.dt2ts(end_time)
+
+        return [data]
+
+    def _extract_account(self, soup: ResultSet) -> str:
+        """Extract customer account from soup."""
+        customer_field = soup.find(string=re.compile("Customer Name :", re.IGNORECASE))
+        return customer_field.split(":")[1].strip()
+
+    def _extract_maintenance_window(self, soup: ResultSet) -> Tuple[datetime, datetime]:
+        """Extract start and end times from maintenance window."""
+        datetime_field = soup.find(string=re.compile("Date Time :", re.IGNORECASE))
+        time_parts = (
+            datetime_field.lower().replace("date time :", "-").replace("to", "-").replace("gmt", "-").split("-")
+        )
+        start_time = datetime.strptime(time_parts[1].strip(), self.DATE_TIME_FORMAT)
+        end_time = datetime.strptime(time_parts[2].strip(), self.DATE_TIME_FORMAT)
+        return start_time, end_time
+
+
+class SubjectParserPCCW(EmailSubjectParser):
+    """Custom Parser for Email subject of PCCW circuit maintenance notifications.
+
+    This parser extracts the maintenance ID and summary from the subject line and sets a default status.
+    """
+
+    # Only the completion notification doesn't come with ICal. Others, such as planned outage, urgent maintenance,
+    # amendment and cancellation notifications, come with ICal. Hence, maintenance status is set to COMPLETED.
+    DEFAULT_STATUS: ClassVar[Status] = Status.COMPLETED
+
+    def parse_subject(self, subject: str) -> List[Dict]:
+        """Parse PCCW circuit maintenance email subject.
+
+        Args:
+            subject: Email subject string to parse
+
+        Returns:
+            List containing a dictionary with parsed subject data including:
+                - maintenance_id: Extracted from end of subject
+                - status: Default COMPLETED status
+                - summary: Cleaned subject line
+        """
+        data: Dict[str, Any] = {
+            "maintenance_id": self._extract_maintenance_id(subject),
+            "status": self.DEFAULT_STATUS,
+            "summary": self._clean_summary(subject),
+        }
+
+        return [data]
+
+    def _extract_maintenance_id(self, subject: str) -> str:
+        """Extract maintenance ID from the end of subject line."""
+        return subject.split("-")[-1].strip()
+
+    def _clean_summary(self, subject: str) -> str:
+        """Clean and format the summary text."""
+        return subject.strip().replace("\n", "")
diff --git a/circuit_maintenance_parser/parsers/tata.py b/circuit_maintenance_parser/parsers/tata.py
@@ -0,0 +1,66 @@
+# pylint: disable=disallowed-name
+"""Circuit maintenance parser for Tata Email notifications."""
+from typing import List, Dict, Any
+from datetime import datetime
+
+from bs4.element import ResultSet  # type: ignore
+from circuit_maintenance_parser.output import Impact, Status
+from circuit_maintenance_parser.parser import Html, EmailSubjectParser
+
+
+class HtmlParserTata(Html):
+    """Custom Parser for HTML portion of Tata circuit maintenance notifications."""
+
+    def parse_html(self, soup: ResultSet) -> List[Dict]:
+        """Parse Tata circuit maintenance email."""
+        prev: str = ""
+        data: Dict[str, Any] = {
+            "account": "N/A",
+            "circuits": [],
+            "organizer": soup.select("a[href^=mailto]")[0].text.strip(),
+        }
+        for span in soup.find_all("span"):
+            curr = span.text.strip()
+            if curr != prev:
+                prev_lower = prev.lower()
+                if prev_lower == "ticket reference - tcl":
+                    data["maintenance_id"] = curr
+                elif prev_lower == "service id":
+                    for circuit in curr.split(","):
+                        data["circuits"].append(
+                            {
+                                "circuit_id": circuit.strip(),
+                                "impact": Impact.OUTAGE,
+                            }
+                        )
+                elif prev_lower in ("activity window (gmt)", "revised activity window (gmt)"):
+                    start_end = curr.split("to")
+                    data["start"] = self.dt2ts(datetime.strptime(start_end[0].strip(), "%Y-%m-%d %H:%M:%S %Z"))
+                    data["end"] = self.dt2ts(datetime.strptime(start_end[1].strip(), "%Y-%m-%d %H:%M:%S %Z"))
+                elif "extended up to time window" in prev_lower:
+                    if "gmt" in curr.lower():
+                        data["end"] = self.dt2ts(datetime.strptime(curr, "%Y-%m-%d %H:%M:%S %Z"))
+            prev = span.text.strip()
+
+        return [data]
+
+
+class SubjectParserTata(EmailSubjectParser):
+    """Custom Parser for Email subject of Tata circuit maintenance notifications."""
+
+    def parse_subject(self, subject: str) -> List[Dict]:
+        """Parse Tata Email subject for summary and status."""
+        data: Dict[str, Any] = {"summary": subject.strip().replace("\n", "")}
+        subject_lower = subject.lower()
+        if "completion" in subject_lower:
+            data["status"] = Status.COMPLETED
+        elif "reschedule" in subject_lower or "extension" in subject_lower:
+            data["status"] = Status.RE_SCHEDULED
+        elif "reminder" in subject_lower:
+            data["status"] = Status.CONFIRMED
+        elif "cancellation" in subject_lower:
+            data["status"] = Status.CANCELLED
+        else:
+            data["status"] = Status.CONFIRMED
+
+        return [data]
diff --git a/circuit_maintenance_parser/parsers/windstream.py b/circuit_maintenance_parser/parsers/windstream.py
@@ -41,28 +41,25 @@ def parse_html(self, soup):
 
         data["summary"] = summary_text
 
-        table = soup.find("table")
-        for row in table.find_all("tr"):
-            if len(row) < 2:
-                continue
-            cols = row.find_all("td")
-            header_tag = cols[0].string
-            if header_tag is None or header_tag == "Maintenance Address:":
-                continue
-            header_tag = header_tag.string.strip()
-            value_tag = cols[1].string.strip()
-            if header_tag == "WMT:":
-                data["maintenance_id"] = value_tag
-            elif "Date & Time:" in header_tag:
-                dt_time = convert_timezone(value_tag)
-                if "Event Start" in header_tag:
-                    data["start"] = int(dt_time.replace(tzinfo=timezone.utc).timestamp())
-                elif "Event End" in header_tag:
-                    data["end"] = int(dt_time.replace(tzinfo=timezone.utc).timestamp())
-            elif header_tag == "Outage":
-                impact = Impact("OUTAGE")
-            else:
-                continue
+        impact = soup.find("td", string="Outage").find_next_sibling("td").string
+        if impact:
+            impact = Impact("OUTAGE")
+
+        maint_id = soup.find("td", string="WMT:").find_next_sibling("td").string
+        if maint_id:
+            data["maintenance_id"] = maint_id
+
+        event = soup.find("td", string="Event Start Date & Time:").find_next_sibling("td").string
+        if event:
+            dt_time = convert_timezone(event)
+            data["start"] = int(dt_time.replace(tzinfo=timezone.utc).timestamp())
+            event = ""
+
+        event = soup.find("td", string="Event End Date & Time:").find_next_sibling("td").string
+        if event:
+            dt_time = convert_timezone(event)
+            data["end"] = int(dt_time.replace(tzinfo=timezone.utc).timestamp())
+            event = ""
 
         table = soup.find("table", "circuitTable")
         for row in table.find_all("tr"):