8
8
9
9
import asyncio
10
10
import json
11
- from typing import Dict , List , Optional , Any
11
+ import re
12
+ from datetime import datetime
13
+ from typing import Dict , List , Optional , Any , Union
12
14
from urllib .parse import urljoin
13
15
14
16
import aiohttp
17
+ import pytz
15
18
from selectolax .parser import HTMLParser
16
19
17
20
from .models .meeting import Meeting
18
21
19
22
BASE_URL = "https://tulsa-ok.granicus.com/ViewPublisher.php?view_id=4"
23
+ CENTRAL_TZ = pytz .timezone ("America/Chicago" )
20
24
21
25
22
26
async def fetch_page (url : str , session : aiohttp .ClientSession ) -> str :
@@ -36,7 +40,95 @@ async def fetch_page(url: str, session: aiohttp.ClientSession) -> str:
36
40
return await response .text ()
37
41
38
42
39
- async def parse_meetings (html : str ) -> List [Dict [str , str ]]:
43
def parse_date_string(date_str: str) -> Optional[datetime]:
    """
    Parse a raw meeting date string into a timezone-aware datetime.

    The month name, day, year, and a 12-hour clock time are searched for
    anywhere in the text (typically "March 12, 2025 - 5:00 PM"); whatever
    sits between the year and the time is ignored. The AM/PM marker is
    accepted in any letter case.

    Args:
        date_str: The raw date string from HTML

    Returns:
        A datetime localized with CENTRAL_TZ, or None if the text holds no
        recognizable date/time or the values do not form a valid datetime
        (for example "February 31" or an hour of 99).
    """
    # Collapse every whitespace run to a single space. For str patterns the
    # regex whitespace class also matches non-breaking spaces, so they need
    # no separate replacement step.
    normalized = re.sub(r"\s+", " ", date_str)

    # Only a real AM/PM marker is accepted; a looser "any two of A, P, M"
    # class would let values such as "MM" through with no hour adjustment.
    match = re.search(
        r"([A-Za-z]+)\s+(\d{1,2}),?\s+(\d{4}).*?(\d{1,2}):(\d{2})\s*([AaPp][Mm])",
        normalized,
    )
    if match is None:
        return None

    month_str, day_str, year_str, hour_str, minute_str, am_pm = match.groups()

    # Try the full month name first, then the abbreviated form.
    month_num = None
    for month_format in ("%B", "%b"):
        try:
            month_num = datetime.strptime(month_str, month_format).month
        except ValueError:
            continue
        break
    if month_num is None:
        return None

    # Convert the 12-hour clock reading to a 24-hour value.
    hour = int(hour_str)
    meridiem = am_pm.upper()
    if meridiem == "PM" and hour < 12:
        hour += 12
    elif meridiem == "AM" and hour == 12:
        hour = 0

    # The regex only checks digit counts, so out-of-range values are still
    # possible here; honor the "None on failure" contract instead of raising.
    try:
        naive_dt = datetime(
            int(year_str), month_num, int(day_str), hour, int(minute_str)
        )
    except ValueError:
        return None

    # Attach the Central timezone to the naive value.
    return CENTRAL_TZ.localize(naive_dt)
98
+
99
+
100
def clean_date_string(date_str: str) -> str:
    """
    Clean up a raw date string into a consistent human-readable form.

    Whitespace runs (including newlines and non-breaking spaces) are
    collapsed to single spaces. When a month name, day, year, and 12-hour
    clock time can be found, they are re-assembled in one fixed layout with
    the AM/PM marker upper-cased.

    Args:
        date_str: The raw date string from HTML

    Returns:
        A cleaned date string in the format "Month Day, Year - Time"
        (e.g. "March 12, 2025 - 5:00 PM"), or the whitespace-collapsed and
        stripped input when no date/time pattern is found.
    """
    # Collapse every whitespace run to a single space. For str patterns the
    # regex whitespace class also matches non-breaking spaces, so they need
    # no separate replacement step.
    normalized = re.sub(r"\s+", " ", date_str)

    # Clock reading and AM/PM marker are captured separately so the output
    # spacing does not depend on how the source text happened to space them.
    match = re.search(
        r"([A-Za-z]+)\s+(\d{1,2}),?\s+(\d{4}).*?(\d{1,2}:\d{2})\s*([AaPp][Mm])",
        normalized,
    )
    if match is None:
        # No recognizable date/time: fall back to basic cleanup only.
        return normalized.strip()

    month, day, year, clock, meridiem = match.groups()
    return f"{month} {day}, {year} - {clock} {meridiem.upper()}"
129
+
130
+
131
+ async def parse_meetings (html : str ) -> List [Dict [str , Any ]]:
40
132
"""
41
133
Parse the meeting data from the HTML content.
42
134
@@ -68,9 +160,17 @@ async def parse_meetings(html: str) -> List[Dict[str, str]]:
68
160
if len (cells ) < 5 :
69
161
continue
70
162
163
+ # Parse the date string into a datetime object
164
+ date_text = cells [1 ].text ()
165
+ date_obj = parse_date_string (date_text )
166
+
167
+ # Get a cleaned date string as a fallback
168
+ date_str = clean_date_string (date_text )
169
+
71
170
meeting_data = {
72
171
"meeting" : cells [0 ].text ().strip (),
73
- "date" : cells [1 ].text ().strip (),
172
+ "date" : date_obj .isoformat () if date_obj else date_str ,
173
+ "date_display" : date_str , # Keep a human-readable version
74
174
"duration" : cells [2 ].text ().strip (),
75
175
"agenda" : None ,
76
176
"video" : None ,
0 commit comments