From 745d48be953288f4034b7a386fc81e7b13cd1422 Mon Sep 17 00:00:00 2001
From: theOehrly <23384863+theOehrly@users.noreply.github.com>
Date: Wed, 23 Aug 2023 17:48:16 +0200
Subject: [PATCH] ENH: generate session split times from lap timing instead of
 from session status (more reliable)

---
 fastf1/_api.py             | 29 ++++++++++++++-----
 fastf1/core.py             | 59 ++++++++++++++++++++++++--------------
 fastf1/req.py              |  2 +-
 fastf1/tests/test_cache.py |  3 +-
 fastf1/tests/test_laps.py  | 30 ++++++++++++++++---
 5 files changed, 89 insertions(+), 34 deletions(-)

diff --git a/fastf1/_api.py b/fastf1/_api.py
index 3570a6196..0bde24025 100644
--- a/fastf1/_api.py
+++ b/fastf1/_api.py
@@ -82,7 +82,6 @@ def make_path(wname, wdate, sname, sdate):
                 'GapToLeader': np.NaN, 'IntervalToPositionAhead': np.NaN}
 
 
-@Cache.api_request_wrapper
 def timing_data(path, response=None, livedata=None):
     """
     .. warning::
@@ -149,7 +148,17 @@ def timing_data(path, response=None, livedata=None):
     Raises:
         SessionNotAvailableError: in case the F1 livetiming api returns no data
     """
+    # Wraps ``_extended_timing_data`` to stay compatible with the old
+    # two-value return (the session split times are dropped here).
+    laps_data, stream_data, session_split_times \
+        = _extended_timing_data(path, response=response, livedata=livedata)
+    return laps_data, stream_data
+
 
+@Cache.api_request_wrapper
+def _extended_timing_data(path, response=None, livedata=None):
+    # in addition to what is documented for ``timing_data``:
+    #   - also returns session_split_times, used for splitting Q1/Q2/Q3
     # possible optional sanity checks (TODO, maybe):
     #   - inlap has to be followed by outlap
     #   - pit stops may never be negative (missing outlap)
@@ -181,13 +190,19 @@ def timing_data(path, response=None, livedata=None):
     laps_data = {key: list() for key, val in EMPTY_LAPS.items()}
     stream_data = {key: list() for key, val in EMPTY_STREAM.items()}
 
+    session_split_times = [datetime.timedelta(days=1), ] * 3
+
     for drv in resp_per_driver.keys():
-        drv_laps_data = _laps_data_driver(resp_per_driver[drv], EMPTY_LAPS, drv)
+        drv_laps_data, drv_session_split_times \
+            = _laps_data_driver(resp_per_driver[drv], EMPTY_LAPS, drv)
         drv_stream_data = _stream_data_driver(resp_per_driver[drv], EMPTY_STREAM, drv)
 
         if (drv_laps_data is None) or (drv_stream_data is None):
             continue
 
+        for i, split_time in enumerate(drv_session_split_times):
+            session_split_times[i] = min(drv_session_split_times[i], session_split_times[i])
+
         for key in EMPTY_LAPS.keys():
             laps_data[key].extend(drv_laps_data[key])
 
@@ -202,7 +217,7 @@ def timing_data(path, response=None, livedata=None):
     # pandas doesn't correctly infer bool dtype columns, set type explicitly
     laps_data[['IsPersonalBest']] = laps_data[['IsPersonalBest']].astype(bool)
 
-    return laps_data, stream_data
+    return laps_data, stream_data, session_split_times
 
 
 @soft_exceptions("lap alignment",
@@ -459,7 +474,7 @@ def _laps_data_driver(driver_raw, empty_vals, drv):
                 lapcnt += 1
 
     if lapcnt == 0:  # no data at all for this driver
-        return None
+        return None, None
 
     # done reading the data, do postprocessing
 
@@ -490,7 +505,7 @@ def data_in_lap(lap_n):
 
     if not drv_data['Time']:
         # ensure that there is still data left after potentially removing a lap
-        return drv_data
+        return drv_data, session_split_times
 
     for i in range(len(drv_data['Time'])):
         sector_sum = datetime.timedelta(0)
@@ -573,7 +588,7 @@ def data_in_lap(lap_n):
 
     if not drv_data['Time']:
         # ensure that there is still data left after potentially removing a lap
-        return drv_data
+        return drv_data, session_split_times
 
     # more lap sync, this time check which lap triggered with the lowest latency
     for i in range(len(drv_data['Time']) - 1, 0, -1):
@@ -655,7 +670,7 @@ def data_in_lap(lap_n):
             f"integrity error(s) near lap(s): {integrity_errors}.\n"
             f"This might be a bug and should be reported.")
 
-    return drv_data
+    return drv_data, session_split_times
 
 
 def _stream_data_driver(driver_raw, empty_vals, drv):
diff --git a/fastf1/core.py b/fastf1/core.py
index 957cf7c24..80890e713 100644
--- a/fastf1/core.py
+++ b/fastf1/core.py
@@ -1038,6 +1038,8 @@ def __init__(self, event, session_name, f1_api_support=False):
         self._weather_data: pd.DataFrame
         self._results: SessionResults
 
+        self._session_split_times: Optional[list] = None
+
     def __repr__(self):
         return (f"{self.event.year} Season Round {self.event.RoundNumber}: "
                 f"{self.event.EventName} - {self.name}")
@@ -1247,7 +1249,11 @@ def load(self, *, laps=True, telemetry=True, weather=True, messages=True,
 
     @soft_exceptions("lap timing data", "Failed to load timing data!", _logger)
     def _load_laps_data(self, livedata=None):
-        data, _ = api.timing_data(self.api_path, livedata=livedata)
+        data, _, session_split_times \
+            = api._extended_timing_data(self.api_path, livedata=livedata)
+
+        self._session_split_times = session_split_times
+
         app_data = api.timing_app_data(self.api_path, livedata=livedata)
         _logger.info("Processing timing data...")
         # Matching data and app_data. Not super straightforward
@@ -1598,6 +1604,7 @@ def _calculate_quali_like_session_results(self, force=False):
         for i, session in enumerate(sessions):
             session_name = f'Q{i + 1}'
             if session is not None:
+                session = session.pick_quicklaps()  # 107% rule applies per Q
                 laps = (
                     session[~session['LapTime'].isna() & ~session['Deleted']]
                     .copy()
@@ -2772,32 +2779,42 @@ def split_qualifying_sessions(self) -> List[Optional["Laps"]]:
         elif self.session.session_status is None:
             raise ValueError("Session status data is unavailable!")
 
-        # get the timestamps for 'Started' from the session status data
-        # note that after a red flag, a session can be 'Started' as well.
-        # Therefore, it is necessary to check for red flags and ignore
-        # the first 'Started' entry after a red flag.
-        split_times = list()
-        session_suspended = False
-        for _, row in self.session.session_status.iterrows():
-            if row['Status'] == 'Started':
-                if not session_suspended:
-                    split_times.append(row['Time'])
-                else:
+        if self.session._session_split_times:
+            # prefer using the split times that were generated by the timing
+            # data parser, those are more reliable
+            split_times = self.session._session_split_times.copy()
+        else:
+            # get the timestamps for 'Started' from the session status data
+            # note that after a red flag, a session can be 'Started' as well.
+            # Therefore, it is necessary to check for red flags and ignore
+            # the first 'Started' entry after a red flag.
+            split_times = list()
+            session_suspended = False
+            for _, row in self.session.session_status.iterrows():
+                if row['Status'] == 'Started':
+                    if not session_suspended:
+                        split_times.append(row['Time'])
+                    else:
+                        session_suspended = False
+                elif row['Status'] == 'Aborted':
+                    session_suspended = True
+                elif row['Status'] == 'Finished':
+                    # This handles the case when a qualifying session isn't
+                    # restarted after a red flag.
                     session_suspended = False
-            elif row['Status'] == 'Aborted':
-                session_suspended = True
-            elif row['Status'] in ('Finished', 'Inactive'):
-                # This handles the case when a qualifying session isn't
-                # restarted after a red flag.
-                session_suspended = False
 
         # add the very last timestamp, to get an end for the last interval
         split_times.append(self.session.session_status['Time'].iloc[-1])
-
         laps = [None, None, None]
         for i in range(len(split_times) - 1):
-            laps[i] = self[(self['Time'] > split_times[i])
-                           & (self['Time'] < split_times[i + 1])]
+            # Split by lap start time rather than lap end time: the split
+            # times generated from timing data may not account for crashed
+            # cars being returned, or for laps with a generated lap time,
+            # either of which can result in a late 'Time' value.
+            laps[i] = self[(self['LapStartTime'] > split_times[i])
+                           & (self['LapStartTime'] < split_times[i + 1])]
+            if laps[i].empty:
+                laps[i] = None
         return laps
 
     def iterlaps(self, require: Optional[Iterable] = None) \
diff --git a/fastf1/req.py b/fastf1/req.py
index 39d01ca9b..7575dd8aa 100644
--- a/fastf1/req.py
+++ b/fastf1/req.py
@@ -198,7 +198,7 @@ class Cache:
     """
     _CACHE_DIR = None
     # version of the api parser code (unrelated to release version number)
-    _API_CORE_VERSION = 10
+    _API_CORE_VERSION = 11
     _IGNORE_VERSION = False
     _FORCE_RENEW = False
 
diff --git a/fastf1/tests/test_cache.py b/fastf1/tests/test_cache.py
index ba3ea601d..62a4d5a16 100644
--- a/fastf1/tests/test_cache.py
+++ b/fastf1/tests/test_cache.py
@@ -98,7 +98,8 @@ def _test_cache_used_and_clear(tmpdir):
         expected_dir_list = ['car_data.ff1pkl', 'position_data.ff1pkl',
                              'driver_info.ff1pkl',
                              'session_status_data.ff1pkl',
-                             'timing_app_data.ff1pkl', 'timing_data.ff1pkl',
+                             'timing_app_data.ff1pkl',
+                             '_extended_timing_data.ff1pkl',
                              'track_status_data.ff1pkl',
                              'weather_data.ff1pkl',
                              'race_control_messages.ff1pkl']
diff --git a/fastf1/tests/test_laps.py b/fastf1/tests/test_laps.py
index c77691686..03798980f 100644
--- a/fastf1/tests/test_laps.py
+++ b/fastf1/tests/test_laps.py
@@ -167,10 +167,15 @@ def test_lap_get_weather_data(reference_laps_data):
 
 
 @pytest.mark.f1telapi
-def test_split_quali_laps():
+@pytest.mark.parametrize("source", ["session_status", "timing_data"])
+def test_split_quali_laps(source):
     session = fastf1.get_session(2023, 2, 'Q')
     session.load(telemetry=False, weather=False)
 
+    if source == "session_status":
+        # delete precalculated split times (from api parser)
+        session._session_split_times = None
+
     q1, q2, q3 = session.laps.split_qualifying_sessions()
 
     assert len(q1['DriverNumber'].unique()) == 20
@@ -179,10 +184,15 @@ def test_split_quali_laps():
 
 
 @pytest.mark.f1telapi
-def test_split_sprint_shootout_laps():
+@pytest.mark.parametrize("source", ["session_status", "timing_data"])
+def test_split_sprint_shootout_laps(source):
     session = fastf1.get_session(2023, 4, 'SS')
     session.load(telemetry=False, weather=False)
 
+    if source == "session_status":
+        # delete precalculated split times (from api parser)
+        session._session_split_times = None
+
     q1, q2, q3 = session.laps.split_qualifying_sessions()
 
     assert len(q1['DriverNumber'].unique()) == 20
@@ -193,13 +203,19 @@ def test_split_sprint_shootout_laps():
 
 
 @pytest.mark.f1telapi
-def test_calculated_quali_results():
+@pytest.mark.parametrize("source", ["session_status", "timing_data"])
+def test_calculated_quali_results(source):
     session = fastf1.get_session(2023, 4, 'Q')
     session.load(telemetry=False, weather=False)
 
     # copy and delete (!) before recalculating
     ergast_results = session.results.copy()
     session.results.loc[:, ('Q1', 'Q2', 'Q3')] = pd.NaT
+
+    if source == "session_status":
+        # delete precalculated split times (from api parser)
+        session._session_split_times = None
+
     session._calculate_quali_like_session_results(force=True)
 
     # Note that differences may exist if one or more drivers didn't set a
@@ -211,7 +227,8 @@ def test_calculated_quali_results():
 
 
 @pytest.mark.f1telapi
-def test_quali_q3_cancelled():
+@pytest.mark.parametrize("source", ["session_status", "timing_data"])
+def test_quali_q3_cancelled(source):
     session = fastf1.get_session(2023, 4, 'Q')
     session.load(telemetry=False, weather=False)
 
@@ -220,6 +237,11 @@ def test_quali_q3_cancelled():
     # no lap data is available.
     session.session_status.drop([13, 14, 15, 16], inplace=True)
     session.results['Q3'] = pd.NaT
+    if source == "session_status":
+        # delete precalculated split times (from api parser)
+        session._session_split_times = None
+    else:
+        session._session_split_times.pop(-1)
 
     # Test split_qualifying_sessions()
     q1, q2, q3 = session.laps.split_qualifying_sessions()