From 745d48be953288f4034b7a386fc81e7b13cd1422 Mon Sep 17 00:00:00 2001 From: theOehrly <23384863+theOehrly@users.noreply.github.com> Date: Wed, 23 Aug 2023 17:48:16 +0200 Subject: [PATCH] ENH: generate session split times from lap timing instead of from session status (more reliable) --- fastf1/_api.py | 29 ++++++++++++++----- fastf1/core.py | 59 ++++++++++++++++++++++++-------------- fastf1/req.py | 2 +- fastf1/tests/test_cache.py | 3 +- fastf1/tests/test_laps.py | 30 ++++++++++++++++--- 5 files changed, 89 insertions(+), 34 deletions(-) diff --git a/fastf1/_api.py b/fastf1/_api.py index 3570a6196..0bde24025 100644 --- a/fastf1/_api.py +++ b/fastf1/_api.py @@ -82,7 +82,6 @@ def make_path(wname, wdate, sname, sdate): 'GapToLeader': np.NaN, 'IntervalToPositionAhead': np.NaN} -@Cache.api_request_wrapper def timing_data(path, response=None, livedata=None): """ .. warning:: @@ -149,7 +148,17 @@ def timing_data(path, response=None, livedata=None): Raises: SessionNotAvailableError: in case the F1 livetiming api returns no data """ + # wraps _extended_timing_data to provide compatibility to the old return + # values + laps_data, stream_data, session_split_times \ + = _extended_timing_data(path, response=response, livedata=livedata) + return laps_data, stream_data + +@Cache.api_request_wrapper +def _extended_timing_data(path, response=None, livedata=None): + # extended over the documentation of ``timing_data``: + # - returns session_split_times for splitting Q1/Q2/Q3 additionally # possible optional sanity checks (TODO, maybe): # - inlap has to be followed by outlap # - pit stops may never be negative (missing outlap) @@ -181,13 +190,19 @@ def timing_data(path, response=None, livedata=None): laps_data = {key: list() for key, val in EMPTY_LAPS.items()} stream_data = {key: list() for key, val in EMPTY_STREAM.items()} + session_split_times = [datetime.timedelta(days=1), ] * 3 + for drv in resp_per_driver.keys(): - drv_laps_data = _laps_data_driver(resp_per_driver[drv], EMPTY_LAPS, drv) + drv_laps_data, drv_session_split_times \ + = _laps_data_driver(resp_per_driver[drv], EMPTY_LAPS, drv) drv_stream_data = _stream_data_driver(resp_per_driver[drv], EMPTY_STREAM, drv) if (drv_laps_data is None) or (drv_stream_data is None): continue + for i, split_time in enumerate(drv_session_split_times): + session_split_times[i] = min(drv_session_split_times[i], session_split_times[i]) + for key in EMPTY_LAPS.keys(): laps_data[key].extend(drv_laps_data[key]) @@ -202,7 +217,7 @@ def timing_data(path, response=None, livedata=None): # pandas doesn't correctly infer bool dtype columns, set type explicitly laps_data[['IsPersonalBest']] = laps_data[['IsPersonalBest']].astype(bool) - return laps_data, stream_data + return laps_data, stream_data, session_split_times @soft_exceptions("lap alignment", @@ -459,7 +474,7 @@ def _laps_data_driver(driver_raw, empty_vals, drv): lapcnt += 1 if lapcnt == 0: # no data at all for this driver - return None + return None, None # done reading the data, do postprocessing @@ -490,7 +505,7 @@ def data_in_lap(lap_n): if not drv_data['Time']: # ensure that there is still data left after potentially removing a lap - return drv_data + return drv_data, session_split_times for i in range(len(drv_data['Time'])): sector_sum = datetime.timedelta(0) @@ -573,7 +588,7 @@ def data_in_lap(lap_n): if not drv_data['Time']: # ensure that there is still data left after potentially removing a lap - return drv_data + return drv_data, session_split_times # more lap sync, this time check which lap triggered with the lowest latency for i in range(len(drv_data['Time']) - 1, 0, -1): @@ -655,7 +670,7 @@ def data_in_lap(lap_n): f"integrity error(s) near lap(s): {integrity_errors}.\n" f"This might be a bug and should be reported.") - return drv_data + return drv_data, session_split_times def _stream_data_driver(driver_raw, empty_vals, drv): diff --git a/fastf1/core.py b/fastf1/core.py index 957cf7c24..80890e713 100644 --- a/fastf1/core.py +++ b/fastf1/core.py @@ -1038,6 +1038,8 @@ def __init__(self, event, session_name, f1_api_support=False): self._weather_data: pd.DataFrame self._results: SessionResults + self._session_split_times: Optional[list] = None + def __repr__(self): return (f"{self.event.year} Season Round {self.event.RoundNumber}: " f"{self.event.EventName} - {self.name}") @@ -1247,7 +1249,11 @@ def load(self, *, laps=True, telemetry=True, weather=True, messages=True, @soft_exceptions("lap timing data", "Failed to load timing data!", _logger) def _load_laps_data(self, livedata=None): - data, _ = api.timing_data(self.api_path, livedata=livedata) + data, _, session_split_times \ + = api._extended_timing_data(self.api_path, livedata=livedata) + + self._session_split_times = session_split_times + app_data = api.timing_app_data(self.api_path, livedata=livedata) _logger.info("Processing timing data...") # Matching data and app_data. Not super straightforward @@ -1598,6 +1604,7 @@ def _calculate_quali_like_session_results(self, force=False): for i, session in enumerate(sessions): session_name = f'Q{i + 1}' if session is not None: + session = session.pick_quicklaps() # 107% rule applies per Q laps = ( session[~session['LapTime'].isna() & ~session['Deleted']] .copy() @@ -2772,32 +2779,42 @@ def split_qualifying_sessions(self) -> List[Optional["Laps"]]: elif self.session.session_status is None: raise ValueError("Session status data is unavailable!") - # get the timestamps for 'Started' from the session status data - # note that after a red flag, a session can be 'Started' as well. - # Therefore, it is necessary to check for red flags and ignore - # the first 'Started' entry after a red flag. - split_times = list() - session_suspended = False - for _, row in self.session.session_status.iterrows(): - if row['Status'] == 'Started': - if not session_suspended: - split_times.append(row['Time']) - else: + if self.session._session_split_times: + # prefer using the split times that were generated by the timing + # data parser, those are more reliable + split_times = self.session._session_split_times.copy() + else: + # get the timestamps for 'Started' from the session status data + # note that after a red flag, a session can be 'Started' as well. + # Therefore, it is necessary to check for red flags and ignore + # the first 'Started' entry after a red flag. + split_times = list() + session_suspended = False + for _, row in self.session.session_status.iterrows(): + if row['Status'] == 'Started': + if not session_suspended: + split_times.append(row['Time']) + else: + session_suspended = False + elif row['Status'] == 'Aborted': + session_suspended = True + elif row['Status'] == 'Finished': + # This handles the case when a qualifying session isn't + # restarted after a red flag. session_suspended = False - elif row['Status'] == 'Aborted': - session_suspended = True - elif row['Status'] in ('Finished', 'Inactive'): - # This handles the case when a qualifying session isn't - # restarted after a red flag. - session_suspended = False # add the very last timestamp, to get an end for the last interval split_times.append(self.session.session_status['Time'].iloc[-1]) - laps = [None, None, None] for i in range(len(split_times) - 1): - laps[i] = self[(self['Time'] > split_times[i]) - & (self['Time'] < split_times[i + 1])] + # split by start time instead of end time, because the split times + # that are generated from timing data may not account for crashed + # cars being returned or having a generated lap time that results + # in a late 'Time' value! + laps[i] = self[(self['LapStartTime'] > split_times[i]) + & (self['LapStartTime'] < split_times[i + 1])] + if laps[i].empty: + laps[i] = None return laps def iterlaps(self, require: Optional[Iterable] = None) \ diff --git a/fastf1/req.py b/fastf1/req.py index 39d01ca9b..7575dd8aa 100644 --- a/fastf1/req.py +++ b/fastf1/req.py @@ -198,7 +198,7 @@ class Cache: """ _CACHE_DIR = None # version of the api parser code (unrelated to release version number) - _API_CORE_VERSION = 10 + _API_CORE_VERSION = 11 _IGNORE_VERSION = False _FORCE_RENEW = False diff --git a/fastf1/tests/test_cache.py b/fastf1/tests/test_cache.py index ba3ea601d..62a4d5a16 100644 --- a/fastf1/tests/test_cache.py +++ b/fastf1/tests/test_cache.py @@ -98,7 +98,8 @@ def _test_cache_used_and_clear(tmpdir): expected_dir_list = ['car_data.ff1pkl', 'position_data.ff1pkl', 'driver_info.ff1pkl', 'session_status_data.ff1pkl', - 'timing_app_data.ff1pkl', 'timing_data.ff1pkl', + 'timing_app_data.ff1pkl', + '_extended_timing_data.ff1pkl', 'track_status_data.ff1pkl', 'weather_data.ff1pkl', 'race_control_messages.ff1pkl'] diff --git a/fastf1/tests/test_laps.py b/fastf1/tests/test_laps.py index c77691686..03798980f 100644 --- a/fastf1/tests/test_laps.py +++ b/fastf1/tests/test_laps.py @@ -167,10 +167,15 @@ def test_lap_get_weather_data(reference_laps_data): @pytest.mark.f1telapi -def test_split_quali_laps(): +@pytest.mark.parametrize("source", ["session_status", "timing_data"]) +def test_split_quali_laps(source): session = fastf1.get_session(2023, 2, 'Q') session.load(telemetry=False, weather=False) + if source == "session_status": + # delete precalculated split times (from api parser) + session._session_split_times = None + q1, q2, q3 = session.laps.split_qualifying_sessions() assert len(q1['DriverNumber'].unique()) == 20 @@ -179,10 +184,15 @@ def test_split_quali_laps(): @pytest.mark.f1telapi -def test_split_sprint_shootout_laps(): +@pytest.mark.parametrize("source", ["session_status", "timing_data"]) +def test_split_sprint_shootout_laps(source): session = fastf1.get_session(2023, 4, 'SS') session.load(telemetry=False, weather=False) + if source == "session_status": + # delete precalculated split times (from api parser) + session._session_split_times = None + q1, q2, q3 = session.laps.split_qualifying_sessions() assert len(q1['DriverNumber'].unique()) == 20 @@ -193,13 +203,19 @@ def test_split_sprint_shootout_laps(): @pytest.mark.f1telapi -def test_calculated_quali_results(): +@pytest.mark.parametrize("source", ["session_status", "timing_data"]) +def test_calculated_quali_results(source): session = fastf1.get_session(2023, 4, 'Q') session.load(telemetry=False, weather=False) # copy and delete (!) before recalculating ergast_results = session.results.copy() session.results.loc[:, ('Q1', 'Q2', 'Q3')] = pd.NaT + + if source == "session_status": + # delete precalculated split times (from api parser) + session._session_split_times = None + session._calculate_quali_like_session_results(force=True) # Note that differences may exist if one or more drivers didn't set a @@ -211,7 +227,8 @@ def test_calculated_quali_results(): @pytest.mark.f1telapi -def test_quali_q3_cancelled(): +@pytest.mark.parametrize("source", ["session_status", "timing_data"]) +def test_quali_q3_cancelled(source): session = fastf1.get_session(2023, 4, 'Q') session.load(telemetry=False, weather=False) @@ -220,6 +237,11 @@ def test_quali_q3_cancelled(): # no lap data is available. session.session_status.drop([13, 14, 15, 16], inplace=True) session.results['Q3'] = pd.NaT + if source == "session_status": + # delete precalculated split times (from api parser) + session._session_split_times = None + else: + session._session_split_times.pop(-1) # Test split_qualifying_sessions() q1, q2, q3 = session.laps.split_qualifying_sessions()