Skip to content

Pandas Cheatsheet

David Brazda edited this page Jun 4, 2024 · 2 revisions

Persistence

Parquet

# Local parquet cache for trade data: the file name encodes the symbol and
# the start/stop of the requested window, so each window gets its own file.
filename = (
    f"{symbol!s}"
    f"_{day_start.strftime('%Y%m%d_%H%M%S')}"
    f"_{day_stop.strftime('%Y%m%d_%H%M%S')}"
    ".parquet"
)
try:
    # Try the cached copy first.
    df = pd.read_parquet(filename, engine='pyarrow')
except FileNotFoundError:
    # No cached file yet: fetch remotely, then persist for the next run.
    print("File not found, remote fetching")
    # Alternative fetcher (fetches daily main session trade data):
    # df = fetch_stock_trades(symbol, day_start, day_stop)
    df = fetch_trades_parallel(symbol, day_start, day_stop)
    print(df)
    df.to_parquet(filename, engine='pyarrow', compression='gzip')
    print("Saved as " + filename)
else:
    print("Loaded from " + filename)

Multiindex

MultiIndex time filtering

# Time-of-day filter on a MultiIndex that has a 'timestamp' level.
# NOTE(review): pd.Timestamp() is given the result of start.time()/end.time();
# confirm that the types of `start`/`end` make this a valid Timestamp input.
start_time = pd.Timestamp(start.time(), tz=zoneNY).time()
end_time = pd.Timestamp(end.time(), tz=zoneNY).time()

# Extract the time-of-day of every row once and reuse it for both bounds.
times = df.index.get_level_values('timestamp').time

# Create a mask to filter rows within the specified time range
# (both bounds are inclusive).
mask = (times >= start_time) & (times <= end_time)

# Apply the mask to the DataFrame
df = df[mask]

Accessing a (symbol, datetime) MultiIndex

# Slice a (symbol, datetime) MultiIndex between two (symbol, timestamp) keys.
window_start = ("BAC", "2024-02-12 10:30")
window_end = ("BAC", "2024-02-14 12:00")
ohlcv_df = ohlcv_df.loc[window_start:window_end]

Analysis

# Row count per calendar day, grouping on the date part of the index.
# NOTE(review): `.index.date` / `.normalize()` need a DatetimeIndex — confirm
# the frame is not indexed by (symbol, datetime) at this point.
rows_per_day = ohlcv_df.groupby(ohlcv_df.index.date).size()  # per day

# Number of distinct days in the index. `nunique` is a method and must be
# called; without the parentheses the expression is just the bound method.
df.index.normalize().nunique()  # number of days

# Number of groups produced by a weekly grouper on the index.
ohlcv_df.groupby(pd.Grouper(freq='W')).ngroups  # number of weeks
Clone this wiki locally