Skip to content

Commit

Permalink
fix(py): modify the Tardis CSV schema definition to handle string IDs in trade data.
Browse files Browse the repository at this point in the history
  • Loading branch information
nkaz001 committed Dec 6, 2024
1 parent 11d0067 commit 6e867df
Showing 1 changed file with 5 additions and 26 deletions.
31 changes: 5 additions & 26 deletions py-hftbacktest/hftbacktest/data/utils/tardis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,34 +18,13 @@
event_dtype
)

- trade_cols = [
- 'exchange',
- 'symbol',
- 'timestamp',
- 'local_timestamp',
- 'id',
- 'side',
- 'price',
- 'amount'
- ]

- depth_cols = [
- 'exchange',
- 'symbol',
- 'timestamp',
- 'local_timestamp',
- 'is_snapshot',
- 'side',
- 'price',
- 'amount'
- ]

trade_schema = {
'exchange': pl.String,
'symbol': pl.String,
'timestamp': pl.Int64,
'local_timestamp': pl.Int64,
- 'id': pl.UInt64,
+ 'id': pl.String,
'side': pl.String,
'price': pl.Float64,
'amount': pl.Float64,
Expand Down Expand Up @@ -125,16 +104,16 @@ def convert(
with open(file) as f:
line = f.readline()
header = line.strip().split(',')
- if header == trade_cols:
+ if header == list(trade_schema.keys()):
schema = trade_schema
- elif header == depth_cols:
+ elif header == list(depth_schema.keys()):
schema = depth_schema
except:
# Fails to infer the file type; let Polars infer the schema.
pass

df = pl.read_csv(file, schema=schema)
- if df.columns == trade_cols:
+ if df.columns == list(trade_schema.keys()):
arr = (
df.with_columns(
pl.when(pl.col('side') == 'buy')
Expand Down Expand Up @@ -171,7 +150,7 @@ def convert(
)
tmp[row_num:row_num + len(arr)] = arr[:]
row_num += len(arr)
- elif df.columns == depth_cols:
+ elif df.columns == list(depth_schema.keys()):
arr = (
df.with_columns(
(pl.col('timestamp') * 1000)
Expand Down

0 comments on commit 6e867df

Please sign in to comment.