Skip to content

Commit

Permalink
benchmark script used for some performance optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
mhowison committed Jun 23, 2024
1 parent 073867c commit 1ec407e
Showing 1 changed file with 41 additions and 0 deletions.
41 changes: 41 additions & 0 deletions test/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import pandas as pd
from collections import Counter
from timeit import timeit

def time_read_csv(path="dataset.csv", number=10, **read_csv_kwargs):
    """Time repeated ``pd.read_csv`` calls on *path*.

    Args:
        path: CSV file to parse on every repetition.
        number: How many times ``timeit`` executes the statement.
        **read_csv_kwargs: Extra keyword arguments forwarded to
            ``pd.read_csv`` (for example ``dtype=str``).

    Returns:
        The value returned by ``timeit`` for the ``number`` executions.
    """
    return timeit(
        "pd.read_csv(path, **kwargs)",
        # Pass everything through ``globals`` so the timed statement does not
        # depend on a hard-coded path embedded in the statement string.
        globals={"pd": pd, "path": path, "kwargs": read_csv_kwargs},
        number=number,
    )


def time_value_counts(df, col, number=10):
    """Time ``df[col].value_counts()`` executed *number* times.

    Args:
        df: DataFrame holding the column to count.
        col: Label of the column to benchmark.
        number: How many times ``timeit`` executes the statement.

    Returns:
        The value returned by ``timeit`` for the ``number`` executions.
    """
    return timeit(
        "df[col].value_counts()",
        globals={"df": df, "col": col},
        number=number,
    )


def time_counter(df, col, number=10):
    """Time ``Counter(df[col])`` executed *number* times.

    Args:
        df: DataFrame holding the column to count.
        col: Label of the column to benchmark.
        number: How many times ``timeit`` executes the statement.

    Returns:
        The value returned by ``timeit`` for the ``number`` executions.
    """
    return timeit(
        "Counter(df[col])",
        globals={"df": df, "col": col, "Counter": Counter},
        number=number,
    )


def main(path="dataset.csv", number=10):
    """Run every benchmark against *path* and print the timings.

    Prints the ``read_csv`` timings (default dtypes, then ``dtype=str``),
    followed by, for each column of the loaded file, the column label and the
    timings of ``value_counts`` and ``Counter`` on that column.

    Args:
        path: CSV file to benchmark against.
        number: Repetition count used for every individual benchmark.
    """
    print("pandas read_csv:", time_read_csv(path, number))
    print("pandas read_csv dtype=str:", time_read_csv(path, number, dtype=str))

    # Load once, outside the timed statements, so the per-column benchmarks
    # measure only the counting work.
    df = pd.read_csv(path)

    for col in df.columns:
        print(col)
        print(" pandas value_counts:", time_value_counts(df, col, number))
        print(" Counter:", time_counter(df, col, number))


if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default dataset
    # path; with no arguments the script reads "dataset.csv" as before.
    main(sys.argv[1] if len(sys.argv) > 1 else "dataset.csv")

0 comments on commit 1ec407e

Please sign in to comment.