Skip to content

Commit

Permalink
TEST-modin-project#5903: Add third party unit tests for interoperability
Browse files Browse the repository at this point in the history
Signed-off-by: Labanya Mukhopadhyay <[email protected]>
  • Loading branch information
labanyamukhopadhyay committed Mar 30, 2023
1 parent edccfd0 commit 8c7d444
Show file tree
Hide file tree
Showing 56 changed files with 38,812 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,7 @@ jobs:
- run: pip install "dfsql>=0.4.2" "pyparsing<=2.4.7" && python -m pytest modin/experimental/sql/test/test_sql.py
- run: python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
- run: python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
- run: python -m pytest modin/pandas/test/interoperability
- uses: ./.github/workflows/upload-coverage

test-experimental:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/push-to-master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ jobs:
python -m pytest modin/pandas/test/test_general.py
python -m pytest modin/pandas/test/test_io.py
python -m pytest modin/experimental/pandas/test/test_io_exp.py
python -m pytest modin/pandas/test/interoperability
test-docs:
runs-on: ubuntu-latest
Expand Down
144 changes: 144 additions & 0 deletions modin/pandas/test/interoperability/matplotlib/test_axes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import numpy as np

import matplotlib.pyplot as plt

import modin.pandas as pd

from matplotlib.testing.decorators import (
check_figures_equal,
)

# Note: Some test cases are run twice: once normally and once with labeled data.
# These two must be defined in the same test function or need to have
# different baseline images to prevent race conditions when pytest runs
# the tests with multiple threads.


@check_figures_equal(extensions=["png"])
def test_invisible_axes(fig_test, fig_ref):
    """Hide the only Axes of ``fig_test`` and compare it with ``fig_ref``.

    ``fig_ref`` is deliberately left untouched (no Axes are added to it); the
    ``check_figures_equal`` decorator performs the comparison of both figures.
    """
    fig_test.subplots().set_visible(False)


def test_boxplot_dates_pandas():
    """Smoke test: ``plt.boxplot`` accepts positions taken from a date range.

    The positions are the ``.year`` values of a two-period ``pd.date_range``
    with a one-year offset (``pd`` is ``modin.pandas`` in this module).
    """
    data = np.random.rand(5, 2)
    years = pd.date_range("1/1/2000", periods=2, freq=pd.DateOffset(years=1)).year
    fig = plt.figure()
    try:
        plt.boxplot(data, positions=years)
    finally:
        # Close the figure so it does not stay registered with pyplot and
        # become the "current figure" of whichever test runs next.
        plt.close(fig)


def test_bar_pandas():
    """Smoke test: ``Axes.bar``/``Axes.plot`` accept a groupby result.

    A ``date`` column is assembled from year/month/day columns, the frame is
    grouped by it and summed, and the resulting index and ``value`` column are
    passed straight to matplotlib.
    """
    df = pd.DataFrame(
        {
            "year": [2018, 2018, 2018],
            "month": [1, 1, 1],
            "day": [1, 2, 3],
            "value": [1, 2, 3],
        }
    )
    df["date"] = pd.to_datetime(df[["year", "month", "day"]])

    monthly = df[["date", "value"]].groupby(["date"]).sum()
    dates = monthly.index
    forecast = monthly["value"]
    baseline = monthly["value"]

    fig, ax = plt.subplots()
    try:
        ax.bar(dates, forecast, width=10, align="center")
        ax.plot(dates, baseline, color="orange", lw=4)
    finally:
        # Release the figure even if plotting raises, so it cannot leak into
        # later tests through pyplot's global figure registry.
        plt.close(fig)


def test_bar_pandas_indexed():
    """Smoke test: ``Axes.bar`` accepts columns of a frame indexed from 1."""
    df = pd.DataFrame({"x": [1.0, 2.0, 3.0], "width": [0.2, 0.4, 0.6]}, index=[1, 2, 3])
    fig, ax = plt.subplots()
    try:
        ax.bar(df.x, 1.0, width=df.width)
    finally:
        # Release the figure so it does not leak into later tests.
        plt.close(fig)


def test_pandas_minimal_plot():
    """Smoke test: ``plt.plot`` accepts Series, their index, and DataFrames.

    Both a NumPy-backed (``"float64"``) and an extension (``"Float64"``) dtype
    Series are exercised.
    """
    # NOTE(review): the original comment says these calls should "not warn";
    # nothing in this function enforces that - confirm that the pytest
    # configuration escalates warnings if that guarantee is wanted.
    #
    # Draw on a dedicated figure instead of whatever figure a previous test
    # happened to leave current, and close it afterwards.
    fig = plt.figure()
    try:
        series_variants = [
            pd.Series([1, 2], dtype="float64"),
            pd.Series([1, 2], dtype="Float64"),
        ]
        for x in series_variants:
            plt.plot(x, x)
            plt.plot(x.index, x)
            plt.plot(x)
            plt.plot(x.index)
        df = pd.DataFrame({"col": [1, 2, 3]})
        plt.plot(df)
        plt.plot(df, df)
    finally:
        plt.close(fig)


@check_figures_equal(extensions=["png"])
def test_violinplot_pandas_series(fig_test, fig_ref):
    """Violin plots of Series must match violin plots of their ``.values``.

    The three samples use a descending integer index, a string index and the
    default index respectively.
    """
    np.random.seed(110433579)
    # The list elements are evaluated in order, so the random draws happen in
    # the sequence size=7, size=9, size=11.
    samples = [
        pd.Series(np.random.normal(size=7), index=[9, 8, 7, 6, 5, 4, 3]),
        pd.Series(np.random.normal(size=9), index=list("ABCDEFGHI")),
        pd.Series(np.random.normal(size=11)),
    ]
    ax_test = fig_test.subplots()
    ax_test.violinplot(samples)
    ax_ref = fig_ref.subplots()
    ax_ref.violinplot([sample.values for sample in samples])


def test_pandas_pcolormesh():
    """Smoke test: ``Axes.pcolormesh`` accepts a ``pd.date_range`` as x edges.

    ``time`` (10 entries) and ``depth`` (20 entries) each have one more entry
    than the matching dimension of the ``(19, 9)`` data array.
    """
    time = pd.date_range("2000-01-01", periods=10)
    depth = np.arange(20)
    data = np.random.rand(19, 9)

    fig, ax = plt.subplots()
    try:
        ax.pcolormesh(time, depth, data)
    finally:
        # Release the figure so it does not leak into later tests.
        plt.close(fig)


def test_pandas_indexing_dates():
    """Smoke test: plot by column name from a frame whose index lacks 0.

    Only the odd-indexed rows are kept, so the frame handed to ``Axes.plot``
    via ``data=`` has the index 1, 3, 5, ...
    """
    dates = np.arange("2005-02", "2005-03", dtype="datetime64[D]")
    values = np.sin(range(len(dates)))
    df = pd.DataFrame({"dates": dates, "values": values})

    # Make a fresh figure current before calling gca(), so the test does not
    # draw on (and depend on) a figure left over from another test.
    fig = plt.figure()
    try:
        ax = plt.gca()

        without_zero_index = df[np.array(df.index) % 2 == 1].copy()
        ax.plot("dates", "values", data=without_zero_index)
    finally:
        plt.close(fig)


def test_pandas_errorbar_indexing():
    """Smoke test: ``Axes.errorbar`` resolves column names from a 1-indexed frame."""
    df = pd.DataFrame(
        np.random.uniform(size=(5, 4)),
        columns=["x", "y", "xe", "ye"],
        index=[1, 2, 3, 4, 5],
    )
    fig, ax = plt.subplots()
    try:
        ax.errorbar("x", "y", xerr="xe", yerr="ye", data=df)
    finally:
        # Release the figure so it does not leak into later tests.
        plt.close(fig)


def test_pandas_index_shape():
    """Smoke test: ``Axes.plot`` accepts a frame's index as the x values."""
    df = pd.DataFrame({"XX": [4, 5, 6], "YY": [7, 1, 2]})
    fig, ax = plt.subplots()
    try:
        ax.plot(df.index, df["YY"])
    finally:
        # Release the figure so it does not leak into later tests.
        plt.close(fig)


def test_pandas_indexing_hist():
    """Smoke test: ``Axes.hist`` accepts a Series whose first row was sliced off."""
    ser_1 = pd.Series(data=[1, 2, 2, 3, 3, 4, 4, 4, 4, 5])
    # Dropping the first row via iloc leaves a Series that starts at row 1.
    ser_2 = ser_1.iloc[1:]
    fig, ax = plt.subplots()
    try:
        ax.hist(ser_2)
    finally:
        # Release the figure so it does not leak into later tests.
        plt.close(fig)


def test_pandas_bar_align_center():
    """Smoke test: centre-aligned bars from a boolean-mask ``.loc`` selection.

    Tests fix for issue 8767 (issue number taken from the original comment).
    """
    df = pd.DataFrame({"a": range(2), "b": range(2)})

    fig, ax = plt.subplots(1)
    try:
        ax.bar(df.loc[df["a"] == 1, "b"], df.loc[df["a"] == 1, "b"], align="center")

        # Force a render of the figure.
        fig.canvas.draw()
    finally:
        # Release the figure so it does not leak into later tests.
        plt.close(fig)


def test_scatter_series_non_zero_index():
    """Smoke test: ``plt.scatter`` accepts Series whose index starts at 10."""
    # create non-zero index
    ids = range(10, 18)
    x = pd.Series(np.random.uniform(size=8), index=ids)
    y = pd.Series(np.random.uniform(size=8), index=ids)
    c = pd.Series([1, 1, 1, 1, 1, 0, 0, 0], index=ids)

    # Draw on a dedicated figure instead of whatever figure a previous test
    # happened to leave current, and close it afterwards.
    fig = plt.figure()
    try:
        # NOTE(review): the third positional parameter of plt.scatter is ``s``
        # (marker sizes), so ``c`` is used as sizes here, not colours. Kept
        # as-is to preserve the existing behaviour - confirm this is intended.
        plt.scatter(x, y, c)
    finally:
        plt.close(fig)
30 changes: 30 additions & 0 deletions modin/pandas/test/interoperability/matplotlib/test_cbook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import numpy as np
import modin.pandas as pd
from matplotlib import cbook


def test_reshape2d_pandas():
    """Check ``cbook._reshape_2D`` on a DataFrame against its source array."""
    raw = np.arange(30).reshape(10, 3)
    frame = pd.DataFrame(raw, columns=["a", "b", "c"])
    reshaped = cbook._reshape_2D(frame, "x")
    # _reshape_2D returns a list of arrays, so compare them one at a time
    # with the columns of the source array (the rows of its transpose).
    for expected, actual in zip(raw.T, reshaped):
        np.testing.assert_array_equal(expected, actual)


def test_index_of_pandas():
    """Check that ``cbook.index_of`` splits a DataFrame into index and values.

    The frame uses the default index, so the expected index is ``0..9`` and
    the expected values are the source array itself.
    """
    raw = np.arange(30).reshape(10, 3)
    frame = pd.DataFrame(raw, columns=["a", "b", "c"])
    index, values = cbook.index_of(frame)
    np.testing.assert_array_equal(raw, values)
    np.testing.assert_array_equal(index, np.arange(10))


def test_safe_first_element_pandas_series():
    """Check ``cbook._safe_first_finite`` on a Series indexed from 10.

    The index deliberately does not start at 0; the expected result is the
    first value of the Series (0), not the value at label 0.
    """
    series = pd.Series(range(5), index=range(10, 15))
    assert cbook._safe_first_finite(series) == 0
19 changes: 19 additions & 0 deletions modin/pandas/test/interoperability/matplotlib/test_collections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import modin.pandas as pd
from matplotlib.collections import Collection


def test_pandas_indexing():
    """Smoke test: ``Collection`` accepts Series whose index does not start at 0.

    Each property is passed to its own ``Collection`` constructor call; the
    test passes as long as none of them raises.
    """
    labels = [11, 12, 13]
    # Edge and face colours intentionally share one Series object.
    colors = pd.Series(["red", "blue", "green"], index=labels)
    properties = {
        "edgecolors": colors,
        "facecolors": colors,
        "linewidths": pd.Series([1, 2, 3], index=labels),
        "linestyles": pd.Series(["solid", "dashed", "dashdot"], index=labels),
        "antialiaseds": pd.Series([True, False, True], index=labels),
    }

    # One constructor call per keyword, in the order declared above.
    for keyword, series in properties.items():
        Collection(**{keyword: series})
16 changes: 16 additions & 0 deletions modin/pandas/test/interoperability/matplotlib/test_colors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from numpy.testing import assert_array_equal

import matplotlib.colors as mcolors

import modin.pandas as pd


def test_pandas_iterable():
    """A list and a Series of colour names must yield equal colormaps.

    I.e. the Series must be treated as an iterable of colours rather than as
    a single colour.
    """
    names = ["red", "blue", "green"]
    from_list = mcolors.ListedColormap(names, N=5)
    from_series = mcolors.ListedColormap(pd.Series(names), N=5)
    assert_array_equal(from_list.colors, from_series.colors)
Loading

0 comments on commit 8c7d444

Please sign in to comment.