From 8c8d4fbe500e38ba41c7654a162c33163311f310 Mon Sep 17 00:00:00 2001 From: Liam Brannigan Date: Sat, 3 Feb 2024 04:06:26 +0000 Subject: [PATCH] docs(python): Add visualisation page to user guide (#13052) Co-authored-by: Stijn de Gooijer --- docs/development/contributing/index.md | 4 +- docs/requirements.txt | 3 + .../python/user-guide/misc/visualization.py | 130 ++++++++++++++++++ docs/user-guide/misc/visualization.md | 60 ++++++++ mkdocs.yml | 1 + py-polars/tests/docs/test_user_guide.py | 3 + 6 files changed, 199 insertions(+), 2 deletions(-) create mode 100644 docs/src/python/user-guide/misc/visualization.py create mode 100644 docs/user-guide/misc/visualization.md diff --git a/docs/development/contributing/index.md b/docs/development/contributing/index.md index e78517df33c7..70ef14cf8e85 100644 --- a/docs/development/contributing/index.md +++ b/docs/development/contributing/index.md @@ -150,9 +150,9 @@ The user guide is maintained in the `docs/user-guide` folder. Before creating a #### Building and serving the user guide -The user guide is built using [MkDocs](https://www.mkdocs.org/). You install the dependencies for building the user guide by running `make requirements` in the root of the repo. +The user guide is built using [MkDocs](https://www.mkdocs.org/). You install the dependencies for building the user guide by running `make build` in the root of the repo. -Run `mkdocs serve` to build and serve the user guide, so you can view it locally and see updates as you make changes. +Activate the virtual environment and run `mkdocs serve` to build and serve the user guide, so you can view it locally and see updates as you make changes. 
#### Creating a new user guide page diff --git a/docs/requirements.txt b/docs/requirements.txt index d87ce3879ae2..dccf92dd62d1 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,6 +2,9 @@ pandas pyarrow graphviz matplotlib +seaborn +plotly +altair mkdocs-material==9.5.2 mkdocs-macros-plugin==1.0.4 diff --git a/docs/src/python/user-guide/misc/visualization.py b/docs/src/python/user-guide/misc/visualization.py new file mode 100644 index 000000000000..f04288cb7812 --- /dev/null +++ b/docs/src/python/user-guide/misc/visualization.py @@ -0,0 +1,130 @@ +# --8<-- [start:dataframe] +import polars as pl + +path = "docs/data/iris.csv" + +df = pl.scan_csv(path).group_by("species").agg(pl.col("petal_length").mean()).collect() +print(df) +# --8<-- [end:dataframe] + +""" +# --8<-- [start:hvplot_show_plot] +df.plot.bar( + x="species", + y="petal_length", + width=650, +) +# --8<-- [end:hvplot_show_plot] +""" + +# --8<-- [start:hvplot_make_plot] +import hvplot + +plot = df.plot.bar( + x="species", + y="petal_length", + width=650, +) +hvplot.save(plot, "docs/images/hvplot_bar.html") +with open("docs/images/hvplot_bar.html", "r") as f: + chart_html = f.read() + print(f"{chart_html}") +# --8<-- [end:hvplot_make_plot] + +""" +# --8<-- [start:matplotlib_show_plot] +import matplotlib.pyplot as plt + +plt.bar(x=df["species"], height=df["petal_length"]) +# --8<-- [end:matplotlib_show_plot] +""" + +# --8<-- [start:matplotlib_make_plot] +import base64 + +import matplotlib.pyplot as plt + +plt.bar(x=df["species"], height=df["petal_length"]) +plt.savefig("docs/images/matplotlib_bar.png") +with open("docs/images/matplotlib_bar.png", "rb") as f: + png = base64.b64encode(f.read()).decode() + print(f'<img src="data:image/png;base64, {png}"/>') +# --8<-- [end:matplotlib_make_plot] + +""" +# --8<-- [start:seaborn_show_plot] +import seaborn as sns +sns.barplot( + df, + x="species", + y="petal_length", +) +# --8<-- [end:seaborn_show_plot] +""" + +# --8<-- [start:seaborn_make_plot] +import seaborn as sns + +sns.barplot( + 
df, + x="species", + y="petal_length", +) +plt.savefig("docs/images/seaborn_bar.png") +with open("docs/images/seaborn_bar.png", "rb") as f: + png = base64.b64encode(f.read()).decode() + print(f'<img src="data:image/png;base64, {png}"/>') +# --8<-- [end:seaborn_make_plot] + +""" +# --8<-- [start:plotly_show_plot] +import plotly.express as px + +px.bar( + df, + x="species", + y="petal_length", + width=400, +) +# --8<-- [end:plotly_show_plot] +""" + +# --8<-- [start:plotly_make_plot] +import plotly.express as px + +fig = px.bar( + df, + x="species", + y="petal_length", + width=650, +) +fig.write_html("docs/images/plotly_bar.html", full_html=False, include_plotlyjs="cdn") +with open("docs/images/plotly_bar.html", "r") as f: + chart_html = f.read() + print(f"{chart_html}") +# --8<-- [end:plotly_make_plot] + +""" +# --8<-- [start:altair_show_plot] +import altair as alt + +alt.Chart(df, width=700).mark_bar().encode(x="species:N", y="petal_length:Q") +# --8<-- [end:altair_show_plot] +""" + +# --8<-- [start:altair_make_plot] +import altair as alt + +chart = ( + alt.Chart(df, width=600) + .mark_bar() + .encode( + x="species:N", + y="petal_length:Q", + ) +) +chart.save("docs/images/altair_bar.html") +with open("docs/images/altair_bar.html", "r") as f: + chart_html = f.read() + print(f"{chart_html}") +# --8<-- [end:altair_make_plot] diff --git a/docs/user-guide/misc/visualization.md b/docs/user-guide/misc/visualization.md new file mode 100644 index 000000000000..88dcd83a18a6 --- /dev/null +++ b/docs/user-guide/misc/visualization.md @@ -0,0 +1,60 @@ +# Visualization + +Data in a Polars `DataFrame` can be visualized using common visualization libraries. + +We illustrate plotting capabilities using the Iris dataset. We scan a CSV and then do a group-by on the `species` column and get the mean of the `petal_length`. 
+ +{{code_block('user-guide/misc/visualization','dataframe',[])}} + +```python exec="on" result="text" session="user-guide/misc/visualization" +--8<-- "python/user-guide/misc/visualization.py:dataframe" +``` + +## Built-in plotting with hvPlot + +A Polars `DataFrame` has a `plot` namespace to create interactive plots using [hvPlot](https://hvplot.holoviz.org/). + +{{code_block('user-guide/misc/visualization','hvplot_show_plot',[])}} + +```python exec="on" session="user-guide/misc/visualization" +--8<-- "python/user-guide/misc/visualization.py:hvplot_make_plot" +``` + +## Matplotlib + +To create a bar chart we can pass columns of a `DataFrame` directly to Matplotlib as a `Series` for each column. Matplotlib does not have explicit support for Polars objects, but it can accept a Polars `Series` because it can convert each `Series` to a NumPy array, which is zero-copy for numeric +data without null values. + +{{code_block('user-guide/misc/visualization','matplotlib_show_plot',[])}} + +```python exec="on" session="user-guide/misc/visualization" +--8<-- "python/user-guide/misc/visualization.py:matplotlib_make_plot" +``` + +## Seaborn, Plotly & Altair + +[Seaborn](https://seaborn.pydata.org/), [Plotly](https://plotly.com/) & [Altair](https://altair-viz.github.io/) can accept a Polars `DataFrame` by leveraging the [dataframe interchange protocol](https://data-apis.org/dataframe-api/), which offers zero-copy conversion where possible. 
+ +### Seaborn + +{{code_block('user-guide/misc/visualization','seaborn_show_plot',[])}} + +```python exec="on" session="user-guide/misc/visualization" +--8<-- "python/user-guide/misc/visualization.py:seaborn_make_plot" +``` + +### Plotly + +{{code_block('user-guide/misc/visualization','plotly_show_plot',[])}} + +```python exec="on" session="user-guide/misc/visualization" +--8<-- "python/user-guide/misc/visualization.py:plotly_make_plot" +``` + +### Altair + +{{code_block('user-guide/misc/visualization','altair_show_plot',[])}} + +```python exec="on" session="user-guide/misc/visualization" +--8<-- "python/user-guide/misc/visualization.py:altair_make_plot" +``` diff --git a/mkdocs.yml b/mkdocs.yml index 3fe9fb6ecfdb..805c235e5734 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -80,6 +80,7 @@ nav: - user-guide/ecosystem.md - Misc: - user-guide/misc/multiprocessing.md + - user-guide/misc/visualization.md - user-guide/misc/comparison.md - API reference: api/index.md diff --git a/py-polars/tests/docs/test_user_guide.py b/py-polars/tests/docs/test_user_guide.py index 032961dd936a..dc71ca5d8f23 100644 --- a/py-polars/tests/docs/test_user_guide.py +++ b/py-polars/tests/docs/test_user_guide.py @@ -15,6 +15,9 @@ python_snippets_dir = repo_root / "docs" / "src" / "python" snippet_paths = list(python_snippets_dir.rglob("*.py")) +# Skip visualization snippets +snippet_paths = [p for p in snippet_paths if "visualization" not in str(p)] + @pytest.fixture(scope="module") def _change_test_dir() -> Iterator[None]: