From 376aa6e3d0a32fcccb8309089d147e08458174a1 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 16 Sep 2024 08:58:29 +0200 Subject: [PATCH] docs: improve check api reference --- docs/api-reference/dtypes.md | 1 + utils/check_api_reference.py | 46 ++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/docs/api-reference/dtypes.md b/docs/api-reference/dtypes.md index a607e9a54..c21b5c766 100644 --- a/docs/api-reference/dtypes.md +++ b/docs/api-reference/dtypes.md @@ -18,6 +18,7 @@ - Categorical - Enum - String + - Date - Datetime - Duration - Object diff --git a/utils/check_api_reference.py b/utils/check_api_reference.py index 1bf1f086e..7829a31a9 100644 --- a/utils/check_api_reference.py +++ b/utils/check_api_reference.py @@ -28,9 +28,11 @@ "item", "scatter", } +BASE_DTYPES = {"NumericType", "DType", "TemporalType"} -# TODO(Unassigned): make dtypes reference page as well files = {remove_suffix(i, ".py") for i in os.listdir("narwhals")} + +# Top level functions top_level_functions = [ i for i in nw.__dir__() if not i[0].isupper() and i[0] != "_" and i not in files ] @@ -50,7 +52,8 @@ print(extra) # noqa: T201 ret = 1 -top_level_functions = [ +# DataFrame methods +dataframe_methods = [ i for i in nw.from_native(pl.DataFrame()).__dir__() if not i[0].isupper() and i[0] != "_" @@ -62,16 +65,17 @@ for i in content.splitlines() if i.startswith(" - ") and not i.startswith(" - _") ] -if missing := set(top_level_functions).difference(documented): +if missing := set(dataframe_methods).difference(documented): print("DataFrame: not documented") # noqa: T201 print(missing) # noqa: T201 ret = 1 -if extra := set(documented).difference(top_level_functions): +if extra := set(documented).difference(dataframe_methods): print("DataFrame: outdated") # noqa: T201 print(extra) # noqa: T201 ret = 1 -top_level_functions = [ +# LazyFrame methods +lazyframe_methods = [ i for i in nw.from_native(pl.LazyFrame()).__dir__() if not i[0].isupper() and i[0] != "_" @@ -83,16 +87,17 @@ for i in content.splitlines() if i.startswith(" - ") ] -if missing := set(top_level_functions).difference(documented): +if missing := set(lazyframe_methods).difference(documented): print("LazyFrame: not documented") # noqa: T201 print(missing) # noqa: T201 ret = 1 -if extra := set(documented).difference(top_level_functions): +if extra := set(documented).difference(lazyframe_methods): print("LazyFrame: outdated") # noqa: T201 print(extra) # noqa: T201 ret = 1 -top_level_functions = [ +# Series methods +series_methods = [ i for i in nw.from_native(pl.Series(), series_only=True).__dir__() if not i[0].isupper() and i[0] != "_" @@ -104,16 +109,17 @@ for i in content.splitlines() if i.startswith(" - ") and not i.startswith(" - _") ] -if missing := set(top_level_functions).difference(documented).difference(NAMESPACES): +if missing := set(series_methods).difference(documented).difference(NAMESPACES): print("Series: not documented") # noqa: T201 print(missing) # noqa: T201 ret = 1 -if extra := set(documented).difference(top_level_functions): +if extra := set(documented).difference(series_methods): print("Series: outdated") # noqa: T201 print(extra) # noqa: T201 ret = 1 -top_level_functions = [ +# Expr methods +expr_methods = [ i for i in nw.Expr(lambda: 0).__dir__() if not i[0].isupper() and i[0] != "_" ] with open("docs/api-reference/expr.md") as fd: @@ -123,16 +129,30 @@ for i in content.splitlines() if i.startswith(" - ") ] -if missing := set(top_level_functions).difference(documented).difference(NAMESPACES): +if missing := set(expr_methods).difference(documented).difference(NAMESPACES): print("Expr: not documented") # noqa: T201 print(missing) # noqa: T201 ret = 1 -if extra := set(documented).difference(top_level_functions): +if extra := set(documented).difference(expr_methods): print("Expr: outdated") # noqa: T201 print(extra) # noqa: T201 ret = 1 # DTypes +dtypes = [ + i for i in nw.dtypes.__dir__() if i[0].isupper() and not i.isupper() and i[0] != "_" +] +with open("docs/api-reference/dtypes.md") as fd: + content = fd.read() +documented = [ + remove_prefix(i, " - ") + for i in content.splitlines() + if i.startswith(" - ") and not i.startswith(" - _") +] +if missing := set(dtypes).difference(documented).difference(BASE_DTYPES): + print("Dtype: not documented") # noqa: T201 + print(missing) # noqa: T201 + ret = 1 # dt