Define generic class for selecting columns based on expressions.

Selector capturing column names matching the pattern specified.

Source code in `src/tidy_tools/core/selector.py`

Selector capturing column names containing the exact pattern specified.

Selector capturing column names starting with the exact pattern specified.

Selector capturing column names ending with the exact pattern specified.
`filter_nulls(self: DataFrame, *columns: ColumnReference, strict: bool = False, invert: bool = False) -> DataFrame`

Keep all observations that represent null across all column(s).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `self` | `DataFrame` | Object inheriting from PySpark DataFrame. | required |
| `*columns` | `ColumnReference` | Arbitrary number of column references. All columns must exist in the DataFrame. | `()` |
| `strict` | `bool` | Should the condition be true for all column(s)? | `False` |
| `invert` | `bool` | Should observations that meet the condition be kept (`False`) or removed (`True`)? | `False` |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Observations that represent null across all column(s). |

Source code in `src/tidy_tools/core/filter.py`
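A short sketch of how `filter_nulls` might be used, assuming the function is importable from `tidy_tools.core.filter` (matching the source path above) and that plain column names are accepted as `ColumnReference`s:

```python
from pyspark.sql import SparkSession

from tidy_tools.core.filter import filter_nulls  # assumed import path

spark = SparkSession.builder.getOrCreate()
data = spark.createDataFrame(
    [("a", None), (None, None), ("c", 3)],
    schema="letter string, number int",
)

# keep rows satisfying the null condition across the listed columns
nulls = filter_nulls(data, "letter", "number")

# strict=True requires the condition to hold for every listed column
all_null = filter_nulls(data, "letter", "number", strict=True)

# invert=True removes matching rows instead of keeping them
no_nulls = filter_nulls(data, "letter", "number", invert=True)
```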
`filter_regex(self: DataFrame, *columns: ColumnReference, pattern: str, strict: bool = False, invert: bool = False) -> DataFrame`

Keep all observations that match the regular expression across all column(s).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `self` | `DataFrame` | Object inheriting from PySpark DataFrame. | required |
| `*columns` | `ColumnReference` | Arbitrary number of column references. All columns must exist in the DataFrame. | `()` |
| `pattern` | `str` | Regular expression. Must be compiled according to the supported regex syntax. | required |
| `strict` | `bool` | Should the condition be true for all column(s)? | `False` |
| `invert` | `bool` | Should observations that meet the condition be kept (`False`) or removed (`True`)? | `False` |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Observations that match the regular expression across all column(s). |

Source code in `src/tidy_tools/core/filter.py`
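A hedged sketch of `filter_regex`, reusing the assumptions above (importable from `tidy_tools.core.filter`, string column names); the column names and pattern are illustrative:

```python
from tidy_tools.core.filter import filter_regex  # assumed import path

# keep rows where the pattern matches in the listed columns
emails = filter_regex(data, "work_email", "home_email", pattern=r".+@.+\..+")

# combine flags: require a match in every column, then drop those rows
no_emails = filter_regex(
    data,
    "work_email",
    "home_email",
    pattern=r".+@.+\..+",
    strict=True,
    invert=True,
)
```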
`filter_elements(self: DataFrame, *columns: ColumnReference, elements: Sequence, strict: bool = False, invert: bool = False) -> DataFrame`

Keep all observations that exist within elements across all column(s).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `self` | `DataFrame` | Object inheriting from PySpark DataFrame. | required |
| `*columns` | `ColumnReference` | Arbitrary number of column references. All columns must exist in the DataFrame. | `()` |
| `elements` | `Sequence` | Collection of items expected to exist in any/all column(s). | required |
| `strict` | `bool` | Should the condition be true for all column(s)? | `False` |
| `invert` | `bool` | Should observations that meet the condition be kept (`False`) or removed (`True`)? | `False` |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Observations that exist within elements across all column(s). |

Source code in `src/tidy_tools/core/filter.py`
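A minimal sketch of `filter_elements` under the same assumptions (assumed import path, illustrative column and values):

```python
from tidy_tools.core.filter import filter_elements  # assumed import path

# keep rows whose value appears in `elements` for the listed column
north_america = filter_elements(data, "country", elements=["USA", "Canada"])

# invert=True keeps everything else instead
rest_of_world = filter_elements(
    data, "country", elements=["USA", "Canada"], invert=True
)
```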
`filter_range(self: DataFrame, *columns: ColumnReference, boundaries: Sequence[Any], strict: bool = False, invert: bool = False) -> DataFrame`

Keep all observations that exist within range across all column(s).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `self` | `DataFrame` | Object inheriting from PySpark DataFrame. | required |
| `*columns` | `ColumnReference` | Arbitrary number of column references. All columns must exist in the DataFrame. | `()` |
| `boundaries` | `Sequence[Any]` | Bounds of range. Must be of the same type and in ascending order. | required |
| `strict` | `bool` | Should the condition be true for all column(s)? | `False` |
| `invert` | `bool` | Should observations that meet the condition be kept (`False`) or removed (`True`)? | `False` |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Observations that exist within range across all column(s). |

Source code in `src/tidy_tools/core/filter.py`
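A sketch of `filter_range` under the same assumptions; the date column and bounds are illustrative:

```python
import datetime

from tidy_tools.core.filter import filter_range  # assumed import path

# boundaries must share a type and be listed in ascending order
q1_orders = filter_range(
    data,
    "order_date",
    boundaries=[datetime.date(2024, 1, 1), datetime.date(2024, 3, 31)],
)
```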
Parameters supported by TidyDataFrame contextual operations.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `name` | `str` | Name of DataFrame. |
| `count` | `bool` | Whether to perform count operations. |
| `display` | `bool` | Whether to perform display operations. |
| `limit` | `int` | Default all display operations to display only this many rows. |
| `log_handlers` | `list[TidyLogHandler]` | Sequence of TidyLogHandler instances to configure for TidyDataFrame. |

Examples:

```python
>>> # assuming PySpark DataFrame is loaded
>>> spark_data = ...
>>>
>>> # default configuration
>>> default_context = TidyContext()
>>> default_dataframe = TidyDataFrame(spark_data, default_context)
>>>
>>> # simple contextual configuration
>>> basic_context = TidyContext(
...     name="ContextDataFrame",
...     count=False,
...     limit=10
... )
>>> basic_dataframe = TidyDataFrame(spark_data, basic_context)
>>>
>>> # attaching log handlers
>>> logging_context = TidyContext(
...     name="LoggingHandlers",
...     log_handlers=[
...         TidyLogHandler(),
...         TidyFileHandler("example.log"),
...         TidyMemoHandler("serialized_example.log")
...     ]
... )
>>> logging_dataframe = TidyDataFrame(spark_data, logging_context)
```
classmethod

`load(context: str | Path | dict) -> TidyContext`

Create TidyContext from pre-configured context.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `context` | `str \| Path \| dict` | Reference to object containing TidyContext attributes. If given a file reference, the attributes are loaded from that file. | required |

Returns:

| Type | Description |
| --- | --- |
| `TidyContext` | Instance of TidyContext configured with provided parameters. |

Source code in `src/tidy_tools/dataframe/context.py`
Save attributes as serialized JSON object.

Returns:

| Type | Description |
| --- | --- |
| `dict` | Attributes of TidyContext as a dictionary. |
Save attributes to `filepath`.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `filepath` | `str \| Path` | File to save attributes. This file can be loaded using the `load` method. | required |

Returns:

| Type | Description |
| --- | --- |
| `None` | Stores output to file. |

Source code in `src/tidy_tools/dataframe/context.py`
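A sketch of persisting and restoring a context, assuming `TidyContext` is importable from `tidy_tools.dataframe.context` and that the file serializer documented above is named `save` (the reference does not show the method name):

```python
from tidy_tools.dataframe.context import TidyContext  # assumed import path

context = TidyContext(name="Quarterly", count=False, limit=25)

# write the attributes to disk, then rebuild the context elsewhere
context.save("context.json")            # `save` is an assumed method name
restored = TidyContext.load("context.json")

# `load` also accepts a plain dictionary of attributes
same = TidyContext.load({"name": "Quarterly", "count": False, "limit": 25})
```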
Enable tidy operations on a PySpark DataFrame with optional context.

TidyDataFrame is a PySpark DataFrame with built-in logging functionality.
Directly inspired by the tidylog project, TidyDataFrame decorates common
DataFrame methods to detail the impact of said method in real-time. Combined
with the context to control other behavior (e.g. disabling displays, logging
to multiple handlers), TidyDataFrame is the all-in-one logging solution for
PySpark workflows.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `_data` | `DataFrame` | PySpark DataFrame object to perform tidy operations. |
| `_context` | `dict`, optional | Context to control execution of TidyDataFrame. |
property

Return all column names and data types as a list.

property

Compute basic statistics for numeric and string columns.

property

Return schema as a `pyspark.sql.types.StructType` object.
classmethod

`load(*source: str | Path, context: Optional[TidyContext] = None, read_func: Optional[Callable] = None, read_options: Optional[dict] = dict()) -> TidyDataFrame`

Create TidyDataFrame directly from source(s).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `*source` | `str \| Path` | Arbitrary number of file references containing data. | `()` |
| `context` | `TidyContext` | Additional context parameters to pass. | `None` |
| `read_func` | `Callable` | Function for reading data from source(s). | `None` |
| `read_options` | `dict` | Additional parameters to pass to `read_func`. | `dict()` |

Returns:

| Type | Description |
| --- | --- |
| `TidyDataFrame` | Instance of TidyDataFrame loaded from source(s) with additional parameters instructing read-in and/or context. |

Examples:

```python
>>> # load data from a single source
>>> tidy_data = TidyDataFrame.load("path/to/data.csv")
>>>
>>> # load data from multiple sources
>>> tidy_data = TidyDataFrame.load(
...     "path/to/data.csv",
...     "path/to/another/file.txt",
...     "path/to/the/final/file.xlsx",
... )
>>>
>>> # load data with context
>>> tidy_data = TidyDataFrame.load(..., context=TidyContext(...))
>>>
>>> # load data with read-in instructions
>>> tidy_data = TidyDataFrame.load(
...     ...,
...     read_func=spark.read.csv,
...     read_options={"header": "true"}
... )
```

Source code in `src/tidy_tools/dataframe/dataframe.py`
Control execution of display method.

This method masks the `pyspark.sql.DataFrame.display` method. This method does not mask the native PySpark `display` function.

Often, the `.display()` method will need to be disabled for logging purposes. Similar to toggling the `.count()` method, users can temporarily disable a DataFrame's ability to display to the console by passing `toggle_display = True`.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `limit` | `int` | Number of rows to display to console. If a context is provided, the limit from the context is used. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `None` | Displays data to console, or nothing if display is disabled. |

Source code in `src/tidy_tools/dataframe/dataframe.py`
Return number of rows in DataFrame.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `result` | `DataFrame` | If provided, this will trigger a count operation. Else, the count will reference the last count, or zero if the context disables count. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `int` | Number of rows in data, or zero if count is disabled in context. |

Source code in `src/tidy_tools/dataframe/dataframe.py`
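For instance, continuing the earlier examples (where `tidy_data` is a `TidyDataFrame`), a hedged sketch of how counting behaves:

```python
# returns the number of rows; with counting disabled in the context,
# this returns zero instead of triggering a scan
n_rows = tidy_data.count()
```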
`transform(func: Callable, *args, **kwargs) -> TidyDataFrame`

Concise syntax for chaining custom transformations together.

If calling multiple times in succession, consider using `TidyDataFrame.pipe`.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `func` | `Callable` | Custom transformation function(s) to apply to data. | required |
| `*args` | `tuple` | Arbitrary number of positional arguments to pass to `func`. | `()` |
| `**kwargs` | `dict` | Arbitrary number of keyword arguments to pass to `func`. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `TidyDataFrame` | Transformed data. |

Source code in `src/tidy_tools/dataframe/dataframe.py`
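A sketch of `transform` with an illustrative transformation function, again assuming `tidy_data` is a `TidyDataFrame`:

```python
import pyspark.sql.functions as F

def standardize(data, column):
    # illustrative transformation: trim and upper-case a column
    return data.withColumn(column, F.upper(F.trim(F.col(column))))

# positional arguments after `func` are forwarded to it
result = tidy_data.transform(standardize, "country")
```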
`pipe(*funcs: Callable) -> TidyDataFrame`

Iteratively apply custom transformation functions.

Functional alias for `TidyDataFrame.transform`.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `*funcs` | `Callable` | Custom transformation function(s) to apply to data. | `()` |

Returns:

| Type | Description |
| --- | --- |
| `TidyDataFrame` | Transformed data. |

Source code in `src/tidy_tools/dataframe/dataframe.py`
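A sketch of `pipe` with illustrative unary transformation functions:

```python
import pyspark.sql.functions as F

def drop_empty(data):
    return data.na.drop(how="all")

def add_year(data):
    return data.withColumn("year", F.year("order_date"))

# each function receives the result of the previous one
result = tidy_data.pipe(drop_empty, add_year)
```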
Generic log handler for system error streams.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `sink` | `str \| Path \| TextIO` | Destination for receiving logging messages. |
| `level` | `str` | Minimum level to trace in logs. See the `loguru` documentation for available levels. |
| `format` | `str` | Template used for logged messages. |
| `diagnose` | `bool` | Whether the exception trace should display variable values to ease debugging. |
| `catch` | `bool` | Whether errors occurring while the sink handles log messages should be automatically caught. If True, an exception message is displayed on `sys.stderr` but the exception is not propagated to the caller, preventing your app from crashing. |
Bases: `TidyLogHandler`

Log handler for file streams.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `sink` | `str \| Path \| TextIO` | Destination for receiving logging messages. |
| `level` | `str` | Minimum level to trace in logs. See the `loguru` documentation for available levels. |
| `format` | `str` | Template used for logged messages. |
| `diagnose` | `bool` | Whether the exception trace should display variable values to ease debugging. |
| `catch` | `bool` | Whether errors occurring while the sink handles log messages should be automatically caught. If True, an exception message is displayed on `sys.stderr` but the exception is not propagated to the caller, preventing your app from crashing. |
Bases: `TidyFileHandler`

Log handler for serialized streams.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `sink` | `str \| Path \| TextIO` | Destination for receiving logging messages. |
| `level` | `str` | Minimum level to trace in logs. See the `loguru` documentation for available levels. |
| `format` | `str` | Template used for logged messages. |
| `diagnose` | `bool` | Whether the exception trace should display variable values to ease debugging. |
| `catch` | `bool` | Whether errors occurring while the sink handles log messages should be automatically caught. If True, an exception message is displayed on `sys.stderr` but the exception is not propagated to the caller, preventing your app from crashing. |
| `serialize` | `bool` | Whether the logged message and its records should first be converted to a JSON string before being sent to the sink. |
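Tying the three handlers together, a sketch that mirrors the `TidyContext` example earlier on this page (the sink filenames are illustrative):

```python
context = TidyContext(
    name="AuditedPipeline",
    log_handlers=[
        TidyLogHandler(),                   # system error stream
        TidyFileHandler("pipeline.log"),    # plain-text log file
        TidyMemoHandler("pipeline.json"),   # serialized (JSON) log file
    ],
)
audited = TidyDataFrame(spark_data, context)
```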
Merge an arbitrary number of DataFrames into a single DataFrame.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `*data` | `DataFrame` | PySpark DataFrame. | `()` |
| `func` | `Callable` | Reduce function to merge two DataFrames to each other. By default, this union resolves by column name. | required |
| `**kwargs` | `dict` | Keyword arguments for merge function. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Result of merging all `data` objects. |

Source code in `src/tidy_tools/functions/merge.py`
Concatenate an arbitrary number of DataFrames into a single DataFrame.

By default, all objects are appended to one another by column name. An error will be raised if column names do not align.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `*data` | `DataFrame` | PySpark DataFrame. | `()` |
| `func` | `Callable` | Reduce function to concatenate two DataFrames to each other. By default, this union resolves by column name. | `unionByName` |
| `**kwargs` | `dict` | Keyword arguments for merge function. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Result of concatenating `data`. |

Source code in `src/tidy_tools/functions/merge.py`
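The reference does not show this function's name; `concat` is used below as a hypothetical name with an assumed import path:

```python
from tidy_tools.functions.merge import concat  # hypothetical name, assumed path

# append three DataFrames by column name (the default unionByName behavior)
all_orders = concat(orders_2022, orders_2023, orders_2024)
```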
`join(*data: DataFrame, on: str | Column, how: str = 'inner', func: Callable = join, **kwargs: dict) -> DataFrame`

Join an arbitrary number of DataFrames into a single DataFrame.

By default, all objects are joined to one another using an inner join on the provided column(s).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `*data` | `DataFrame` | PySpark DataFrame. | `()` |
| `on` | `str \| Column` | Column name or expression to perform join. | required |
| `how` | `str` | Set operation to perform. | `'inner'` |
| `func` | `Callable` | Reduce function to join two DataFrames to each other. | `join` |
| `**kwargs` | `dict` | Keyword arguments for merge function. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Result of joining `data`. |

Source code in `src/tidy_tools/functions/merge.py`
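A sketch of `join` with illustrative DataFrames and key, assuming the import path matches the source file above:

```python
from tidy_tools.functions.merge import join  # assumed import path

# joins are applied pairwise, left to right, on the shared key
enriched = join(orders, customers, payments, on="customer_id", how="left")
```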
Load data from source(s) as a PySpark DataFrame.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `*source` | `str \| Path` | Arbitrary number of file references. | `()` |
| `read_func` | `Callable` | Function to load data from source(s). | required |
| `**read_options` | `dict` | Additional arguments to pass to `read_func`. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Object containing data from all source(s) provided. |

Source code in `src/tidy_tools/functions/reader.py`
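The reader function's name is likewise not shown; `read` is used below as a hypothetical name, with `spark.read.csv` as an example `read_func`:

```python
from tidy_tools.functions.reader import read  # hypothetical name, assumed path

# each source is loaded with `read_func`; extra keywords are forwarded to it
data = read(
    "2024/q1.csv",
    "2024/q2.csv",
    read_func=spark.read.csv,
    header="true",
)
```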
Convert data according to a class schema.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `cls_field` | `Attribute` | Field to apply conversion function. | required |
| `cls_field_exists` | `bool` | Whether field exists in data already. | required |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Converted DataFrame. |

Source code in `src/tidy_tools/model/convert.py`
classmethod

Load data from source(s) and apply processing, conversion, and validation procedures.

See `TidyDataModel.tidy()` for more details.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `*source` | `str` | Arbitrary number of reference(s) to data source(s). | `()` |
| `read_func` | `Callable` | Function to load data from source(s). | required |
| `read_options` | `dict` | Keyword arguments to pass to `read_func`. | `dict()` |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Single DataFrame containing data from all source(s) coerced according to class schema. |

Source code in `src/tidy_tools/model/model.py`
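A hedged sketch of the classmethod in use; `Orders` stands in for a TidyDataModel subclass whose field definitions are outside the scope of this reference:

```python
# `Orders` is a hypothetical TidyDataModel subclass defined elsewhere
data = Orders.load(
    "path/to/orders.csv",
    read_func=spark.read.csv,
    read_options={"header": "true"},
)
```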
classmethod

Apply conversion functions to supported fields.

Outputs messages to logging handlers.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data` | `DataFrame` | Object to apply conversion functions. | required |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Converted data. |

Source code in `src/tidy_tools/model/model.py`
classmethod

Apply validation functions to supported fields.

Outputs messages to logging handlers.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data` | `DataFrame` | Object to apply validation functions. | required |

Returns:

| Type | Description |
| --- | --- |
| `DataFrame` | Original data passed to function. |

Source code in `src/tidy_tools/model/model.py`
classmethod

Method for composing processing functions.

If present, the methods are executed in the following order:

- pre-processing
- conversions
- validations
- post-processing

Returns:

| Type | Description |
| --- | --- |
| `Callable` | Function to call listed methods. |

Source code in `src/tidy_tools/model/model.py`
Apply validation function(s) to schema `cls_field`.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `cls_field` | `Attribute` | Schema for field in class. | required |
| `data` | `DataFrame` | Data to validate field against. | required |

Returns:

| Type | Description |
| --- | --- |
| `TidyError` | If the validation function fails for at least one row, an error handler is returned for further logging. |

Source code in `src/tidy_tools/model/validate.py`
Tidy Tools comes with its own custom validators.
Return expression checking for null values in column.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `_defaults` | `tuple[str]` | Default values representing null. By default, checks for whitespace values and "N/A". | `('\\s*', '\\bN/A\\b')` |

Returns:

| Type | Description |
| --- | --- |
| `Callable` | Constructs closure that can be called on column(s). |

Source code in `src/tidy_tools/model/validators.py`
Return expression checking for pattern in column.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `pattern` | `str` | Regular expression to check for in column. | required |

Returns:

| Type | Description |
| --- | --- |
| `Callable` | Constructs closure that can be called on column(s). |

Source code in `src/tidy_tools/model/validators.py`
Return expression checking for membership in column.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `elements` | `Sequence` | Collection containing value(s) to check for in column. | required |

Returns:

| Type | Description |
| --- | --- |
| `Callable` | Constructs closure that can be called on column(s). |

Source code in `src/tidy_tools/model/validators.py`
Return expression checking that column values fall within the given bounds.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `lower_bound` | `Any` | Least value to check for in column. | required |
| `upper_bound` | `Any` | Greatest value to check for in column. | required |

Returns:

| Type | Description |
| --- | --- |
| `Callable` | Constructs closure that can be called on column(s). |

Source code in `src/tidy_tools/model/validators.py`
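The validator names are not shown in this reference; `is_between` is used below as a hypothetical name for the range validator, and its use inside a row filter is likewise an assumption about the returned closure:

```python
import pyspark.sql.functions as F

# the factory returns a closure that can be called on column(s)
check_score = is_between(lower_bound=0, upper_bound=100)  # hypothetical name

# one possible use: turn the resulting expression into a row filter
valid_rows = data.filter(check_score(F.col("score")))
```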
Define and store pipeline as object to be executed.

Unlike `pipe`, `compose` will not evaluate when initialized; definition and evaluation are two separate steps. See Examples for more details.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `*functions` | `Callable` | Arbitrary number of functions to chain together. | `()` |

Returns:

| Type | Description |
| --- | --- |
| `Callable` | Nested function in order of function(s) passed. |

Examples:

```python
>>> # works with unary functions
>>> add_two = lambda x: x + 2
>>>
>>> # works with partial functions
>>> add_n = lambda x, n: x + n
>>>
>>> # works with closures
>>> def add_n(n: int) -> Callable:
...     def closure(x):
...         return x + n
...     return closure
>>>
>>> summation = compose(add_two, add_n(10), add_n(-4))
>>> assert summation(12) == 20
```

Source code in `src/tidy_tools/workflow/pipeline.py`
Apply arbitrary number of functions to `instance` in succession.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `instance` | `Any` | Scalar object. | required |
| `*functions` | `Callable` | Functions to apply in order. | `()` |

Returns:

| Type | Description |
| --- | --- |
| `Any` | Result of applying all function(s) to `instance`. |

Examples:

```python
>>> # works with unary functions
>>> add_two = lambda x: x + 2
>>>
>>> # works with partial functions
>>> add_n = lambda x, n: x + n
>>>
>>> # works with closures
>>> def add_n(n: int) -> Callable:
...     def closure(x):
...         return x + n
...     return closure
>>>
>>> result = pipe(12, add_two, add_n(10), add_n(-4))
>>> assert result == 20
```

Source code in `src/tidy_tools/workflow/pipeline.py`