diff --git a/.kokoro/docs/docs-presubmit-gerrit.cfg b/.kokoro/docs/docs-presubmit-gerrit.cfg
new file mode 100644
index 0000000000..1d0dc4b499
--- /dev/null
+++ b/.kokoro/docs/docs-presubmit-gerrit.cfg
@@ -0,0 +1,23 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+    key: "V2_STAGING_BUCKET"
+    value: "gcloud-python-test"
+}
+
+# We only upload the image in the main `docs` build.
+env_vars: {
+    key: "TRAMPOLINE_IMAGE_UPLOAD"
+    value: "false"
+}
+
+env_vars: {
+    key: "TRAMPOLINE_BUILD_FILE"
+    value: ".kokoro/build.sh"
+}
+
+# Only run this nox session.
+env_vars: {
+    key: "NOX_SESSION"
+    value: "docfx"
+}
diff --git a/.kokoro/presubmit/e2e-gerrit.cfg b/.kokoro/presubmit/e2e-gerrit.cfg
new file mode 100644
index 0000000000..d875f36060
--- /dev/null
+++ b/.kokoro/presubmit/e2e-gerrit.cfg
@@ -0,0 +1,7 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Only run this nox session.
+env_vars: {
+    key: "NOX_SESSION"
+    value: "system_noextras e2e notebook samples"
+}
diff --git a/.kokoro/presubmit/presubmit-gerrit.cfg b/.kokoro/presubmit/presubmit-gerrit.cfg
new file mode 100644
index 0000000000..18a4c35325
--- /dev/null
+++ b/.kokoro/presubmit/presubmit-gerrit.cfg
@@ -0,0 +1 @@
+# Format: //devtools/kokoro/config/proto/build.proto
diff --git a/OWNERS b/OWNERS
index 672da38afa..f86ad551ef 100644
--- a/OWNERS
+++ b/OWNERS
@@ -1,3 +1,4 @@
+ashleyxu@google.com
 bmil@google.com
 chelsealin@google.com
 garrettwu@google.com
diff --git a/README.rst b/README.rst
index 935c54cc8b..23aea446ff 100644
--- a/README.rst
+++ b/README.rst
@@ -57,9 +57,13 @@ internally to manage metadata on the service side. This session is tied to a
 BigQuery DataFrames uses the US multi-region as the default location, but you
 can use ``session_options.location`` to set a different location. Every query in a
 session is executed in the location where the session was created.
+BigQuery DataFrames
+auto-populates ``bf.options.bigquery.location`` if the user starts with
+``read_gbq/read_gbq_table/read_gbq_query()`` and specifies a table, either
+directly or in a SQL statement.
 
 If you want to reset the location of the created DataFrame or Series objects,
-can reset the session by executing ``bigframes.pandas.reset_session()``.
+you can reset the session by executing ``bigframes.pandas.reset_session()``.
 After that, you can reuse ``bigframes.pandas.options.bigquery.location`` to
 specify another location.
 
@@ -68,6 +72,11 @@ specify another location.
 querying is not in the US multi-region. If you try to read a table from
 another location, you get a NotFound exception.
 
+Project
+-------
+If ``bf.options.bigquery.project`` is not set, the ``$GOOGLE_CLOUD_PROJECT``
+environment variable is used, which is set in the notebook runtime serving the
+BigQuery Studio/Vertex Notebooks.
 
 ML Capabilities
 ---------------
diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py
index 7086269af9..27fe4a4fe6 100644
--- a/bigframes/core/__init__.py
+++ b/bigframes/core/__init__.py
@@ -518,8 +518,8 @@ def aggregate(
         """
         Apply aggregations to the expression.
         Arguments:
-            by_column_id: column id of the aggregation key, this is preserved through the transform
             aggregations: input_column_id, operation, output_column_id tuples
+            by_column_id: column id of the aggregation key, this is preserved through the transform
             dropna: whether null keys should be dropped
         """
         table = self.to_ibis_expr(ordering_mode="unordered")
diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py
index abf8b887d8..5dcd9fe753 100644
--- a/bigframes/core/block_transforms.py
+++ b/bigframes/core/block_transforms.py
@@ -117,6 +117,25 @@ def value_counts(
     return block.select_column(count_id).with_column_labels(["count"])
 
 
+def pct_change(block: blocks.Block, periods: int = 1) -> blocks.Block:
+    column_labels = block.column_labels
+    window_spec = core.WindowSpec(
+        preceding=periods if periods > 0 else None,
+        following=-periods if periods < 0 else None,
+    )
+
+    original_columns = block.value_columns
+    block, shift_columns = block.multi_apply_window_op(
+        original_columns, agg_ops.ShiftOp(periods), window_spec=window_spec
+    )
+    result_ids = []
+    for original_col, shifted_col in zip(original_columns, shift_columns):
+        block, change_id = block.apply_binary_op(original_col, shifted_col, ops.sub_op)
+        block, pct_change_id = block.apply_binary_op(change_id, shifted_col, ops.div_op)
+        result_ids.append(pct_change_id)
+    return block.select_columns(result_ids).with_column_labels(column_labels)
+
+
 def rank(
     block: blocks.Block,
     method: str = "average",
@@ -229,3 +248,160 @@ def dropna(block: blocks.Block, how: typing.Literal["all", "any"] = "any"):
     filtered_block = filtered_block.filter(predicate)
     filtered_block = filtered_block.select_columns(block.value_columns)
     return filtered_block
+
+
+def nsmallest(
+    block: blocks.Block,
+    n: int,
+    column_ids: typing.Sequence[str],
+    keep: str,
+) -> blocks.Block:
+    if keep not in ("first", "last", "all"):
+        raise ValueError("keep must be one of 'first', 'last', or 'all'")
+    if keep == "last":
+        block = block.reversed()
+    order_refs = [
+        ordering.OrderingColumnReference(
+            col_id, direction=ordering.OrderingDirection.ASC
+        )
+        for col_id in column_ids
+    ]
+    block = block.order_by(order_refs, stable=True)
+    if keep in ("first", "last"):
+        return block.slice(0, n)
+    else:  # keep == "all":
+        block, counter = block.apply_window_op(
+            column_ids[0],
+            agg_ops.rank_op,
+            window_spec=core.WindowSpec(ordering=order_refs),
+        )
+        block, condition = block.apply_unary_op(
+            counter, ops.partial_right(ops.le_op, n)
+        )
+        block = block.filter(condition)
+        return block.drop_columns([counter, condition])
+
+
+def nlargest(
+    block: blocks.Block,
+    n: int,
+    column_ids: typing.Sequence[str],
+    keep: str,
+) -> blocks.Block:
+    if keep not in ("first", "last", "all"):
+        raise ValueError("keep must be one of 'first', 'last', or 'all'")
+    if keep == "last":
+        block = block.reversed()
+    order_refs = [
+        ordering.OrderingColumnReference(
+            col_id, direction=ordering.OrderingDirection.DESC
+        )
+        for col_id in column_ids
+    ]
+    block = block.order_by(order_refs, stable=True)
+    if keep in ("first", "last"):
+        return block.slice(0, n)
+    else:  # keep == "all":
+        block, counter = block.apply_window_op(
+            column_ids[0],
+            agg_ops.rank_op,
+            window_spec=core.WindowSpec(ordering=order_refs),
+        )
+        block, condition = block.apply_unary_op(
+            counter, ops.partial_right(ops.le_op, n)
+        )
+        block = block.filter(condition)
+        return block.drop_columns([counter, condition])
+
+
+def skew(
+    block: blocks.Block,
+    skew_column_ids: typing.Sequence[str],
grouping_column_ids: typing.Sequence[str] = (), +) -> blocks.Block: + + original_columns = skew_column_ids + column_labels = block.select_columns(original_columns).column_labels + + block, delta3_ids = _mean_delta_to_power( + block, 3, original_columns, grouping_column_ids + ) + # counts, moment3 for each column + aggregations = [] + for i, col in enumerate(original_columns): + count_agg = (col, agg_ops.count_op) + moment3_agg = (delta3_ids[i], agg_ops.mean_op) + variance_agg = (col, agg_ops.PopVarOp()) + aggregations.extend([count_agg, moment3_agg, variance_agg]) + + block, agg_ids = block.aggregate( + by_column_ids=grouping_column_ids, aggregations=aggregations + ) + + skew_ids = [] + for i, col in enumerate(original_columns): + # Corresponds to order of aggregations in preceding loop + count_id, moment3_id, var_id = agg_ids[i * 3 : (i * 3) + 3] + block, skew_id = _skew_from_moments_and_count( + block, count_id, moment3_id, var_id + ) + skew_ids.append(skew_id) + + block = block.select_columns(skew_ids).with_column_labels(column_labels) + if not grouping_column_ids: + # When ungrouped, stack everything into single column so can be returned as series + block = block.stack() + block = block.drop_levels([block.index_columns[0]]) + return block + + +def _mean_delta_to_power( + block: blocks.Block, + n_power, + column_ids: typing.Sequence[str], + grouping_column_ids: typing.Sequence[str], +) -> typing.Tuple[blocks.Block, typing.Sequence[str]]: + """Calculate (x-mean(x))^n. Useful for calculating moment statistics such as skew and kurtosis.""" + window = core.WindowSpec(grouping_keys=grouping_column_ids) + block, mean_ids = block.multi_apply_window_op(column_ids, agg_ops.mean_op, window) + delta_ids = [] + cube_op = ops.partial_right(ops.pow_op, n_power) + for val_id, mean_val_id in zip(column_ids, mean_ids): + block, delta_id = block.apply_binary_op(val_id, mean_val_id, ops.sub_op) + block, delta_power_id = block.apply_unary_op(delta_id, cube_op) + block = block.drop_columns(delta_id) + delta_ids.append(delta_power_id) + return block, delta_ids + + +def _skew_from_moments_and_count( + block: blocks.Block, count_id: str, moment3_id: str, var_id: str +) -> typing.Tuple[blocks.Block, str]: + # Calculate skew using count, third moment and population variance + # See G1 estimator: + # https://en.wikipedia.org/wiki/Skewness#Sample_skewness + block, denominator_id = block.apply_unary_op( + var_id, ops.partial_right(ops.pow_op, 3 / 2) + ) + block, base_id = block.apply_binary_op(moment3_id, denominator_id, ops.div_op) + block, countminus1_id = block.apply_unary_op( + count_id, ops.partial_right(ops.sub_op, 1) + ) + block, countminus2_id = block.apply_unary_op( + count_id, ops.partial_right(ops.sub_op, 2) + ) + block, adjustment_id = block.apply_binary_op(count_id, countminus1_id, ops.mul_op) + block, adjustment_id = block.apply_unary_op( + adjustment_id, ops.partial_right(ops.pow_op, 1 / 2) + ) + block, adjustment_id = block.apply_binary_op( + adjustment_id, countminus2_id, ops.div_op + ) + block, skew_id = block.apply_binary_op(base_id, adjustment_id, ops.mul_op) + + # Need to produce NA if have less than 3 data points + block, na_cond_id = block.apply_unary_op(count_id, ops.partial_right(ops.ge_op, 3)) + block, skew_id = block.apply_binary_op( + skew_id, na_cond_id, ops.partial_arg3(ops.where_op, None) + ) + return block, skew_id diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 482cfd0141..5b414252ee 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ 
-709,8 +709,9 @@ def multi_apply_window_op( window_spec: core.WindowSpec, *, skip_null_groups: bool = False, - ) -> Block: + ) -> typing.Tuple[Block, typing.Sequence[str]]: block = self + result_ids = [] for i, col_id in enumerate(columns): label = self.col_id_to_label[col_id] block, result_id = block.apply_window_op( @@ -721,9 +722,8 @@ def multi_apply_window_op( result_label=label, skip_null_groups=skip_null_groups, ) - block = block.copy_values(result_id, col_id) - block = block.drop_columns([result_id]) - return block + result_ids.append(result_id) + return block, result_ids def multi_apply_unary_op( self, @@ -1123,7 +1123,9 @@ def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]: ) def add_prefix(self, prefix: str, axis: str | int | None = None) -> Block: - axis_number = bigframes.core.utils.get_axis_number(axis) + axis_number = bigframes.core.utils.get_axis_number( + "rows" if (axis is None) else axis + ) if axis_number == 0: expr = self._expr for index_col in self._index_columns: @@ -1140,7 +1142,9 @@ def add_prefix(self, prefix: str, axis: str | int | None = None) -> Block: return self.rename(columns=lambda label: f"{prefix}{label}") def add_suffix(self, suffix: str, axis: str | int | None = None) -> Block: - axis_number = bigframes.core.utils.get_axis_number(axis) + axis_number = bigframes.core.utils.get_axis_number( + "rows" if (axis is None) else axis + ) if axis_number == 0: expr = self._expr for index_col in self._index_columns: diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 589c5c251c..810e145d33 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -20,6 +20,7 @@ import bigframes.constants as constants import bigframes.core as core +import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks import bigframes.core.ordering as order import bigframes.core.utils as utils @@ -145,6 +146,16 @@ def var( self._raise_on_non_numeric("var") return self._aggregate_all(agg_ops.var_op, numeric_only=True) + def skew( + self, + *, + numeric_only: bool = False, + ) -> df.DataFrame: + if not numeric_only: + self._raise_on_non_numeric("skew") + block = block_ops.skew(self._block, self._selected_cols, self._by_col_ids) + return df.DataFrame(block) + def all(self) -> df.DataFrame: return self._aggregate_all(agg_ops.all_op) @@ -168,6 +179,22 @@ def cummax(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame: def cumprod(self, *args, **kwargs) -> df.DataFrame: return self._apply_window_op(agg_ops.product_op, numeric_only=True) + def shift(self, periods=1) -> series.Series: + window = core.WindowSpec( + grouping_keys=self._by_col_ids, + preceding=periods if periods > 0 else None, + following=-periods if periods < 0 else None, + ) + return self._apply_window_op(agg_ops.ShiftOp(periods), window=window) + + def diff(self, periods=1) -> series.Series: + window = core.WindowSpec( + grouping_keys=self._by_col_ids, + preceding=periods if periods > 0 else None, + following=-periods if periods < 0 else None, + ) + return self._apply_window_op(agg_ops.DiffOp(periods), window=window) + def agg(self, func=None, **kwargs) -> df.DataFrame: if func: if isinstance(func, str): @@ -323,10 +350,10 @@ def _apply_window_op( grouping_keys=self._by_col_ids, following=0 ) columns = self._aggregated_columns(numeric_only=numeric_only) - block = self._block.multi_apply_window_op( + block, result_ids = self._block.multi_apply_window_op( columns, op, window_spec=window_spec, 
skip_null_groups=self._dropna ) - block = block.select_columns(columns) + block = block.select_columns(result_ids) return df.DataFrame(block) def _resolve_label(self, label: blocks.Label) -> str: @@ -391,6 +418,10 @@ def std(self, *args, **kwargs) -> series.Series: def var(self, *args, **kwargs) -> series.Series: return self._aggregate(agg_ops.var_op) + def skew(self, *args, **kwargs) -> series.Series: + block = block_ops.skew(self._block, [self._value_column], self._by_col_ids) + return series.Series(block) + def prod(self, *args) -> series.Series: return self._aggregate(agg_ops.product_op) @@ -459,8 +490,13 @@ def shift(self, periods=1) -> series.Series: ) return self._apply_window_op(agg_ops.ShiftOp(periods), window=window) - def diff(self) -> series.Series: - return self._ungroup() - self.shift(1) + def diff(self, periods=1) -> series.Series: + window = core.WindowSpec( + grouping_keys=self._by_col_ids, + preceding=periods if periods > 0 else None, + following=-periods if periods < 0 else None, + ) + return self._apply_window_op(agg_ops.DiffOp(periods), window=window) def rolling(self, window: int, min_periods=None) -> windows.Window: # To get n size window, need current row and n-1 preceding rows. diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index 46091f211a..28bce05338 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -145,23 +145,41 @@ def __setitem__( value: bigframes.dataframe.SingleItemValue, ): if ( - not isinstance(key, tuple) - or len(key) != 2 - or not isinstance(key[0], slice) - or (key[0].start is not None and key[0].start != 0) - or (key[0].step is not None and key[0].step != 1) - or key[0].stop is not None + isinstance(key, tuple) + and len(key) == 2 + and isinstance(key[0], slice) + and (key[0].start is None or key[0].start == 0) + and (key[0].step is None or key[0].step == 1) + and key[0].stop is None ): + # TODO(swast): Support setting multiple columns with key[1] as a list + # of labels and value as a DataFrame. + df = self._dataframe.assign(**{key[1]: value}) + self._dataframe._set_block(df._get_block()) + elif ( + isinstance(key, tuple) + and len(key) == 2 + and isinstance(key[0], bigframes.series.Series) + and key[0].dtype == "boolean" + ) and pd.api.types.is_scalar(value): + new_column = key[0].map({True: value, False: None}) + try: + original_column = self._dataframe[key[1]] + except KeyError: + self._dataframe[key[1]] = new_column + return + try: + self._dataframe[key[1]] = new_column.fillna(original_column) + except ibis.common.exceptions.IbisTypeError: + raise TypeError( + f"Cannot assign scalar of type {type(value)} to column of type {original_column.dtype}, or index type of series argument does not match dataframe." + ) + else: raise NotImplementedError( - "Only setting a column by DataFrame.loc[:, 'column'] is supported." + "Only DataFrame.loc[:, 'column'] and DataFrame.loc[bool series, 'column'] = Scalar are supported." f"{constants.FEEDBACK_LINK}" ) - # TODO(swast): Support setting multiple columns with key[1] as a list - # of labels and value as a DataFrame. 
- df = self._dataframe.assign(**{key[1]: value}) - self._dataframe._set_block(df._get_block()) - class ILocDataFrameIndexer: def __init__(self, dataframe: bigframes.dataframe.DataFrame): diff --git a/bigframes/core/indexes/index.py b/bigframes/core/indexes/index.py index 04b9a36b64..748a68c944 100644 --- a/bigframes/core/indexes/index.py +++ b/bigframes/core/indexes/index.py @@ -53,6 +53,10 @@ def names(self) -> typing.Sequence[blocks.Label]: def names(self, values: typing.Sequence[blocks.Label]): return self._data._set_block(self._data._get_block().with_index_labels(values)) + @property + def nlevels(self) -> int: + return len(self._data._get_block().index_columns) + @property def shape(self) -> typing.Tuple[int]: return (self._data._get_block().shape[0],) @@ -97,6 +101,22 @@ def is_monotonic_decreasing(self) -> bool: ), ) + @property + def is_unique(self) -> bool: + # TODO: Cache this at block level + # Avoid circular imports + import bigframes.core.block_transforms as block_ops + import bigframes.dataframe as df + + duplicates_block, _ = block_ops.indicate_duplicates( + self._data._get_block(), self._data._get_block().index_columns + ) + duplicates_block = duplicates_block.with_column_labels( + ["values", "is_duplicate"] + ) + duplicates_df = df.DataFrame(duplicates_block) + return not duplicates_df["is_duplicate"].any() + def __getitem__(self, key: int) -> typing.Any: if isinstance(key, int): result_pd_df, _ = self._data._get_block().slice(key, key + 1, 1).to_pandas() diff --git a/bigframes/core/utils.py b/bigframes/core/utils.py index 1c0a2a1a81..75175690ce 100644 --- a/bigframes/core/utils.py +++ b/bigframes/core/utils.py @@ -23,8 +23,8 @@ UNNAMED_INDEX_ID = "bigframes_unnamed_index" -def get_axis_number(axis: typing.Union[str, int, None]) -> typing.Literal[0, 1]: - if axis in {0, "index", "rows", None}: +def get_axis_number(axis: typing.Union[str, int]) -> typing.Literal[0, 1]: + if axis in {0, "index", "rows"}: return 0 elif axis in {1, "columns"}: return 1 diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 31777f3fac..d65d4ce344 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -455,7 +455,7 @@ def __getattr__(self, key: str): raise AttributeError(key) def __repr__(self) -> str: - """Converts a DataFrame to a string. Calls compute. + """Converts a DataFrame to a string. Calls to_pandas. Only represents the first `bigframes.options.display.max_rows`. """ @@ -532,13 +532,14 @@ def _apply_binop( other: float | int | bigframes.series.Series | DataFrame, op, axis: str | int = "columns", + how: str = "outer", ): if isinstance(other, (float, int)): return self._apply_scalar_binop(other, op) elif isinstance(other, bigframes.series.Series): - return self._apply_series_binop(other, op, axis=axis) + return self._apply_series_binop(other, op, axis=axis, how=how) elif isinstance(other, DataFrame): - return self._apply_dataframe_binop(other, op) + return self._apply_dataframe_binop(other, op, how=how) raise NotImplementedError( f"binary operation is not implemented on the second operand of type {type(other).__name__}." 
f"{constants.FEEDBACK_LINK}" @@ -559,6 +560,7 @@ def _apply_series_binop( other: bigframes.series.Series, op: ops.BinaryOp, axis: str | int = "columns", + how: str = "outer", ) -> DataFrame: if axis not in ("columns", "index", 0, 1): raise ValueError(f"Invalid input: axis {axis}.") @@ -569,7 +571,7 @@ def _apply_series_binop( ) joined_index, (get_column_left, get_column_right) = self._block.index.join( - other._block.index, how="outer" + other._block.index, how=how ) series_column_id = other._value.get_name() @@ -591,22 +593,27 @@ def _apply_series_binop( return DataFrame(block) def _apply_dataframe_binop( - self, - other: DataFrame, - op: ops.BinaryOp, + self, other: DataFrame, op: ops.BinaryOp, how: str = "outer" ) -> DataFrame: # Join rows joined_index, (get_column_left, get_column_right) = self._block.index.join( - other._block.index, how="outer" + other._block.index, how=how ) # join columns schema + # indexers will be none for exact match columns, lcol_indexer, rcol_indexer = self.columns.join( - other.columns, how="outer", return_indexers=True + other.columns, how=how, return_indexers=True ) binop_result_ids = [] block = joined_index._block - for left_index, right_index in zip(lcol_indexer, rcol_indexer): + + column_indices = zip( + lcol_indexer if (lcol_indexer is not None) else range(len(columns)), + rcol_indexer if (lcol_indexer is not None) else range(len(columns)), + ) + + for left_index, right_index in column_indices: if left_index >= 0 and right_index >= 0: # -1 indices indicate missing left_col_id = self._block.value_columns[left_index] right_col_id = other._block.value_columns[right_index] @@ -617,13 +624,19 @@ def _apply_dataframe_binop( ) binop_result_ids.append(result_col_id) elif left_index >= 0: - dtype = self.dtypes[left_index] - block, null_col_id = block.create_constant(None, dtype=dtype) - binop_result_ids.append(null_col_id) + left_col_id = self._block.value_columns[left_index] + block, result_col_id = block.apply_unary_op( + get_column_left(left_col_id), + ops.partial_right(op, None), + ) + binop_result_ids.append(result_col_id) elif right_index >= 0: - dtype = other.dtypes[right_index] - block, null_col_id = block.create_constant(None, dtype=dtype) - binop_result_ids.append(null_col_id) + right_col_id = other._block.value_columns[right_index] + block, result_col_id = block.apply_unary_op( + get_column_right(right_col_id), + ops.partial_left(op, None), + ) + binop_result_ids.append(result_col_id) else: # Should not be possible raise ValueError("No right or left index.") @@ -759,6 +772,75 @@ def rpow( __rpow__ = rpow + def combine( + self, + other: DataFrame, + func: typing.Callable[ + [bigframes.series.Series, bigframes.series.Series], bigframes.series.Series + ], + fill_value=None, + overwrite: bool = True, + ) -> DataFrame: + # Join rows + joined_index, (get_column_left, get_column_right) = self._block.index.join( + other._block.index, how="outer" + ) + columns, lcol_indexer, rcol_indexer = self.columns.join( + other.columns, how="outer", return_indexers=True + ) + + column_indices = zip( + lcol_indexer if (lcol_indexer is not None) else range(len(columns)), + rcol_indexer if (lcol_indexer is not None) else range(len(columns)), + ) + + block = joined_index._block + results = [] + for left_index, right_index in column_indices: + if left_index >= 0 and right_index >= 0: # -1 indices indicate missing + left_col_id = get_column_left(self._block.value_columns[left_index]) + right_col_id = get_column_right(other._block.value_columns[right_index]) + left_series = 
bigframes.series.Series(block.select_column(left_col_id)) + right_series = bigframes.series.Series( + block.select_column(right_col_id) + ) + if fill_value is not None: + left_series = left_series.fillna(fill_value) + right_series = right_series.fillna(fill_value) + results.append(func(left_series, right_series)) + elif left_index >= 0: + # Does not exist in other + if overwrite: + dtype = self.dtypes[left_index] + block, null_col_id = block.create_constant(None, dtype=dtype) + result = bigframes.series.Series(block.select_column(null_col_id)) + results.append(result) + else: + left_col_id = get_column_left(self._block.value_columns[left_index]) + result = bigframes.series.Series(block.select_column(left_col_id)) + if fill_value is not None: + result = result.fillna(fill_value) + results.append(result) + elif right_index >= 0: + right_col_id = get_column_right(other._block.value_columns[right_index]) + result = bigframes.series.Series(block.select_column(right_col_id)) + if fill_value is not None: + result = result.fillna(fill_value) + results.append(result) + else: + # Should not be possible + raise ValueError("No right or left index.") + + if all([isinstance(val, bigframes.series.Series) for val in results]): + import bigframes.core.reshape as rs + + return rs.concat(results, axis=1) + else: + raise ValueError("'func' must return Series") + + def combine_first(self, other: DataFrame): + return self._apply_dataframe_binop(other, ops.fillna_op) + def to_pandas( self, max_download_size: Optional[int] = None, @@ -810,6 +892,28 @@ def head(self, n: int = 5) -> DataFrame: def tail(self, n: int = 5) -> DataFrame: return typing.cast(DataFrame, self.iloc[-n:]) + def nlargest( + self, + n: int, + columns: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + keep: str = "first", + ) -> DataFrame: + if keep not in ("first", "last", "all"): + raise ValueError("'keep must be one of 'first', 'last', or 'all'") + column_ids = self._sql_names(columns) + return DataFrame(block_ops.nlargest(self._block, n, column_ids, keep=keep)) + + def nsmallest( + self, + n: int, + columns: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + keep: str = "first", + ) -> DataFrame: + if keep not in ("first", "last", "all"): + raise ValueError("'keep must be one of 'first', 'last', or 'all'") + column_ids = self._sql_names(columns) + return DataFrame(block_ops.nsmallest(self._block, n, column_ids, keep=keep)) + def drop( self, labels: typing.Any = None, @@ -852,13 +956,50 @@ def drop( raise ValueError("Must specify 'labels' or 'index'/'columns") return DataFrame(block) - def droplevel(self, level: LevelsType): - resolved_level_ids = self._resolve_levels(level) - return DataFrame(self._block.drop_levels(resolved_level_ids)) + def droplevel(self, level: LevelsType, axis: int | str = 0): + axis_n = utils.get_axis_number(axis) + if axis_n == 0: + resolved_level_ids = self._resolve_levels(level) + return DataFrame(self._block.drop_levels(resolved_level_ids)) + else: + if isinstance(self.columns, pandas.MultiIndex): + new_df = self.copy() + new_df.columns = self.columns.droplevel(level) + return new_df + else: + raise ValueError("Columns must be a multiindex to drop levels.") + + def swaplevel(self, i: int = -2, j: int = -1, axis: int | str = 0): + axis_n = utils.get_axis_number(axis) + if axis_n == 0: + level_i = self._block.index_columns[i] + level_j = self._block.index_columns[j] + mapping = {level_i: level_j, level_j: level_i} + reordering = [ + mapping.get(index_id, index_id) + for index_id in 
self._block.index_columns + ] + return DataFrame(self._block.reorder_levels(reordering)) + else: + if isinstance(self.columns, pandas.MultiIndex): + new_df = self.copy() + new_df.columns = self.columns.swaplevel(i, j) + return new_df + else: + raise ValueError("Columns must be a multiindex to reorder levels.") - def reorder_levels(self, order: LevelsType): - resolved_level_ids = self._resolve_levels(order) - return DataFrame(self._block.reorder_levels(resolved_level_ids)) + def reorder_levels(self, order: LevelsType, axis: int | str = 0): + axis_n = utils.get_axis_number(axis) + if axis_n == 0: + resolved_level_ids = self._resolve_levels(order) + return DataFrame(self._block.reorder_levels(resolved_level_ids)) + else: + if isinstance(self.columns, pandas.MultiIndex): + new_df = self.copy() + new_df.columns = self.columns.reorder_levels(order) + return new_df + else: + raise ValueError("Columns must be a multiindex to reorder levels.") def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: if utils.is_list_like(level): @@ -1096,8 +1237,177 @@ def add_suffix(self, suffix: str, axis: int | str | None = None) -> DataFrame: axis = 1 if axis is None else axis return DataFrame(self._get_block().add_suffix(suffix, axis)) + def filter( + self, + items: typing.Optional[typing.Iterable] = None, + like: typing.Optional[str] = None, + regex: typing.Optional[str] = None, + axis: int | str | None = None, + ) -> DataFrame: + if sum([(items is not None), (like is not None), (regex is not None)]) != 1: + raise ValueError( + "Need to provide exactly one of 'items', 'like', or 'regex'" + ) + axis_n = utils.get_axis_number(axis) if (axis is not None) else 1 + if axis_n == 0: # row labels + return self._filter_rows(items, like, regex) + else: # column labels + return self._filter_columns(items, like, regex) + + def _filter_rows( + self, + items: typing.Optional[typing.Iterable] = None, + like: typing.Optional[str] = None, + regex: typing.Optional[str] = None, + ) -> DataFrame: + if len(self._block.index_columns) > 1: + raise NotImplementedError( + "Method filter does not support rows multiindex. 
{constants.FEEDBACK_LINK}" + ) + if (like is not None) or (regex is not None): + block = self._block + block, label_string_id = block.apply_unary_op( + self._block.index_columns[0], + ops.AsTypeOp(pandas.StringDtype(storage="pyarrow")), + ) + if like is not None: + block, mask_id = block.apply_unary_op( + label_string_id, ops.ContainsStringOp(pat=like) + ) + else: # regex + assert regex is not None + block, mask_id = block.apply_unary_op( + label_string_id, ops.ContainsRegexOp(pat=regex) + ) + + block = block.filter(mask_id) + block = block.select_columns(self._block.value_columns) + return DataFrame(block) + elif items is not None: + # Behavior matches pandas 2.1+, older pandas versions would reindex + block = self._block + block, mask_id = block.apply_unary_op( + self._block.index_columns[0], ops.IsInOp(values=list(items)) + ) + block = block.filter(mask_id) + block = block.select_columns(self._block.value_columns) + return DataFrame(block) + else: + raise ValueError("Need to provide 'items', 'like', or 'regex'") + + def _filter_columns( + self, + items: typing.Optional[typing.Iterable] = None, + like: typing.Optional[str] = None, + regex: typing.Optional[str] = None, + ) -> DataFrame: + if (like is not None) or (regex is not None): + + def label_filter(label): + label_str = label if isinstance(label, str) else str(label) + if like: + return like in label_str + else: # regex + return re.match(regex, label_str) is not None + + cols = [ + col_id + for col_id, label in zip(self._block.value_columns, self.columns) + if label_filter(label) + ] + return DataFrame(self._block.select_columns(cols)) + if items is not None: + # Behavior matches pandas 2.1+, older pandas versions would reorder using order of items + new_columns = self.columns.intersection(pandas.Index(items)) + return self.reindex(columns=new_columns) + else: + raise ValueError("Need to provide 'items', 'like', or 'regex'") + + def reindex( + self, + labels=None, + *, + index=None, + columns=None, + axis: typing.Optional[typing.Union[str, int]] = None, + validate: typing.Optional[bool] = None, + ): + if labels: + if index or columns: + raise ValueError("Cannot specify both 'labels' and 'index'/'columns") + axis_n = utils.get_axis_number(axis) if (axis is not None) else 0 + if axis_n == 0: + index = labels + else: + columns = labels + if (index is not None) and (columns is not None): + return self._reindex_columns(columns)._reindex_rows( + index, validate=validate or False + ) + if index is not None: + return self._reindex_rows(index, validate=validate or False) + if columns is not None: + return self._reindex_columns(columns) + + def _reindex_rows( + self, + index, + *, + validate: typing.Optional[bool] = None, + ): + if validate and not self.index.is_unique: + raise ValueError("Original index must be unique to reindex") + keep_original_names = False + if isinstance(index, indexes.Index): + new_indexer = DataFrame(data=index._data._get_block())[[]] + else: + if not isinstance(index, pandas.Index): + keep_original_names = True + index = pandas.Index(index) + if index.nlevels != self.index.nlevels: + raise NotImplementedError( + "Cannot reindex with index with different nlevels" + ) + new_indexer = DataFrame(index=index)[[]] + # multiindex join is senstive to index names, so we will set all these + result = new_indexer.rename_axis(range(new_indexer.index.nlevels)).join( + self.rename_axis(range(self.index.nlevels)), + how="left", + ) + # and then reset the names after the join + return result.rename_axis( + self.index.names if 
keep_original_names else index.names + ) + + def _reindex_columns(self, columns): + block = self._block + new_column_index, indexer = self.columns.reindex(columns) + result_cols = [] + for label, index in zip(columns, indexer): + if index >= 0: + result_cols.append(self._block.value_columns[index]) + else: + block, null_col = block.create_constant( + pandas.NA, label, dtype=pandas.Float64Dtype() + ) + result_cols.append(null_col) + result_df = DataFrame(block.select_columns(result_cols)) + result_df.columns = new_column_index + return result_df + + def reindex_like(self, other: DataFrame, *, validate: typing.Optional[bool] = None): + return self.reindex(index=other.index, columns=other.columns, validate=validate) + def fillna(self, value=None) -> DataFrame: - return self._apply_binop(value, ops.fillna_op) + return self._apply_binop(value, ops.fillna_op, how="left") + + def ffill(self, *, limit: typing.Optional[int] = None) -> DataFrame: + window = bigframes.core.WindowSpec(preceding=limit, following=0) + return self._apply_window_op(agg_ops.LastNonNullOp(), window) + + def bfill(self, *, limit: typing.Optional[int] = None) -> DataFrame: + window = bigframes.core.WindowSpec(preceding=0, following=limit) + return self._apply_window_op(agg_ops.FirstNonNullOp(), window) def isin(self, values) -> DataFrame: if utils.is_dict_like(values): @@ -1309,6 +1619,14 @@ def describe(self) -> DataFrame: ) return typing.cast(DataFrame, result) + def skew(self, *, numeric_only: bool = False): + if not numeric_only: + frame = self._raise_on_non_numeric("skew") + else: + frame = self._drop_non_numeric() + result_block = block_ops.skew(frame._block, frame._block.value_columns) + return bigframes.series.Series(result_block) + def pivot( self, *, @@ -1702,17 +2020,29 @@ def shift(self, periods: int = 1) -> DataFrame: ) return self._apply_window_op(agg_ops.ShiftOp(periods), window) + def diff(self, periods: int = 1) -> DataFrame: + window = bigframes.core.WindowSpec( + preceding=periods if periods > 0 else None, + following=-periods if periods < 0 else None, + ) + return self._apply_window_op(agg_ops.DiffOp(periods), window) + + def pct_change(self, periods: int = 1) -> DataFrame: + # Future versions of pandas will not perfrom ffill automatically + df = self.ffill() + return DataFrame(block_ops.pct_change(df._block, periods=periods)) + def _apply_window_op( self, op: agg_ops.WindowOp, window_spec: bigframes.core.WindowSpec, ): - block = self._block.multi_apply_window_op( + block, result_ids = self._block.multi_apply_window_op( self._block.value_columns, op, window_spec=window_spec, ) - return DataFrame(block) + return DataFrame(block.select_columns(result_ids)) def sample( self, @@ -1875,6 +2205,98 @@ def to_parquet(self, path: str, *, index: bool = True) -> None: _, query_job = self._block.expr._session._start_query(export_data_statement) self._set_internal_query_job(query_job) + def to_dict( + self, + orient: Literal[ + "dict", "list", "series", "split", "tight", "records", "index" + ] = "dict", + into: type[dict] = dict, + **kwargs, + ) -> dict | list[dict]: + return self.to_pandas().to_dict(orient, into, **kwargs) # type: ignore + + def to_excel(self, excel_writer, sheet_name: str = "Sheet1", **kwargs) -> None: + return self.to_pandas().to_excel(excel_writer, sheet_name, **kwargs) + + def to_latex( + self, + buf=None, + columns: Sequence | None = None, + header: bool | Sequence[str] = True, + index: bool = True, + **kwargs, + ) -> str | None: + return self.to_pandas().to_latex( + buf, columns=columns, 
header=header, index=index, **kwargs # type: ignore + ) + + def to_records( + self, index: bool = True, column_dtypes=None, index_dtypes=None + ) -> numpy.recarray: + return self.to_pandas().to_records(index, column_dtypes, index_dtypes) + + def to_string( + self, + buf=None, + columns: Sequence[str] | None = None, + col_space=None, + header: bool | Sequence[str] = True, + index: bool = True, + na_rep: str = "NaN", + formatters=None, + float_format=None, + sparsify: bool | None = None, + index_names: bool = True, + justify: str | None = None, + max_rows: int | None = None, + max_cols: int | None = None, + show_dimensions: bool = False, + decimal: str = ".", + line_width: int | None = None, + min_rows: int | None = None, + max_colwidth: int | None = None, + encoding: str | None = None, + ) -> str | None: + return self.to_pandas().to_string( + buf, + columns, # type: ignore + col_space, + header, # type: ignore + index, + na_rep, + formatters, + float_format, + sparsify, + index_names, + justify, + max_rows, + max_cols, + show_dimensions, + decimal, + line_width, + min_rows, + max_colwidth, + encoding, + ) + + def to_markdown( + self, + buf=None, + mode: str = "wt", + index: bool = True, + **kwargs, + ) -> str | None: + return self.to_pandas().to_markdown(buf, mode, index, **kwargs) # type: ignore + + def to_pickle(self, path, **kwargs) -> None: + return self.to_pandas().to_pickle(path, **kwargs) + + def to_orc(self, path=None, **kwargs) -> bytes | None: + as_pandas = self.to_pandas() + # to_orc only works with default index + as_pandas_default_index = as_pandas.reset_index() + return as_pandas_default_index.to_orc(path, **kwargs) + def _apply_unary_op(self, operation: ops.UnaryOp) -> DataFrame: block = self._block.multi_apply_unary_op(self._block.value_columns, operation) return DataFrame(block) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 95cf737b2e..af3209b0e1 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -157,7 +157,7 @@ def ibis_dtype_to_bigframes_dtype( return IBIS_TO_BIGFRAMES[ibis_dtype] else: raise ValueError( - f"Unexpected Ibis data type {type(ibis_dtype)}. {constants.FEEDBACK_LINK}" + f"Unexpected Ibis data type {ibis_dtype}. {constants.FEEDBACK_LINK}" ) diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py index 9f9d9f85d0..f07274f8fc 100644 --- a/bigframes/ml/base.py +++ b/bigframes/ml/base.py @@ -133,7 +133,7 @@ class TrainablePredictor(Predictor): Also the predictor can be attached to a pipeline with transformers.""" @abc.abstractmethod - def fit(self, X, y, transforms): + def _fit(self, X, y, transforms=None): pass @abc.abstractmethod @@ -146,6 +146,36 @@ def to_gbq(self, model_name, replace): pass +class SupervisedTrainablePredictor(TrainablePredictor): + """A BigQuery DataFrames ML Supervised Model base class that can be used to fit and predict outputs. + + Need to provide both X and y in supervised tasks.""" + + _T = TypeVar("_T", bound="SupervisedTrainablePredictor") + + def fit( + self: _T, + X: Union[bpd.DataFrame, bpd.Series], + y: Union[bpd.DataFrame, bpd.Series], + ) -> _T: + return self._fit(X, y) + + +class UnsupervisedTrainablePredictor(TrainablePredictor): + """A BigQuery DataFrames ML Unsupervised Model base class that can be used to fit and predict outputs. 
+ + Only need to provide both X (y is optional and ignored) in unsupervised tasks.""" + + _T = TypeVar("_T", bound="UnsupervisedTrainablePredictor") + + def fit( + self: _T, + X: Union[bpd.DataFrame, bpd.Series], + y: Optional[Union[bpd.DataFrame, bpd.Series]] = None, + ) -> _T: + return self._fit(X, y) + + class Transformer(BaseEstimator): """A BigQuery DataFrames Transformer base class that transforms data. diff --git a/bigframes/ml/cluster.py b/bigframes/ml/cluster.py index 2501d2b21f..14cce2879e 100644 --- a/bigframes/ml/cluster.py +++ b/bigframes/ml/cluster.py @@ -28,13 +28,13 @@ class KMeans( + base.UnsupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.cluster._kmeans.KMeans, - base.TrainablePredictor, ): __doc__ = third_party.bigframes_vendored.sklearn.cluster._kmeans.KMeans.__doc__ - def __init__(self, n_clusters=8): + def __init__(self, n_clusters: int = 8): self.n_clusters = n_clusters self._bqml_model: Optional[core.BqmlModel] = None @@ -58,7 +58,7 @@ def _bqml_options(self) -> Dict[str, str | int | float | List[str]]: """The model options as they will be set for BQML""" return {"model_type": "KMEANS", "num_clusters": self.n_clusters} - def fit( + def _fit( self, X: Union[bpd.DataFrame, bpd.Series], y=None, # ignored diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index df01303ffa..a3d3503ad0 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -33,8 +33,8 @@ class ColumnTransformer( - third_party.bigframes_vendored.sklearn.compose._column_transformer.ColumnTransformer, base.Transformer, + third_party.bigframes_vendored.sklearn.compose._column_transformer.ColumnTransformer, ): __doc__ = ( third_party.bigframes_vendored.sklearn.compose._column_transformer.ColumnTransformer.__doc__ diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index 75b57f2e54..0cfe3b3ddf 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -28,12 +28,12 @@ class PCA( + base.UnsupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.decomposition._pca.PCA, - base.TrainablePredictor, ): __doc__ = third_party.bigframes_vendored.sklearn.decomposition._pca.PCA.__doc__ - def __init__(self, n_components=3): + def __init__(self, n_components: int = 3): self.n_components = n_components self._bqml_model: Optional[core.BqmlModel] = None @@ -52,7 +52,7 @@ def _from_bq(cls, session: bigframes.Session, model: bigquery.Model) -> PCA: new_pca._bqml_model = core.BqmlModel(session, model) return new_pca - def fit( + def _fit( self, X: Union[bpd.DataFrame, bpd.Series], y=None, diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index 56a0cc3d94..142edaa00f 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -48,8 +48,8 @@ class XGBRegressor( + base.SupervisedTrainablePredictor, third_party.bigframes_vendored.xgboost.sklearn.XGBRegressor, - base.TrainablePredictor, ): __doc__ = third_party.bigframes_vendored.xgboost.sklearn.XGBRegressor.__doc__ @@ -57,22 +57,22 @@ def __init__( self, num_parallel_tree: int = 1, booster: Literal["gbtree", "dart"] = "gbtree", - dart_normalized_type: Literal["TREE", "FOREST"] = "TREE", + dart_normalized_type: Literal["tree", "forest"] = "tree", tree_method: Literal["auto", "exact", "approx", "hist"] = "auto", min_tree_child_weight: int = 1, - colsample_bytree=1.0, - colsample_bylevel=1.0, - colsample_bynode=1.0, - gamma=0.0, + colsample_bytree: float = 1.0, + colsample_bylevel: float = 1.0, + colsample_bynode: float = 1.0, + gamma: float = 0.0, 
max_depth: int = 6, - subsample=1.0, - reg_alpha=0.0, - reg_lambda=1.0, - early_stop=True, - learning_rate=0.3, + subsample: float = 1.0, + reg_alpha: float = 0.0, + reg_lambda: float = 1.0, + early_stop: float = True, + learning_rate: float = 0.3, max_iterations: int = 20, - min_rel_progress=0.01, - enable_global_explain=False, + min_rel_progress: float = 0.01, + enable_global_explain: bool = False, xgboost_version: Literal["0.9", "1.1"] = "0.9", ): self.num_parallel_tree = num_parallel_tree @@ -143,7 +143,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: "xgboost_version": self.xgboost_version, } - def fit( + def _fit( self, X: Union[bpd.DataFrame, bpd.Series], y: Union[bpd.DataFrame, bpd.Series], @@ -211,8 +211,8 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBRegressor: class XGBClassifier( + base.SupervisedTrainablePredictor, third_party.bigframes_vendored.xgboost.sklearn.XGBClassifier, - base.TrainablePredictor, ): __doc__ = third_party.bigframes_vendored.xgboost.sklearn.XGBClassifier.__doc__ @@ -221,22 +221,22 @@ def __init__( self, num_parallel_tree: int = 1, booster: Literal["gbtree", "dart"] = "gbtree", - dart_normalized_type: Literal["TREE", "FOREST"] = "TREE", + dart_normalized_type: Literal["tree", "forest"] = "tree", tree_method: Literal["auto", "exact", "approx", "hist"] = "auto", min_tree_child_weight: int = 1, - colsample_bytree=1.0, - colsample_bylevel=1.0, - colsample_bynode=1.0, - gamma=0.0, + colsample_bytree: float = 1.0, + colsample_bylevel: float = 1.0, + colsample_bynode: float = 1.0, + gamma: float = 0.0, max_depth: int = 6, - subsample=1.0, - reg_alpha=0.0, - reg_lambda=1.0, - early_stop=True, - learning_rate=0.3, + subsample: float = 1.0, + reg_alpha: float = 0.0, + reg_lambda: float = 1.0, + early_stop: bool = True, + learning_rate: float = 0.3, max_iterations: int = 20, - min_rel_progress=0.01, - enable_global_explain=False, + min_rel_progress: float = 0.01, + enable_global_explain: bool = False, xgboost_version: Literal["0.9", "1.1"] = "0.9", ): self.num_parallel_tree = num_parallel_tree @@ -307,7 +307,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: "xgboost_version": self.xgboost_version, } - def fit( + def _fit( self, X: Union[bpd.DataFrame, bpd.Series], y: Union[bpd.DataFrame, bpd.Series], @@ -374,8 +374,8 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBClassifier: class RandomForestRegressor( + base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.ensemble._forest.RandomForestRegressor, - base.TrainablePredictor, ): __doc__ = ( @@ -461,7 +461,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: "xgboost_version": self.xgboost_version, } - def fit( + def _fit( self, X: Union[bpd.DataFrame, bpd.Series], y: Union[bpd.DataFrame, bpd.Series], @@ -542,8 +542,8 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestRegresso class RandomForestClassifier( + base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.ensemble._forest.RandomForestClassifier, - base.TrainablePredictor, ): __doc__ = ( @@ -629,7 +629,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: "xgboost_version": self.xgboost_version, } - def fit( + def _fit( self, X: Union[bpd.DataFrame, bpd.Series], y: Union[bpd.DataFrame, bpd.Series], diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index b7e0553ecb..22d81294fc 100644 --- a/bigframes/ml/forecasting.py +++ 
b/bigframes/ml/forecasting.py @@ -27,7 +27,7 @@ _PREDICT_OUTPUT_COLUMNS = ["forecast_timestamp", "forecast_value"] -class ARIMAPlus(base.TrainablePredictor): +class ARIMAPlus(base.SupervisedTrainablePredictor): """Time Series ARIMA Plus model.""" def __init__(self): @@ -48,7 +48,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: """The model options as they will be set for BQML.""" return {"model_type": "ARIMA_PLUS"} - def fit( + def _fit( self, X: Union[bpd.DataFrame, bpd.Series], y: Union[bpd.DataFrame, bpd.Series], diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index 0b18db9315..1606a15d73 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -30,8 +30,8 @@ class LinearRegression( + base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.linear_model._base.LinearRegression, - base.TrainablePredictor, ): __doc__ = ( third_party.bigframes_vendored.sklearn.linear_model._base.LinearRegression.__doc__ @@ -39,7 +39,7 @@ class LinearRegression( def __init__( self, - fit_intercept=True, + fit_intercept: bool = True, ): self.fit_intercept = fit_intercept self._bqml_model: Optional[core.BqmlModel] = None @@ -71,7 +71,7 @@ def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]: "fit_intercept": self.fit_intercept, } - def fit( + def _fit( self, X: Union[bpd.DataFrame, bpd.Series], y: Union[bpd.DataFrame, bpd.Series], @@ -136,8 +136,8 @@ def to_gbq(self, model_name: str, replace: bool = False) -> LinearRegression: class LogisticRegression( + base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.linear_model._logistic.LogisticRegression, - base.TrainablePredictor, ): __doc__ = ( third_party.bigframes_vendored.sklearn.linear_model._logistic.LogisticRegression.__doc__ @@ -189,12 +189,13 @@ def _bqml_options(self) -> Dict[str, str | int | float | List[str]]: # "class_weights": self.class_weights, } - def fit( + def _fit( self, X: Union[bpd.DataFrame, bpd.Series], y: Union[bpd.DataFrame, bpd.Series], transforms: Optional[List[str]] = None, ) -> LogisticRegression: + """Fit model with transforms.""" X, y = utils.convert_to_dataframe(X, y) self._bqml_model = core.create_bqml_model( diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 345e3deb72..973fbf2ad9 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -100,26 +100,26 @@ def predict( Temperature controls the degree of randomness in token selection. Lower temperatures are good for prompts that expect a true or correct response, while higher temperatures can lead to more diverse or unexpected results. A temperature of 0 is deterministic: the highest probability token is always selected. For most use cases, try starting with a temperature of 0.2. - Default 0. + Default 0. Possible values [0.0, 1.0]. max_output_tokens (int, default 128): Maximum number of tokens that can be generated in the response. Specify a lower value for shorter responses and a higher value for longer responses. A token may be smaller than a word. A token is approximately four characters. 100 tokens correspond to roughly 60-80 words. - Default 128. + Default 128. Possible values [1, 1024]. top_k (int, default 40): Top-k changes how the model selects tokens for output. A top-k of 1 means the selected token is the most probable among all tokens in the model’s vocabulary (also called greedy decoding), while a top-k of 3 means that the next token is selected from among the 3 most probable tokens (using temperature). 
For each token selection step, the top K tokens with the highest probabilities are sampled. Then tokens are further filtered based on topP with the final token selected using temperature sampling. Specify a lower value for less random responses and a higher value for more random responses. - Default 40. + Default 40. Possible values [1, 40]. top_p (float, default 0.95):: Top-p changes how the model selects tokens for output. Tokens are selected from most K (see topK parameter) probable to least until the sum of their probabilities equals the top-p value. For example, if tokens A, B, and C have a probability of 0.3, 0.2, and 0.1 and the top-p value is 0.5, then the model will select either A or B as the next token (using temperature) and not consider C at all. Specify a lower value for less random responses and a higher value for more random responses. - Default 0.95. + Default 0.95. Possible values [0.0, 1.0]. Returns: diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py index bfd0392526..bff0bf36ad 100644 --- a/bigframes/ml/pipeline.py +++ b/bigframes/ml/pipeline.py @@ -24,14 +24,14 @@ import bigframes import bigframes.constants as constants -from bigframes.ml import base, compose, loader, preprocessing, utils +from bigframes.ml import base, compose, forecasting, loader, preprocessing, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.pipeline class Pipeline( - third_party.bigframes_vendored.sklearn.pipeline.Pipeline, base.BaseEstimator, + third_party.bigframes_vendored.sklearn.pipeline.Pipeline, ): __doc__ = third_party.bigframes_vendored.sklearn.pipeline.Pipeline.__doc__ @@ -55,7 +55,7 @@ def __init__(self, steps: List[Tuple[str, base.BaseEstimator]]): self._transform = transform else: raise NotImplementedError( - f"Transform {transform} is not yet supported by Pipeline. {constants.FEEDBACK_LINK}" + f"Transformer type {type(transform)} is not yet supported by Pipeline. {constants.FEEDBACK_LINK}" ) if not isinstance( @@ -63,7 +63,13 @@ def __init__(self, steps: List[Tuple[str, base.BaseEstimator]]): base.TrainablePredictor, ): raise NotImplementedError( - f"Estimator {estimator} is not supported by Pipeline. {constants.FEEDBACK_LINK}" + f"Estimator type {type(estimator)} is not supported by Pipeline. {constants.FEEDBACK_LINK}" + ) + + # BQML doesn't support ARIMA_PLUS with transformers. b/298676367 + if isinstance(estimator, forecasting.ARIMAPlus): + raise NotImplementedError( + f"Estimator type {type(estimator)} is not supported by Pipeline. 
{constants.FEEDBACK_LINK}" ) self._transform = transform @@ -92,7 +98,7 @@ def fit( (y,) = utils.convert_to_dataframe(y) transform_sqls.extend(y.columns.tolist()) - self._estimator.fit(X=X, y=y, transforms=transform_sqls) + self._estimator._fit(X=X, y=y, transforms=transform_sqls) return self def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index ee46a37052..8add7bdd76 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -29,8 +29,8 @@ class StandardScaler( - third_party.bigframes_vendored.sklearn.preprocessing._data.StandardScaler, base.Transformer, + third_party.bigframes_vendored.sklearn.preprocessing._data.StandardScaler, ): __doc__ = ( third_party.bigframes_vendored.sklearn.preprocessing._data.StandardScaler.__doc__ @@ -105,8 +105,8 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: class OneHotEncoder( - third_party.bigframes_vendored.sklearn.preprocessing._encoder.OneHotEncoder, base.Transformer, + third_party.bigframes_vendored.sklearn.preprocessing._encoder.OneHotEncoder, ): # BQML max value https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-one-hot-encoder#syntax TOP_K_DEFAULT = 1000000 diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index 874c264194..23271e8220 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -19,8 +19,10 @@ import ibis import ibis.expr.datatypes as ibis_dtypes import ibis.expr.types as ibis_types +from pandas import Int64Dtype import bigframes.constants as constants +import bigframes.dtypes as dtypes import third_party.bigframes_vendored.ibis.expr.operations as vendored_ibis_ops @@ -202,6 +204,16 @@ def _as_ibis(self, x: ibis_types.Column, window=None) -> ibis_types.Value: ) +class PopVarOp(AggregateOp): + name = "popvar" + + @numeric_op + def _as_ibis(self, x: ibis_types.Column, window=None) -> ibis_types.Value: + return _apply_window_if_present( + typing.cast(ibis_types.NumericColumn, x).var(how="pop"), window + ) + + class CountOp(AggregateOp): name = "count" @@ -217,16 +229,20 @@ def skips_nulls(self): class CutOp(WindowOp): def __init__(self, bins: int): - self._bins = bins + self._bins_ibis = dtypes.literal_to_ibis_scalar(bins, force_dtype=Int64Dtype()) + self._bins_int = bins def _as_ibis(self, x: ibis_types.Column, window=None): col_min = _apply_window_if_present(x.min(), window) col_max = _apply_window_if_present(x.max(), window) - bin_width = (col_max - col_min) / self._bins + bin_width = (col_max - col_min) / self._bins_ibis out = ibis.case() - for bin in range(self._bins - 1): - out = out.when(x <= (col_min + (bin + 1) * bin_width), bin) - out = out.when(x.notnull(), self._bins - 1) + for this_bin in range(self._bins_int - 1): + out = out.when( + x <= (col_min + (this_bin + 1) * bin_width), + dtypes.literal_to_ibis_scalar(this_bin, force_dtype=Int64Dtype()), + ) + out = out.when(x.notnull(), self._bins_ibis - 1) return out.end() @property @@ -305,6 +321,28 @@ def _as_ibis(self, column: ibis_types.Column, window=None) -> ibis_types.Value: return _apply_window_if_present(column.first(), window) +class FirstNonNullOp(WindowOp): + @property + def skips_nulls(self): + return False + + def _as_ibis(self, column: ibis_types.Column, window=None) -> ibis_types.Value: + return _apply_window_if_present( + vendored_ibis_ops.FirstNonNullValue(column).to_expr(), window # type: ignore + ) + + 
+class LastNonNullOp(WindowOp): + @property + def skips_nulls(self): + return False + + def _as_ibis(self, column: ibis_types.Column, window=None) -> ibis_types.Value: + return _apply_window_if_present( + vendored_ibis_ops.LastNonNullValue(column).to_expr(), window # type: ignore + ) + + class ShiftOp(WindowOp): def __init__(self, periods: int): self._periods = periods @@ -321,6 +359,28 @@ def skips_nulls(self): return False +class DiffOp(WindowOp): + def __init__(self, periods: int): + self._periods = periods + + def _as_ibis(self, column: ibis_types.Column, window=None) -> ibis_types.Value: + shifted = ShiftOp(self._periods)._as_ibis(column, window) + if column.type().is_boolean(): + return typing.cast(ibis_types.BooleanColumn, column) != typing.cast( + ibis_types.BooleanColumn, shifted + ) + elif column.type().is_numeric(): + return typing.cast(ibis_types.NumericColumn, column) - typing.cast( + ibis_types.NumericColumn, shifted + ) + else: + raise TypeError(f"Cannot perform diff on type{column.type()}") + + @property + def skips_nulls(self): + return False + + class AllOp(AggregateOp): def _as_ibis( self, column: ibis_types.Column, window=None diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index e1a23e67a1..cb27834590 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -399,6 +399,7 @@ def remote_function( dataset: Optional[str] = None, bigquery_connection: Optional[str] = None, reuse: bool = True, + name: Optional[str] = None, ): return global_session.with_default_session( bigframes.session.Session.remote_function, @@ -407,6 +408,7 @@ def remote_function( dataset=dataset, bigquery_connection=bigquery_connection, reuse=reuse, + name=name, ) diff --git a/bigframes/remote_function.py b/bigframes/remote_function.py index 6932e5b580..6fc2f8e59f 100644 --- a/bigframes/remote_function.py +++ b/bigframes/remote_function.py @@ -28,6 +28,8 @@ import textwrap from typing import List, NamedTuple, Optional, Sequence, TYPE_CHECKING +import requests + if TYPE_CHECKING: from bigframes.session import Session @@ -99,7 +101,7 @@ def get_remote_function_locations(bq_location): def _get_hash(def_): - "Get hash of a function." + "Get hash (32 digits alphanumeric) of a function." def_repr = cloudpickle.dumps(def_, protocol=_pickle_protocol_version) return hashlib.md5(def_repr).hexdigest() @@ -128,7 +130,7 @@ class IbisSignature(NamedTuple): def get_cloud_function_name(def_, uniq_suffix=None): - """Get the name of the cloud function.""" + "Get a name for the cloud function for the given user defined function." cf_name = _get_hash(def_) cf_name = f"bigframes-{cf_name}" # for identification if uniq_suffix: @@ -137,7 +139,7 @@ def get_cloud_function_name(def_, uniq_suffix=None): def get_remote_function_name(def_, uniq_suffix=None): - """Get the name for the BQ remote function.""" + "Get a name for the BQ remote function for the given user defined function." bq_rf_name = _get_hash(def_) bq_rf_name = f"bigframes_{bq_rf_name}" # for identification if uniq_suffix: @@ -206,9 +208,15 @@ def create_bq_remote_function( query_job.result() # Wait for the job to complete. logger.info(f"Created remote function {query_job.ddl_target_routine}") + def get_cloud_function_fully_qualified_parent(self): + "Get the fully qualilfied parent for a cloud function." 
+ return self._cloud_functions_client.common_location_path( + self._gcp_project_id, self._cloud_function_region + ) + def get_cloud_function_fully_qualified_name(self, name): "Get the fully qualilfied name for a cloud function." - return "projects/{}/locations/{}/functions/{}".format( + return self._cloud_functions_client.function_path( self._gcp_project_id, self._cloud_function_region, name ) @@ -319,6 +327,7 @@ def create_cloud_function(self, def_, cf_name): # Build and deploy folder structure containing cloud function with tempfile.TemporaryDirectory() as dir: entry_point = self.generate_cloud_function_code(def_, dir) + archive_path = shutil.make_archive(dir, "zip", dir) # We are creating cloud function source code from the currently running # python version. Use the same version to deploy. This is necessary @@ -331,50 +340,56 @@ def create_cloud_function(self, def_, cf_name): sys.version_info.major, sys.version_info.minor ) - # deploy/redeploy the cloud function - # TODO(shobs): Figure out a way to skip this step if a cloud function - # already exists with the same name and source code - command = ( - "gcloud functions deploy" - + f" {cf_name} --gen2" - + f" --runtime={python_version}" - + f" --project={self._gcp_project_id}" - + f" --region={self._cloud_function_region}" - + f" --source={dir}" - + f" --entry-point={entry_point}" - + " --trigger-http" + # Determine an upload URL for user code + upload_url_request = functions_v2.GenerateUploadUrlRequest() + upload_url_request.parent = self.get_cloud_function_fully_qualified_parent() + upload_url_response = self._cloud_functions_client.generate_upload_url( + request=upload_url_request ) - # If the cloud function is being created for the first time, then let's - # make it not allow unauthenticated calls. If it was previously created - # then this invocation will update it, in which case do not touch that - # aspect and let the previous policy hold. The reason we do this is to - # avoid an IAM permission needed to update the invocation policy. - # For example, when a cloud function is being created for the first - # time, i.e. - # $ gcloud functions deploy python-foo-http --gen2 --runtime=python310 - # --region=us-central1 - # --source=/source/code/dir - # --entry-point=foo_http - # --trigger-http - # --no-allow-unauthenticated - # It works. When an invocation of the same command is done for the - # second time, it may run into an error like: - # ERROR: (gcloud.functions.deploy) PERMISSION_DENIED: Permission - # 'run.services.setIamPolicy' denied on resource - # 'projects/my_project/locations/us-central1/services/python-foo-http' (or resource may not exist) - # But when --no-allow-unauthenticated is omitted then it goes through. - # It suggests that in the second invocation the command is trying to set - # the IAM policy of the service, and the user running BigQuery - # DataFrame may not have privilege to do so, so better avoid this - # if we can. - if self.get_cloud_function_endpoint(cf_name): - logger.info(f"Updating existing cloud function: {command}") - else: - command = f"{command} --no-allow-unauthenticated" - logger.info(f"Creating new cloud function: {command}") - - _run_system_command(command) + # Upload the code to GCS + with open(archive_path, "rb") as f: + response = requests.put( + upload_url_response.upload_url, + data=f, + headers={"content-type": "application/zip"}, + ) + if response.status_code != 200: + raise RuntimeError( + "Failed to upload user code. 
code={}, reason={}, text={}".format( + response.status_code, response.reason, response.text + ) + ) + + # Deploy Cloud Function + create_function_request = functions_v2.CreateFunctionRequest() + create_function_request.parent = ( + self.get_cloud_function_fully_qualified_parent() + ) + create_function_request.function_id = cf_name + function = functions_v2.Function() + function.name = self.get_cloud_function_fully_qualified_name(cf_name) + function.build_config = functions_v2.BuildConfig() + function.build_config.runtime = python_version + function.build_config.entry_point = entry_point + function.build_config.source = functions_v2.Source() + function.build_config.source.storage_source = functions_v2.StorageSource() + function.build_config.source.storage_source.bucket = ( + upload_url_response.storage_source.bucket + ) + function.build_config.source.storage_source.object_ = ( + upload_url_response.storage_source.object_ + ) + create_function_request.function = function + + # Create the cloud function and wait for it to be ready to use + operation = self._cloud_functions_client.create_function( + request=create_function_request + ) + operation.result() + + # Cleanup + os.remove(archive_path) # Fetch the endpoint of the just created function endpoint = self.get_cloud_function_endpoint(cf_name) @@ -389,23 +404,47 @@ def create_cloud_function(self, def_, cf_name): return endpoint def provision_bq_remote_function( - self, def_, input_types, output_type, uniq_suffix=None + self, + def_, + input_types, + output_type, + reuse, + name, ): """Provision a BigQuery remote function.""" - # Derive the name of the underlying cloud function and first create - # it if it does not exist + # If reuse of any existing function with the same name (indicated by the + # same hash of its source code) is not intended, then attach a unique + # suffix to the intended function name to make it unique. + uniq_suffix = None + if not reuse: + uniq_suffix = "".join( + random.choices(string.ascii_lowercase + string.digits, k=8) + ) + + # Derive the name of the cloud function underlying the intended BQ + # remote function cloud_function_name = get_cloud_function_name(def_, uniq_suffix) cf_endpoint = self.get_cloud_function_endpoint(cloud_function_name) + + # Create the cloud function if it does not exist if not cf_endpoint: - self.check_cloud_function_tools_and_permissions() cf_endpoint = self.create_cloud_function(def_, cloud_function_name) else: logger.info(f"Cloud function {cloud_function_name} already exists.") - # Derive the name of the remote function and create/replace it if needed - remote_function_name = get_remote_function_name(def_, uniq_suffix) + # Derive the name of the remote function + remote_function_name = name + if not remote_function_name: + remote_function_name = get_remote_function_name(def_, uniq_suffix) rf_endpoint, rf_conn = self.get_remote_function_specs(remote_function_name) - if rf_endpoint != cf_endpoint or rf_conn != self._bq_connection_id: + + # Create the BQ remote function in following circumstances: + # 1. It does not exist + # 2. 
It exists but the existing remote function has different + # configuration than intended + if not rf_endpoint or ( + rf_endpoint != cf_endpoint or rf_conn != self._bq_connection_id + ): input_args = inspect.getargs(def_.__code__).args if len(input_args) != len(input_types): raise ValueError( @@ -439,27 +478,6 @@ def get_remote_function_specs(self, remote_function_name): break return (http_endpoint, bq_connection) - def check_cloud_function_tools_and_permissions(self): - """Check if the necessary tools and permissions are in place for creating remote function""" - # gcloud CLI comes with bq CLI and they are required for creating google - # cloud function and BigQuery remote function respectively - if not shutil.which("gcloud"): - raise ValueError( - "gcloud tool not installed, install it from https://cloud.google.com/sdk/docs/install. " - f"{constants.FEEDBACK_LINK}" - ) - - # TODO(shobs): Check for permissions too - # I (shobs) tried the following method - # $ gcloud asset search-all-iam-policies \ - # --format=json \ - # --scope=projects/{gcp_project_id} \ - # --query='policy.role.permissions:cloudfunctions.functions.create' - # as a proxy to all the privilges necessary to create cloud function - # https://cloud.google.com/functions/docs/reference/iam/roles#cloudfunctions.developer - # but that itself required the runner to have the permission to enable - # `cloudasset.googleapis.com` - def remote_function_node( routine_ref: bigquery.RoutineReference, ibis_signature: IbisSignature @@ -583,6 +601,7 @@ def remote_function( dataset: Optional[str] = None, bigquery_connection: Optional[str] = None, reuse: bool = True, + name: Optional[str] = None, ): """Decorator to turn a user defined function into a BigQuery remote function. @@ -613,7 +632,7 @@ def remote_function( * BigQuery Data Editor (roles/bigquery.dataEditor) * BigQuery Connection Admin (roles/bigquery.connectionAdmin) * Cloud Functions Developer (roles/cloudfunctions.developer) - * Service Account User (roles/iam.serviceAccountUser) + * Service Account User (roles/iam.serviceAccountUser) on the service account `PROJECT_NUMBER-compute@developer.gserviceaccount.com` * Storage Object Viewer (roles/storage.objectViewer) * Project IAM Admin (roles/resourcemanager.projectIamAdmin) (Only required if the bigquery connection being used is not pre-created and is created dynamically with user credentials.) @@ -664,10 +683,16 @@ def remote_function( reuse (bool, Optional): Reuse the remote function if is already exists. `True` by default, which results in reusing an existing remote - function (if any) that was previously created for the same udf. - Setting it to false forces the creation of creating a unique remote function. + function and corresponding cloud function (if any) that was + previously created for the same udf. + Setting it to `False` forces the creation of a unique remote function. If the required remote function does not exist then it would be created irrespective of this param. + name (str, Optional): + Explicit name of the persisted BigQuery remote function. Use it with + caution, because two users working in the same project and dataset + could overwrite each other's remote functions if they use the same + persistent name. """ @@ -739,12 +764,6 @@ def remote_function( f"{constants.FEEDBACK_LINK}" ) - uniq_suffix = None - if not reuse: - uniq_suffix = "".join( - random.choices(string.ascii_lowercase + string.digits, k=8) - ) - # Check connection_id with `LOCATION.CONNECTION_ID` or `PROJECT_ID.LOCATION.CONNECTION_ID` format. 
if bigquery_connection.count(".") == 1: bq_connection_location, bq_connection_id = bigquery_connection.split(".") @@ -792,8 +811,13 @@ def wrapper(f): bigquery_connection, resource_manager_client, ) + rf_name, cf_name = remote_function_client.provision_bq_remote_function( - f, ibis_signature.input_types, ibis_signature.output_type, uniq_suffix + f, + ibis_signature.input_types, + ibis_signature.output_type, + reuse, + name, ) node = remote_function_node(dataset_ref.routine(rf_name), ibis_signature) diff --git a/bigframes/series.py b/bigframes/series.py index 8eadee37ed..12e72c58b6 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -42,6 +42,7 @@ STABLE_SORTS, ) import bigframes.core.scalar as scalars +import bigframes.core.utils as utils import bigframes.core.window import bigframes.dataframe import bigframes.dtypes @@ -310,11 +311,20 @@ def drop( block = block.drop_columns([condition_id]) return Series(block.select_column(self._value_column)) - def droplevel(self, level: LevelsType): + def droplevel(self, level: LevelsType, axis: int | str = 0): resolved_level_ids = self._resolve_levels(level) return Series(self._block.drop_levels(resolved_level_ids)) - def reorder_levels(self, order: LevelsType): + def swaplevel(self, i: int = -2, j: int = -1): + level_i = self._block.index_columns[i] + level_j = self._block.index_columns[j] + mapping = {level_i: level_j, level_j: level_i} + reordering = [ + mapping.get(index_id, index_id) for index_id in self._block.index_columns + ] + return Series(self._block.reorder_levels(reordering)) + + def reorder_levels(self, order: LevelsType, axis: int | str = 0): resolved_level_ids = self._resolve_levels(order) return Series(self._block.reorder_levels(resolved_level_ids)) @@ -352,6 +362,14 @@ def cumsum(self) -> Series: agg_ops.sum_op, bigframes.core.WindowSpec(following=0) ) + def ffill(self, *, limit: typing.Optional[int] = None) -> Series: + window = bigframes.core.WindowSpec(preceding=limit, following=0) + return self._apply_window_op(agg_ops.LastNonNullOp(), window) + + def bfill(self, *, limit: typing.Optional[int] = None) -> Series: + window = bigframes.core.WindowSpec(preceding=0, following=limit) + return self._apply_window_op(agg_ops.FirstNonNullOp(), window) + def cummax(self) -> Series: return self._apply_window_op( agg_ops.max_op, bigframes.core.WindowSpec(following=0) @@ -375,7 +393,16 @@ def shift(self, periods: int = 1) -> Series: return self._apply_window_op(agg_ops.ShiftOp(periods), window) def diff(self, periods: int = 1) -> Series: - return self - self.shift(periods=periods) + window = bigframes.core.WindowSpec( + preceding=periods if periods > 0 else None, + following=-periods if periods < 0 else None, + ) + return self._apply_window_op(agg_ops.DiffOp(periods), window) + + def pct_change(self, periods: int = 1) -> Series: + # Future versions of pandas will not perfrom ffill automatically + series = self.ffill() + return Series(block_ops.pct_change(series._block, periods=periods)) def rank( self, @@ -390,6 +417,47 @@ def rank( def fillna(self, value=None) -> Series: return self._apply_binary_op(value, ops.fillna_op) + def replace( + self, to_replace: typing.Any, value: typing.Any = None, *, regex: bool = False + ): + if regex: + if not (isinstance(to_replace, str) and isinstance(value, str)): + raise NotImplementedError( + f"replace regex mode only supports strings for 'to_replace' and 'value'. 
{constants.FEEDBACK_LINK}" + ) + block, result_col = self._block.apply_unary_op( + self._value_column, + ops.ReplaceRegexOp(to_replace, value), + result_label=self.name, + ) + return Series(block.select_column(result_col)) + elif utils.is_dict_like(to_replace): + raise NotImplementedError( + f"Dict 'to_replace' not supported. {constants.FEEDBACK_LINK}" + ) + elif utils.is_list_like(to_replace): + block, cond = self._block.apply_unary_op( + self._value_column, ops.IsInOp(to_replace) + ) + block, result_col = block.apply_binary_op( + cond, + self._value_column, + ops.partial_arg1(ops.where_op, value), + result_label=self.name, + ) + return Series(block.select_column(result_col)) + else: # Scalar + block, cond = self._block.apply_unary_op( + self._value_column, ops.BinopPartialLeft(ops.eq_op, to_replace) + ) + block, result_col = block.apply_binary_op( + cond, + self._value_column, + ops.partial_arg1(ops.where_op, value), + result_label=self.name, + ) + return Series(block.select_column(result_col)) + def dropna( self, *, @@ -414,52 +482,16 @@ def tail(self, n: int = 5) -> Series: def nlargest(self, n: int = 5, keep: str = "first") -> Series: if keep not in ("first", "last", "all"): raise ValueError("'keep must be one of 'first', 'last', or 'all'") - block = self._block - if keep == "last": - block = block.reversed() - ordering = ( - OrderingColumnReference( - self._value_column, direction=OrderingDirection.DESC - ), + return Series( + block_ops.nlargest(self._block, n, [self._value_column], keep=keep) ) - block = block.order_by(ordering, stable=True) - if keep in ("first", "last"): - return Series(block.slice(0, n)) - else: # keep == "all": - block, counter = block.apply_window_op( - self._value_column, - agg_ops.rank_op, - window_spec=WindowSpec(ordering=ordering), - ) - block, condition = block.apply_unary_op( - counter, ops.partial_right(ops.le_op, n) - ) - block = block.filter(condition) - block = block.select_column(self._value_column) - return Series(block) def nsmallest(self, n: int = 5, keep: str = "first") -> Series: if keep not in ("first", "last", "all"): raise ValueError("'keep must be one of 'first', 'last', or 'all'") - block = self._block - if keep == "last": - block = block.reversed() - ordering = (OrderingColumnReference(self._value_column),) - block = block.order_by(ordering, stable=True) - if keep in ("first", "last"): - return Series(block.slice(0, n)) - else: # keep == "all": - block, counter = block.apply_window_op( - self._value_column, - agg_ops.rank_op, - window_spec=WindowSpec(ordering=ordering), - ) - block, condition = block.apply_unary_op( - counter, ops.partial_right(ops.le_op, n) - ) - block = block.filter(condition) - block = block.select_column(self._value_column) - return Series(block) + return Series( + block_ops.nsmallest(self._block, n, [self._value_column], keep=keep) + ) def isin(self, values) -> "Series" | None: if not _is_list_like(values): @@ -697,13 +729,9 @@ def _central_moment(self, n: int) -> float: """Useful helper for calculating central moment statistics""" # Nth central moment is mean((x-mean(x))^n) # See: https://en.wikipedia.org/wiki/Moment_(mathematics) - mean = self.mean() - mean_deltas = self - mean - delta_power = mean_deltas - # TODO(tbergeron): Replace with pow once implemented - for i in range(1, n): - delta_power = delta_power * mean_deltas - return delta_power.mean() + mean_deltas = self - self.mean() + delta_powers = mean_deltas**n + return delta_powers.mean() def agg(self, func: str | typing.Sequence[str]) -> scalars.Scalar | 
Series: if _is_list_like(func): @@ -1096,6 +1124,85 @@ def add_prefix(self, prefix: str, axis: int | str | None = None) -> Series: def add_suffix(self, suffix: str, axis: int | str | None = None) -> Series: return Series(self._get_block().add_suffix(suffix)) + def filter( + self, + items: typing.Optional[typing.Iterable] = None, + like: typing.Optional[str] = None, + regex: typing.Optional[str] = None, + axis: typing.Optional[typing.Union[str, int]] = None, + ) -> Series: + if (axis is not None) and utils.get_axis_number(axis) != 0: + raise ValueError(f"Invalid axis for series: {axis}") + if sum([(items is not None), (like is not None), (regex is not None)]) != 1: + raise ValueError( + "Need to provide exactly one of 'items', 'like', or 'regex'" + ) + if len(self._block.index_columns) > 1: + raise NotImplementedError( + "Method filter does not support rows multiindex. {constants.FEEDBACK_LINK}" + ) + if (like is not None) or (regex is not None): + block = self._block + block, label_string_id = block.apply_unary_op( + self._block.index_columns[0], + ops.AsTypeOp(pandas.StringDtype(storage="pyarrow")), + ) + if like is not None: + block, mask_id = block.apply_unary_op( + label_string_id, ops.ContainsStringOp(pat=like) + ) + else: # regex + assert regex is not None + block, mask_id = block.apply_unary_op( + label_string_id, ops.ContainsRegexOp(pat=regex) + ) + + block = block.filter(mask_id) + block = block.select_columns([self._value_column]) + return Series(block) + elif items is not None: + # Behavior matches pandas 2.1+, older pandas versions would reindex + block = self._block + block, mask_id = block.apply_unary_op( + self._block.index_columns[0], ops.IsInOp(values=list(items)) + ) + block = block.filter(mask_id) + block = block.select_columns([self._value_column]) + return Series(block) + else: + raise ValueError("Need to provide 'items', 'like', or 'regex'") + + def reindex(self, index=None, *, validate: typing.Optional[bool] = None): + if validate and not self.index.is_unique: + raise ValueError("Original index must be unique to reindex") + keep_original_names = False + if isinstance(index, indexes.Index): + new_indexer = bigframes.dataframe.DataFrame(data=index._data._get_block())[ + [] + ] + else: + if not isinstance(index, pandas.Index): + keep_original_names = True + index = pandas.Index(index) + if index.nlevels != self.index.nlevels: + raise NotImplementedError( + "Cannot reindex with index with different nlevels" + ) + new_indexer = bigframes.dataframe.DataFrame(index=index)[[]] + # multiindex join is senstive to index names, so we will set all these + result = new_indexer.rename_axis(range(new_indexer.index.nlevels)).join( + self.to_frame().rename_axis(range(self.index.nlevels)), + how="left", + ) + # and then reset the names after the join + result_block = result.rename_axis( + self.index.names if keep_original_names else index.names + )._block + return Series(result_block) + + def reindex_like(self, other: Series, *, validate: typing.Optional[bool] = None): + return self.reindex(other.index, validate=validate) + def drop_duplicates(self, *, keep: str = "first") -> Series: block = block_ops.drop_duplicates(self._block, (self._value_column,), keep) return Series(block) @@ -1216,14 +1323,7 @@ def to_xarray(self): def _throw_if_index_contains_duplicates( self, error_message: typing.Optional[str] = None ) -> None: - duplicates_block, _ = block_ops.indicate_duplicates( - self._get_block(), self._get_block().index_columns - ) - duplicates_block = 
duplicates_block.with_column_labels( - ["values", "is_duplicate"] - ) - duplicates_df = bigframes.dataframe.DataFrame(duplicates_block) - if duplicates_df["is_duplicate"].any(): + if not self.index.is_unique: error_message = ( error_message if error_message diff --git a/bigframes/session.py b/bigframes/session.py index 1744407772..3ca79a7b53 100644 --- a/bigframes/session.py +++ b/bigframes/session.py @@ -424,25 +424,30 @@ def read_gbq( ) -> dataframe.DataFrame: # TODO(b/281571214): Generate prompt to show the progress of read_gbq. if _is_query(query): - return self.read_gbq_query( + return self._read_gbq_query( query, index_col=index_col, col_order=col_order, max_results=max_results, + api_name="read_gbq", ) else: # TODO(swast): Query the snapshot table but mark it as a # deterministic query so we can avoid serializing if we have a # unique index. - return self.read_gbq_table( + return self._read_gbq_table( query, index_col=index_col, col_order=col_order, max_results=max_results, + api_name="read_gbq", ) def _query_to_destination( - self, query: str, index_cols: List[str] + self, + query: str, + index_cols: List[str], + api_name: str, ) -> Tuple[Optional[bigquery.TableReference], Optional[bigquery.QueryJob]]: # If there are no index columns, then there's no reason to cache to a # (clustered) session table, as we'll just have to query it again to @@ -464,7 +469,7 @@ def _query_to_destination( # operations are as speedy as they can be. try: ibis_expr = self.ibis_client.sql(query) - return self._ibis_to_session_table(ibis_expr, index_cols), None + return self._ibis_to_session_table(ibis_expr, index_cols, api_name), None except google.api_core.exceptions.BadRequest: # Some SELECT statements still aren't compatible with CREATE TEMP # TABLE ... AS SELECT ... statements. For example, if the query has @@ -490,15 +495,33 @@ def read_gbq_query( See also: :meth:`Session.read_gbq`. """ + return self._read_gbq_query( + query=query, + index_col=index_col, + col_order=col_order, + max_results=max_results, + api_name="read_gbq_query", + ) + + def _read_gbq_query( + self, + query: str, + *, + index_col: Iterable[str] | str = (), + col_order: Iterable[str] = (), + max_results: Optional[int] = None, + api_name: str, + ) -> dataframe.DataFrame: # NOTE: This method doesn't (yet) exist in pandas or pandas-gbq, so # these docstrings are inline. - if isinstance(index_col, str): index_cols = [index_col] else: index_cols = list(index_col) - destination, query_job = self._query_to_destination(query, index_cols) + destination, query_job = self._query_to_destination( + query, index_cols, api_name="read_gbq_query" + ) # If there was no destination table, that means the query must have # been DDL or DML. Return some job metadata, instead. @@ -535,6 +558,23 @@ def read_gbq_table( See also: :meth:`Session.read_gbq`. 
""" + return self._read_gbq_table( + query=query, + index_col=index_col, + col_order=col_order, + max_results=max_results, + api_name="read_gbq_table", + ) + + def _read_gbq_table( + self, + query: str, + *, + index_col: Iterable[str] | str = (), + col_order: Iterable[str] = (), + max_results: Optional[int] = None, + api_name: str, + ) -> dataframe.DataFrame: if max_results and max_results <= 0: raise ValueError("`max_results` should be a positive number.") @@ -646,7 +686,8 @@ def read_gbq_table( # rows for which row numbers must be generated table_expression = table_expression.limit(max_results) table_expression, ordering = self._create_sequential_ordering( - table_expression + table=table_expression, + api_name=api_name, ) hidden_cols = ( (ordering.total_order_col.column_id,) @@ -667,6 +708,7 @@ def read_gbq_table( hidden_cols=hidden_cols, ordering=ordering, is_total_ordering=is_total_ordering, + api_name=api_name, ) def _read_gbq_with_ordering( @@ -680,6 +722,7 @@ def _read_gbq_with_ordering( hidden_cols: Iterable[str] = (), ordering: core.ExpressionOrdering, is_total_ordering: bool = False, + api_name: str, ) -> dataframe.DataFrame: """Internal helper method that loads DataFrame from Google BigQuery given an ordering column. @@ -698,6 +741,8 @@ def _read_gbq_with_ordering( Columns that should be hidden. Ordering columns may (not always) be hidden ordering: Column name to be used for ordering. If not supplied, a default ordering is generated. + api_name: + The name of the API method. Returns: A DataFrame representing results of the query or table. @@ -723,7 +768,9 @@ def _read_gbq_with_ordering( if not is_total_ordering: # Rows are not ordered, we need to generate a default ordering and materialize it table_expression, ordering = self._create_sequential_ordering( - table_expression, index_cols + table=table_expression, + index_cols=index_cols, + api_name=api_name, ) index_col_values = [table_expression[index_id] for index_id in index_cols] if not col_labels: @@ -846,6 +893,11 @@ def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame Returns: bigframes.dataframe.DataFrame: The BigQuery DataFrame. """ + return self._read_pandas(pandas_dataframe, "read_pandas") + + def _read_pandas( + self, pandas_dataframe: pandas.DataFrame, api_name: str + ) -> dataframe.DataFrame: col_labels, idx_labels = ( pandas_dataframe.columns.to_list(), pandas_dataframe.index.names, @@ -878,6 +930,7 @@ def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame job_config = bigquery.LoadJobConfig(schema=schema) job_config.clustering_fields = cluster_cols + job_config.labels = {"bigframes-api": api_name} load_table_destination = self._create_session_table() load_job = self.bqclient.load_table_from_dataframe( @@ -910,6 +963,7 @@ def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame hidden_cols=(ordering_col,), ordering=ordering, is_total_ordering=True, + api_name=api_name, ) return df @@ -991,6 +1045,7 @@ def read_csv( job_config.autodetect = True job_config.field_delimiter = sep job_config.encoding = encoding + job_config.labels = {"bigframes-api": "read_csv"} # We want to match pandas behavior. If header is 0, no rows should be skipped, so we # do not need to set `skip_leading_rows`. If header is None, then there is no header. 
@@ -1048,7 +1103,7 @@ def read_pickle( pandas_obj.name = "0" bigframes_df = self.read_pandas(pandas_obj.to_frame()) return bigframes_df[bigframes_df.columns[0]] - return self.read_pandas(pandas_obj) + return self._read_pandas(pandas_obj, "read_pickle") def read_parquet( self, @@ -1063,6 +1118,7 @@ def read_parquet( job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED job_config.source_format = bigquery.SourceFormat.PARQUET job_config.write_disposition = bigquery.WriteDisposition.WRITE_EMPTY + job_config.labels = {"bigframes-api": "read_parquet"} return self._read_bigquery_load_job(path, table, job_config=job_config) @@ -1109,6 +1165,7 @@ def read_json( job_config.write_disposition = bigquery.WriteDisposition.WRITE_EMPTY job_config.autodetect = True job_config.encoding = encoding + job_config.labels = {"bigframes-api": "read_json"} return self._read_bigquery_load_job( path_or_buf, @@ -1176,7 +1233,10 @@ def _create_session_table(self) -> bigquery.TableReference: return dataset.table(table_name) def _create_sequential_ordering( - self, table: ibis_types.Table, index_cols: Iterable[str] = () + self, + table: ibis_types.Table, + index_cols: Iterable[str] = (), + api_name: str = "", ) -> Tuple[ibis_types.Table, core.ExpressionOrdering]: # Since this might also be used as the index, don't use the default # "ordering ID" name. @@ -1188,6 +1248,7 @@ def _create_sequential_ordering( table_ref = self._ibis_to_session_table( table, cluster_cols=list(index_cols) + [default_ordering_name], + api_name=api_name, ) table = self.ibis_client.sql(f"SELECT * FROM `{table_ref.table_id}`") ordering_reference = core.OrderingColumnReference(default_ordering_name) @@ -1199,7 +1260,10 @@ def _create_sequential_ordering( return table, ordering def _ibis_to_session_table( - self, table: ibis_types.Table, cluster_cols: Iterable[str] + self, + table: ibis_types.Table, + cluster_cols: Iterable[str], + api_name: str, ) -> bigquery.TableReference: clusterable_cols = [ col for col in cluster_cols if _can_cluster(table[col].type()) @@ -1207,10 +1271,14 @@ def _ibis_to_session_table( return self._query_to_session_table( self.ibis_client.compile(table), cluster_cols=clusterable_cols, + api_name=api_name, ) def _query_to_session_table( - self, query_text: str, cluster_cols: Iterable[str] + self, + query_text: str, + cluster_cols: Iterable[str], + api_name: str, ) -> bigquery.TableReference: if len(list(cluster_cols)) > _MAX_CLUSTER_COLUMNS: raise ValueError( @@ -1236,6 +1304,7 @@ def _query_to_session_table( # otherwise we get `BadRequest: 400 OPTIONS on temporary tables are not # supported`. job_config.labels = {"source": "bigquery-dataframes-temp"} + job_config.labels["bigframes-api"] = api_name try: self._start_query( @@ -1253,6 +1322,7 @@ def remote_function( dataset: Optional[str] = None, bigquery_connection: Optional[str] = None, reuse: bool = True, + name: Optional[str] = None, ): """Decorator to turn a user defined function into a BigQuery remote function. 
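        A usage sketch for the `reuse` and `name` parameters (the dataset and
        connection names below are placeholders):

            import bigframes.pandas as bpd

            @bpd.remote_function(
                [float],
                str,
                dataset="my_dataset",
                bigquery_connection="my_connection",
                reuse=False,
                name="my_bucketize",  # persisted as my_dataset.my_bucketize
            )
            def bucketize(x):
                return "small" if x < 4000 else "large"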
@@ -1280,7 +1350,7 @@ def remote_function( * BigQuery Data Editor (roles/bigquery.dataEditor) * BigQuery Connection Admin (roles/bigquery.connectionAdmin) * Cloud Functions Developer (roles/cloudfunctions.developer) - * Service Account User (roles/iam.serviceAccountUser) + * Service Account User (roles/iam.serviceAccountUser) on the service account `PROJECT_NUMBER-compute@developer.gserviceaccount.com` * Storage Object Viewer (roles/storage.objectViewer) * Project IAM Admin (roles/resourcemanager.projectIamAdmin) (Only required if the bigquery connection being used is not pre-created and is created dynamically with user credentials.) @@ -1311,10 +1381,16 @@ def remote_function( reuse (bool, Optional): Reuse the remote function if already exists. `True` by default, which will result in reusing an existing remote - function (if any) that was previously created for the same udf. - Setting it to false would force creating a unique remote function. + function and corresponding cloud function (if any) that was + previously created for the same udf. + Setting it to `False` would force creating a unique remote function. If the required remote function does not exist then it would be created irrespective of this param. + name (str, Optional): + Explicit name of the persisted BigQuery remote function. Use it with + caution, because two users working in the same project and dataset + could overwrite each other's remote functions if they use the same + persistent name. Returns: callable: A remote function object pointing to the cloud assets created in the background to support the remote execution. The cloud assets can be @@ -1331,6 +1407,7 @@ def remote_function( dataset=dataset, bigquery_connection=bigquery_connection, reuse=reuse, + name=name, ) def read_gbq_function( diff --git a/noxfile.py b/noxfile.py index 2355a9b27b..033bbfefe4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -42,6 +42,7 @@ "pytest", "pytest-cov", "pytest-asyncio", + "pytest-mock", ] UNIT_TEST_EXTERNAL_DEPENDENCIES: List[str] = [] UNIT_TEST_LOCAL_DEPENDENCIES: List[str] = [] diff --git a/samples/snippets/remote_function.py b/samples/snippets/remote_function.py index 37972672c3..9998a23eb2 100644 --- a/samples/snippets/remote_function.py +++ b/samples/snippets/remote_function.py @@ -39,11 +39,19 @@ def run_remote_function_and_read_gbq_function(project_id: str): # already created, BigQuery DataFrames will attempt to create one assuming # the necessary APIs and IAM permissions are setup in the project. In our # examples we would be using a pre-created connection named - # `bigframes-rf-conn`. Let's try a `pandas`-like use case in which we want - # to apply a user defined scalar function to every value in a `Series`, more - # specifically bucketize the `body_mass_g` value of the penguins, which is a - # real number, into a category, which is a string. - @bpd.remote_function([float], str, bigquery_connection="bigframes-rf-conn") + # `bigframes-rf-conn`. We will also set `reuse=False` to make sure we don't + # step over someone else creating remote function in the same project from + # the exact same source code at the same time. Let's try a `pandas`-like use + # case in which we want to apply a user defined scalar function to every + # value in a `Series`, more specifically bucketize the `body_mass_g` value + # of the penguins, which is a real number, into a category, which is a + # string. 
+ @bpd.remote_function( + [float], + str, + bigquery_connection="bigframes-rf-conn", + reuse=False, + ) def get_bucket(num): if not num: return "NA" @@ -80,9 +88,11 @@ def get_bucket(num): # Let's continue trying other potential use cases of remote functions. Let's # say we consider the `species`, `island` and `sex` of the penguins # sensitive information and want to redact that by replacing with their hash - # code instead. Let's define another scalar custom function and decorated it + # code instead. Let's define another scalar custom function and decorate it # as a remote function - @bpd.remote_function([str], str, bigquery_connection="bigframes-rf-conn") + @bpd.remote_function( + [str], str, bigquery_connection="bigframes-rf-conn", reuse=False + ) def get_hash(input): import hashlib diff --git a/setup.py b/setup.py index 139873e6fc..69b71c88f1 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ "ibis-framework[bigquery] >=6.0.0,<=6.1.0", "pandas >=1.5.0", "pydata-google-auth >=1.8.2", + "requests >=2.27.1", "scikit-learn >=1.2.2", "sqlalchemy >=1.4,<3.0", "ipywidgets >=7.7.1", @@ -58,7 +59,7 @@ "pandas-gbq >=0.19.0", ], # Packages required for basic development flow. - "dev": ["pytest", "pre-commit", "nox", "google-cloud-testutils"], + "dev": ["pytest", "pytest-mock", "pre-commit", "nox", "google-cloud-testutils"], } extras["all"] = list(sorted(frozenset(itertools.chain.from_iterable(extras.values())))) diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 523256ee83..cd69d45dc9 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -9,7 +9,7 @@ cachetools==5.3.0 certifi==2022.12.7 cffi==1.15.1 cfgv==3.3.1 -charset-normalizer==3.1.0 +charset-normalizer==2.0.0 click==8.1.3 cloudpickle==2.0.0 colorlog==6.7.0 @@ -90,13 +90,14 @@ pyperclip==1.8.2 pytest==7.2.2 pytest-asyncio==0.21.0 pytest-cov==4.0.0 +pytest-mock==3.11.1 pytest-retry==1.1.0 pytest-xdist==3.2.1 python-dateutil==2.8.2 pytz==2023.3 PyYAML==6.0 readme-renderer==37.3 -requests==2.28.2 +requests==2.27.1 requests-oauthlib==1.3.1 requests-toolbelt==0.10.1 rfc3986==2.0.0 diff --git a/tests/system/large/ml/test_ensemble.py b/tests/system/large/ml/test_ensemble.py index 9b2872d673..a8613dfeb9 100644 --- a/tests/system/large/ml/test_ensemble.py +++ b/tests/system/large/ml/test_ensemble.py @@ -70,7 +70,7 @@ def test_xgbregressor_dart_booster_multiple_params( ): model = bigframes.ml.ensemble.XGBRegressor( booster="dart", - tree_method="AUTO", + tree_method="auto", min_tree_child_weight=2, colsample_bytree=0.95, colsample_bylevel=0.95, @@ -121,7 +121,7 @@ def test_xgbregressor_dart_booster_multiple_params( in reloaded_model._bqml_model.model_name ) assert reloaded_model.booster == "DART" - assert reloaded_model.dart_normalized_type == "TREE" + assert reloaded_model.dart_normalized_type == "tree" assert reloaded_model.tree_method == "AUTO" assert reloaded_model.colsample_bytree == 0.95 assert reloaded_model.colsample_bylevel == 0.95 @@ -185,7 +185,7 @@ def test_xgbclassifier_dart_booster_multiple_params( ): model = bigframes.ml.ensemble.XGBClassifier( booster="dart", - tree_method="AUTO", + tree_method="auto", min_tree_child_weight=2, colsample_bytree=0.95, colsample_bylevel=0.95, @@ -235,7 +235,7 @@ def test_xgbclassifier_dart_booster_multiple_params( in reloaded_model._bqml_model.model_name ) assert reloaded_model.booster == "DART" - assert reloaded_model.dart_normalized_type == "TREE" + assert reloaded_model.dart_normalized_type == "tree" assert reloaded_model.tree_method == "AUTO" 
assert reloaded_model.colsample_bytree == 0.95 assert reloaded_model.colsample_bylevel == 0.95 @@ -297,7 +297,7 @@ def test_randomforestregressor_default_params(penguins_df_default_index, dataset @pytest.mark.flaky(retries=2, delay=120) def test_randomforestregressor_multiple_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestRegressor( - tree_method="AUTO", + tree_method="auto", min_tree_child_weight=2, colsample_bytree=0.95, colsample_bylevel=0.95, diff --git a/tests/system/large/test_remote_function.py b/tests/system/large/test_remote_function.py index 8033f79c47..2f231f40c9 100644 --- a/tests/system/large/test_remote_function.py +++ b/tests/system/large/test_remote_function.py @@ -61,16 +61,32 @@ def get_remote_function_endpoints(bigquery_client, dataset_id): return endpoints -def get_cloud_functions(functions_client, project, location, name_prefix="bigframes-"): +def get_cloud_functions( + functions_client, project, location, name=None, name_prefix=None +): """Get the cloud functions in the given project and location.""" + + assert ( + not name or not name_prefix + ), f"At most one of the {name.__name__} or {name_prefix.__name__} can be passed." + _, location = get_remote_function_locations(location) parent = f"projects/{project}/locations/{location}" request = functions_v2.ListFunctionsRequest(parent=parent) page_result = functions_client.list_functions(request=request) - full_name_prefix = parent + f"/functions/{name_prefix}" for response in page_result: - if not name_prefix or response.name.startswith(full_name_prefix): - yield response + # If name is provided and it does not match then skip + if bool(name): + full_name = parent + f"/functions/{name}" + if response.name != full_name: + continue + # If name prefix is provided and it does not match then skip + elif bool(name_prefix): + full_name_prefix = parent + f"/functions/{name_prefix}" + if not response.name.startswith(full_name_prefix): + continue + + yield response def delete_cloud_function(functions_client, full_name): @@ -84,8 +100,17 @@ def cleanup_remote_function_assets( bigquery_client, functions_client, remote_udf, ignore_failures=True ): """Clean up the GCP assets behind a bigframes remote function.""" + + # Clean up BQ remote function try: bigquery_client.delete_routine(remote_udf.bigframes_remote_function) + except Exception: + # By default don't raise exception in cleanup + if not ignore_failures: + raise + + # Clean up cloud function + try: delete_cloud_function(functions_client, remote_udf.bigframes_cloud_function) except Exception: # By default don't raise exception in cleanup @@ -94,7 +119,15 @@ def cleanup_remote_function_assets( def make_uniq_udf(udf): - """Transform a udf to another with same behavior but a unique name.""" + """Transform a udf to another with same behavior but a unique name. + Use this to test remote functions with reuse=True, in which case parallel + instances of the same tests may evaluate same named cloud functions and BQ + remote functions, therefore interacting with each other and causing unwanted + failures. With this method one can transform a udf into another with the + same behavior but a different name which will remain unique for the + lifetime of one test instance. 
+ """ + prefixer = test_utils.prefixer.Prefixer(udf.__name__, "") udf_uniq_name = prefixer.create_prefix() udf_file_name = f"{udf_uniq_name}.py" @@ -111,7 +144,18 @@ def make_uniq_udf(udf): target_code = source_code.replace(source_key, target_key, 1) f.write(target_code) spec = importlib.util.spec_from_file_location(udf_file_name, udf_file_path) - return getattr(spec.loader.load_module(), udf_uniq_name), tmpdir + udf_uniq = getattr(spec.loader.load_module(), udf_uniq_name) + + # This is a bit of a hack but we need to remove the reference to a foreign + # module, otherwise the serialization would keep the foreign module + # reference and deserialization would fail with error like following: + # ModuleNotFoundError: No module named 'add_one_2nxcmd9j' + # TODO(shobs): Figure out if there is a better way of generating the unique + # function object, but for now let's just set it to same module as the + # original udf. + udf_uniq.__module__ = udf.__module__ + + return udf_uniq, tmpdir @pytest.fixture(scope="module") @@ -136,7 +180,10 @@ def cleanup_cloud_functions(session, functions_client, dataset_id_permanent): ) delete_count = 0 for cloud_function in get_cloud_functions( - functions_client, session.bqclient.project, session.bqclient.location + functions_client, + session.bqclient.project, + session.bqclient.location, + name_prefix="bigframes-", ): # Ignore bigframes cloud functions referred by the remote functions in # the permanent dataset @@ -524,15 +571,6 @@ def add_one(x): # Make a unique udf add_one_uniq, add_one_uniq_dir = make_uniq_udf(add_one) - # This is a bit of a hack but we need to remove the reference to a foreign - # module, otherwise the serialization would keep the foreign module - # reference and deserialization would fail with error like following: - # ModuleNotFoundError: No module named 'add_one_2nxcmd9j' - # TODO(shobs): Figure out if there is a better way of generating the unique - # function object, but for now let's just set it to same module as the - # original udf. 
- add_one_uniq.__module__ = add_one.__module__ - # Expected cloud function name for the unique udf add_one_uniq_cf_name = get_cloud_function_name(add_one_uniq) @@ -542,7 +580,7 @@ def add_one(x): functions_client, session.bqclient.project, session.bqclient.location, - name_prefix=add_one_uniq_cf_name, + name=add_one_uniq_cf_name, ) ) assert len(cloud_functions) == 0 @@ -563,7 +601,7 @@ def add_one(x): functions_client, session.bqclient.project, session.bqclient.location, - name_prefix=add_one_uniq_cf_name, + name=add_one_uniq_cf_name, ) ) assert len(cloud_functions) == 1 @@ -611,7 +649,7 @@ def inner_test(): functions_client, session.bqclient.project, session.bqclient.location, - name_prefix=add_one_uniq_cf_name, + name=add_one_uniq_cf_name, ) ) assert len(cloud_functions) == 0 @@ -633,7 +671,7 @@ def inner_test(): functions_client, session.bqclient.project, session.bqclient.location, - name_prefix=add_one_uniq_cf_name, + name=add_one_uniq_cf_name, ) ) assert len(cloud_functions) == 1 @@ -776,3 +814,221 @@ def test_remote_udf_lambda( cleanup_remote_function_assets( session.bqclient, functions_client, add_one_lambda_remote ) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_with_explicit_name( + session, scalars_dfs, dataset_id, bq_cf_connection, functions_client +): + try: + + def square(x): + return x * x + + prefixer = test_utils.prefixer.Prefixer(square.__name__, "") + rf_name = prefixer.create_prefix() + expected_remote_function = f"{dataset_id}.{rf_name}" + + # Initially the expected BQ remote function should not exist + with pytest.raises(NotFound): + session.bqclient.get_routine(expected_remote_function) + + # Create the remote function with the name provided explicitly + square_remote = session.remote_function( + [int], + int, + dataset_id, + bq_cf_connection, + reuse=False, + name=rf_name, + )(square) + + # The remote function should reflect the explicitly provided name + assert square_remote.bigframes_remote_function == expected_remote_function + + # Now the expected BQ remote function should exist + session.bqclient.get_routine(expected_remote_function) + + # The behavior of the created remote function should be as expected + scalars_df, scalars_pandas_df = scalars_dfs + + bf_int64_col = scalars_df["int64_too"] + bf_result_col = bf_int64_col.apply(square_remote) + bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas() + + pd_int64_col = scalars_pandas_df["int64_too"] + pd_result_col = pd_int64_col.apply(square) + # TODO(shobs): Figure why pandas .apply() changes the dtype, i.e. + # pd_int64_col.dtype is Int64Dtype() + # pd_int64_col.apply(square).dtype is int64. + # For this test let's force the pandas dtype to be same as bigframes' dtype. 
+ pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) + pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) + + assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + finally: + # clean up the gcp assets created for the remote function + cleanup_remote_function_assets( + session.bqclient, functions_client, square_remote + ) + + +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_with_explicit_name_reuse( + session, scalars_dfs, dataset_id, bq_cf_connection, functions_client +): + try: + + dirs_to_cleanup = [] + + # Define a user code + def square(x): + return x * x + + # Make it a unique udf + square_uniq, square_uniq_dir = make_uniq_udf(square) + dirs_to_cleanup.append(square_uniq_dir) + + # Define a common routine which accepts a remote function and the + # corresponding user defined function and tests that bigframes bahavior + # on the former is in parity with the pandas behaviour on the latter + def test_internal(rf, udf): + # The behavior of the created remote function should be as expected + scalars_df, scalars_pandas_df = scalars_dfs + + bf_int64_col = scalars_df["int64_too"] + bf_result_col = bf_int64_col.apply(rf) + bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas() + + pd_int64_col = scalars_pandas_df["int64_too"] + pd_result_col = pd_int64_col.apply(udf) + # TODO(shobs): Figure why pandas .apply() changes the dtype, i.e. + # pd_int64_col.dtype is Int64Dtype() + # pd_int64_col.apply(square).dtype is int64. + # For this test let's force the pandas dtype to be same as bigframes' dtype. + pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) + pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) + + assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + + # Create an explicit name for the remote function + prefixer = test_utils.prefixer.Prefixer("foo", "") + rf_name = prefixer.create_prefix() + expected_remote_function = f"{dataset_id}.{rf_name}" + + # Initially the expected BQ remote function should not exist + with pytest.raises(NotFound): + session.bqclient.get_routine(expected_remote_function) + + # Create a new remote function with the name provided explicitly + square_remote1 = session.remote_function( + [int], + int, + dataset_id, + bq_cf_connection, + name=rf_name, + )(square_uniq) + + # The remote function should reflect the explicitly provided name + assert square_remote1.bigframes_remote_function == expected_remote_function + + # Now the expected BQ remote function should exist + routine = session.bqclient.get_routine(expected_remote_function) + square_remote1_created = routine.created + square_remote1_cf_updated = session.cloudfunctionsclient.get_function( + name=square_remote1.bigframes_cloud_function + ).update_time + + # Test pandas parity with square udf + test_internal(square_remote1, square) + + # Now Create another remote function with the same name provided + # explicitly. Since reuse is True by default, the previously created + # remote function with the same name will be reused. 
+ square_remote2 = session.remote_function( + [int], + int, + dataset_id, + bq_cf_connection, + name=rf_name, + )(square_uniq) + + # The new remote function should still reflect the explicitly provided name + assert square_remote2.bigframes_remote_function == expected_remote_function + + # The expected BQ remote function should still exist + routine = session.bqclient.get_routine(expected_remote_function) + square_remote2_created = routine.created + square_remote2_cf_updated = session.cloudfunctionsclient.get_function( + name=square_remote2.bigframes_cloud_function + ).update_time + + # The new remote function should reflect that the previous BQ remote + # function and the cloud function were reused instead of creating anew + assert square_remote2_created == square_remote1_created + assert ( + square_remote2.bigframes_cloud_function + == square_remote1.bigframes_cloud_function + ) + assert square_remote2_cf_updated == square_remote1_cf_updated + + # Test again that the new remote function is actually same as the + # previous remote function + test_internal(square_remote2, square) + + # Now define a different user code + def plusone(x): + return x + 1 + + # Make it a unique udf + plusone_uniq, plusone_uniq_dir = make_uniq_udf(plusone) + dirs_to_cleanup.append(plusone_uniq_dir) + + # Now Create a third remote function with the same name provided + # explicitly. Even though reuse is True by default, the previously + # created remote function with the same name should not be reused since + # this time it is a different user code. + plusone_remote = session.remote_function( + [int], + int, + dataset_id, + bq_cf_connection, + name=rf_name, + )(plusone_uniq) + + # The new remote function should still reflect the explicitly provided name + assert plusone_remote.bigframes_remote_function == expected_remote_function + + # The expected BQ remote function should still exist + routine = session.bqclient.get_routine(expected_remote_function) + plusone_remote_created = routine.created + plusone_remote_cf_updated = session.cloudfunctionsclient.get_function( + name=plusone_remote.bigframes_cloud_function + ).update_time + + # The new remote function should reflect that the previous BQ remote + # function and the cloud function were NOT reused, instead were created + # anew + assert plusone_remote_created > square_remote2_created + assert ( + plusone_remote.bigframes_cloud_function + != square_remote2.bigframes_cloud_function + ) + assert plusone_remote_cf_updated > square_remote2_cf_updated + + # Test again that the new remote function is equivalent to the new user + # defined function + test_internal(plusone_remote, plusone) + finally: + # clean up the gcp assets created for the remote function + cleanup_remote_function_assets( + session.bqclient, functions_client, square_remote1 + ) + cleanup_remote_function_assets( + session.bqclient, functions_client, square_remote2 + ) + cleanup_remote_function_assets( + session.bqclient, functions_client, plusone_remote + ) + for dir_ in dirs_to_cleanup: + shutil.rmtree(dir_) diff --git a/tests/system/small/ml/test_core.py b/tests/system/small/ml/test_core.py index 6c3e8e06f5..ace943956f 100644 --- a/tests/system/small/ml/test_core.py +++ b/tests/system/small/ml/test_core.py @@ -18,6 +18,7 @@ import pandas as pd import pyarrow as pa +import pytest import pytz import bigframes @@ -278,6 +279,7 @@ def test_model_predict_with_unnamed_index( ) +@pytest.mark.flaky(retries=2, delay=120) def test_model_generate_text( bqml_palm2_text_generator_model: core.BqmlModel, 
llm_text_df ): diff --git a/tests/system/small/ml/test_decomposition.py b/tests/system/small/ml/test_decomposition.py index 8df4145fcf..c71bbbe3b0 100644 --- a/tests/system/small/ml/test_decomposition.py +++ b/tests/system/small/ml/test_decomposition.py @@ -16,33 +16,14 @@ from bigframes.ml import decomposition -_PD_NEW_PENGUINS = pd.DataFrame( - { - "tag_number": [1633, 1672, 1690], - "species": [ - "Adelie Penguin (Pygoscelis adeliae)", - "Gentoo penguin (Pygoscelis papua)", - "Adelie Penguin (Pygoscelis adeliae)", - ], - "island": ["Dream", "Biscoe", "Torgersen"], - "culmen_length_mm": [37.8, 46.5, 41.1], - "culmen_depth_mm": [18.1, 14.8, 18.6], - "flipper_length_mm": [193.0, 217.0, 189.0], - "body_mass_g": [3750.0, 5200.0, 3325.0], - "sex": ["MALE", "FEMALE", "MALE"], - } -).set_index("tag_number") - -def test_pca_predict(session, penguins_pca_model: decomposition.PCA): - new_penguins = session.read_pandas(_PD_NEW_PENGUINS) - - predictions = penguins_pca_model.predict(new_penguins).to_pandas() +def test_pca_predict(penguins_pca_model, new_penguins_df): + predictions = penguins_pca_model.predict(new_penguins_df).to_pandas() expected = pd.DataFrame( { - "principal_component_1": [-1.459, 2.258, -1.685], - "principal_component_2": [-1.120, -1.351, -0.874], - "principal_component_3": [-0.646, 0.443, -0.704], + "principal_component_1": [-1.314041, -0.855813, -1.848786], + "principal_component_2": [-0.889106, -1.259753, -0.983304], + "principal_component_3": [-0.704345, 0.322555, -0.095759], }, dtype="Float64", index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 85c3cce1d7..a85777c59d 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -13,6 +13,7 @@ # limitations under the License. 
import operator +import tempfile import typing from typing import Tuple @@ -137,6 +138,46 @@ def test_tail_with_custom_column_labels(scalars_df_index, scalars_pandas_df_inde pandas.testing.assert_frame_equal(bf_result, pd_result) +@pytest.mark.parametrize( + ("keep",), + [ + ("first",), + ("last",), + ("all",), + ], +) +def test_df_nlargest(scalars_df_index, scalars_pandas_df_index, keep): + bf_result = scalars_df_index.nlargest( + 3, ["bool_col", "int64_too"], keep=keep + ).to_pandas() + pd_result = scalars_pandas_df_index.nlargest( + 3, ["bool_col", "int64_too"], keep=keep + ) + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +@pytest.mark.parametrize( + ("keep",), + [ + ("first",), + ("last",), + ("all",), + ], +) +def test_df_nsmallest(scalars_df_index, scalars_pandas_df_index, keep): + bf_result = scalars_df_index.nsmallest(6, ["bool_col"], keep=keep).to_pandas() + pd_result = scalars_pandas_df_index.nsmallest(6, ["bool_col"], keep=keep) + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + def test_get_column_by_attr(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs series = scalars_df.int64_col @@ -582,6 +623,22 @@ def test_df_fillna(scalars_dfs): pandas.testing.assert_frame_equal(bf_result, pd_result) +def test_df_ffill(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[["int64_col", "float64_col"]].ffill(limit=1).to_pandas() + pd_result = scalars_pandas_df[["int64_col", "float64_col"]].ffill(limit=1) + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_bfill(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[["int64_col", "float64_col"]].bfill().to_pandas() + pd_result = scalars_pandas_df[["int64_col", "float64_col"]].bfill() + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + def test_df_isin_list(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs values = ["Hello, World!", 55555, 2.51, pd.NA, True] @@ -1027,6 +1084,88 @@ def test_df_notnull(scalars_dfs): assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) +@pytest.mark.parametrize( + ("left_labels", "right_labels", "overwrite", "fill_value"), + [ + (["a", "b", "c"], ["c", "a", "b"], True, None), + (["a", "b", "c"], ["c", "a", "b"], False, None), + (["a", "b", "c"], ["a", "b", "c"], False, 2), + ], + ids=[ + "one_one_match_overwrite", + "one_one_match_no_overwrite", + "exact_match", + ], +) +def test_combine( + scalars_df_index, + scalars_df_2_index, + scalars_pandas_df_index, + left_labels, + right_labels, + overwrite, + fill_value, +): + if pd.__version__.startswith("1."): + pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.") + columns = ["int64_too", "int64_col", "float64_col"] + + bf_df_a = scalars_df_index[columns] + bf_df_a.columns = left_labels + bf_df_b = scalars_df_2_index[columns] + bf_df_b.columns = right_labels + bf_result = bf_df_a.combine( + bf_df_b, + lambda x, y: x**2 + 2 * x * y + y**2, + overwrite=overwrite, + fill_value=fill_value, + ).to_pandas() + + pd_df_a = scalars_pandas_df_index[columns] + pd_df_a.columns = left_labels + pd_df_b = scalars_pandas_df_index[columns] + pd_df_b.columns = right_labels + pd_result = pd_df_a.combine( + pd_df_b, + lambda x, y: x**2 + 2 * x * y + y**2, + overwrite=overwrite, + fill_value=fill_value, + ) + + # Some dtype inconsistency for all-NULL columns + pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + + +def test_combine_first( + scalars_df_index, + scalars_df_2_index, + 
scalars_pandas_df_index, +): + if pd.__version__.startswith("1."): + pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.") + columns = ["int64_too", "int64_col", "float64_col"] + + bf_df_a = scalars_df_index[columns].iloc[0:6] + bf_df_a.columns = ["a", "b", "c"] + bf_df_b = scalars_df_2_index[columns].iloc[2:8] + bf_df_b.columns = ["b", "a", "d"] + bf_result = bf_df_a.combine_first(bf_df_b).to_pandas() + + pd_df_a = scalars_pandas_df_index[columns].iloc[0:6] + pd_df_a.columns = ["a", "b", "c"] + pd_df_b = scalars_pandas_df_index[columns].iloc[2:8] + pd_df_b.columns = ["b", "a", "d"] + pd_result = pd_df_a.combine_first(pd_df_b) + + print("pandas") + print(pd_result.to_string()) + print("bigframes") + print(bf_result.to_string()) + + # Some dtype inconsistency for all-NULL columns + pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + + @pytest.mark.parametrize( ("op"), [ @@ -1145,11 +1284,13 @@ def test_series_binop_axis_index( (["a", "a", "b"], ["c", "c", "d"]), (["a", "b", "c"], ["c", "a", "b"]), (["a", "c", "c"], ["c", "a", "c"]), + (["a", "b", "c"], ["a", "b", "c"]), ], ids=[ "no_overlap", "one_one_match", "multi_match", + "exact_match", ], ) def test_binop_df_df_binary_op( @@ -1361,6 +1502,42 @@ def test_dataframe_general_analytic_op( ) +@pytest.mark.parametrize( + ("periods",), + [ + (1,), + (2,), + (-1,), + ], +) +def test_dataframe_diff(scalars_df_index, scalars_pandas_df_index, periods): + col_names = ["int64_too", "float64_col", "int64_col"] + bf_result = scalars_df_index[col_names].diff(periods=periods).to_pandas() + pd_result = scalars_pandas_df_index[col_names].diff(periods=periods) + pd.testing.assert_frame_equal( + pd_result, + bf_result, + ) + + +@pytest.mark.parametrize( + ("periods",), + [ + (1,), + (2,), + (-1,), + ], +) +def test_dataframe_pct_change(scalars_df_index, scalars_pandas_df_index, periods): + col_names = ["int64_too", "float64_col", "int64_col"] + bf_result = scalars_df_index[col_names].pct_change(periods=periods).to_pandas() + pd_result = scalars_pandas_df_index[col_names].pct_change(periods=periods) + pd.testing.assert_frame_equal( + pd_result, + bf_result, + ) + + def test_dataframe_agg_single_string(scalars_dfs): numeric_cols = ["int64_col", "int64_too", "float64_col"] scalars_df, scalars_pandas_df = scalars_dfs @@ -1675,6 +1852,52 @@ def test_loc_single_index_no_duplicate(scalars_df_index, scalars_pandas_df_index ) +def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + bf_df.loc[bf_df["int64_too"] == 0, "new_col"] = 99 + pd_df.loc[pd_df["int64_too"] == 0, "new_col"] = 99 + + # pandas type difference + pd_df["new_col"] = pd_df["new_col"].astype("Float64") + + pd.testing.assert_frame_equal( + bf_df.to_pandas(), + pd_df, + ) + + +def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs): + if pd.__version__.startswith("1."): + pytest.skip("this loc overload not supported in pandas 1.x.") + + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + bf_df.loc[bf_df["int64_too"] == 1, "string_col"] = "hello" + pd_df.loc[pd_df["int64_too"] == 1, "string_col"] = "hello" + + pd.testing.assert_frame_equal( + bf_df.to_pandas(), + pd_df, + ) + + +def test_loc_setitem_bool_series_scalar_type_error(scalars_dfs): + if pd.__version__.startswith("1."): + pytest.skip("this loc overload not supported in pandas 1.x.") + + scalars_df, scalars_pandas_df = scalars_dfs + bf_df 
= scalars_df.copy() + pd_df = scalars_pandas_df.copy() + + with pytest.raises(TypeError): + bf_df.loc[bf_df["int64_too"] == 1, "string_col"] = 99 + with pytest.raises(TypeError): + pd_df.loc[pd_df["int64_too"] == 1, "string_col"] = 99 + + @pytest.mark.parametrize( ("op"), [ @@ -1749,6 +1972,30 @@ def test_dataframe_prod(scalars_df_index, scalars_pandas_df_index): pd.testing.assert_series_equal(pd_series, bf_result, check_index_type=False) +def test_df_skew_too_few_values(scalars_dfs): + columns = ["float64_col", "int64_col"] + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[columns].head(2).skew().to_pandas() + pd_result = scalars_pandas_df[columns].head(2).skew() + + # Pandas may produce narrower numeric types, but bigframes always produces Float64 + pd_result = pd_result.astype("Float64") + + pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + + +def test_df_skew(scalars_dfs): + columns = ["float64_col", "int64_col"] + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[columns].skew().to_pandas() + pd_result = scalars_pandas_df[columns].skew() + + # Pandas may produce narrower numeric types, but bigframes always produces Float64 + pd_result = pd_result.astype("Float64") + + pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + + @pytest.mark.parametrize( ("frac", "n", "random_state"), [ @@ -1828,6 +2075,161 @@ def test_df_add_suffix(scalars_df_index, scalars_pandas_df_index, axis): ) +def test_df_columns_filter_items(scalars_df_index, scalars_pandas_df_index): + if pd.__version__.startswith("2.0") or pd.__version__.startswith("1."): + pytest.skip("pandas filter items behavior different pre-2.1") + bf_result = scalars_df_index.filter(items=["string_col", "int64_col"]).to_pandas() + + pd_result = scalars_pandas_df_index.filter(items=["string_col", "int64_col"]) + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_columns_filter_like(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.filter(like="64_col").to_pandas() + + pd_result = scalars_pandas_df_index.filter(like="64_col") + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_columns_filter_regex(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.filter(regex="^[^_]+$").to_pandas() + + pd_result = scalars_pandas_df_index.filter(regex="^[^_]+$") + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_rows_filter_items(scalars_df_index, scalars_pandas_df_index): + if pd.__version__.startswith("2.0") or pd.__version__.startswith("1."): + pytest.skip("pandas filter items behavior different pre-2.1") + bf_result = scalars_df_index.filter(items=[5, 1, 3], axis=0).to_pandas() + + pd_result = scalars_pandas_df_index.filter(items=[5, 1, 3], axis=0) + + # Pandas uses int64 instead of Int64 (nullable) dtype. 
+ pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_rows_filter_like(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.copy().set_index("string_col") + scalars_pandas_df_index = scalars_pandas_df_index.copy().set_index("string_col") + + bf_result = scalars_df_index.filter(like="ello", axis=0).to_pandas() + + pd_result = scalars_pandas_df_index.filter(like="ello", axis=0) + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_rows_filter_regex(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.copy().set_index("string_col") + scalars_pandas_df_index = scalars_pandas_df_index.copy().set_index("string_col") + + bf_result = scalars_df_index.filter(regex="^[GH].*", axis=0).to_pandas() + + pd_result = scalars_pandas_df_index.filter(regex="^[GH].*", axis=0) + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_reindex_rows_list(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.reindex(index=[5, 1, 3, 99, 1]).to_pandas() + + pd_result = scalars_pandas_df_index.reindex(index=[5, 1, 3, 99, 1]) + + # Pandas uses int64 instead of Int64 (nullable) dtype. + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_reindex_rows_index(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.reindex( + index=pd.Index([5, 1, 3, 99, 1], name="newname") + ).to_pandas() + + pd_result = scalars_pandas_df_index.reindex( + index=pd.Index([5, 1, 3, 99, 1], name="newname") + ) + + # Pandas uses int64 instead of Int64 (nullable) dtype. + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_reindex_nonunique(scalars_df_index): + with pytest.raises(ValueError): + # int64_too is non-unique + scalars_df_index.set_index("int64_too").reindex( + index=[5, 1, 3, 99, 1], validate=True + ) + + +def test_df_reindex_columns(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.reindex( + columns=["not_a_col", "int64_col", "int64_too"] + ).to_pandas() + + pd_result = scalars_pandas_df_index.reindex( + columns=["not_a_col", "int64_col", "int64_too"] + ) + + # Pandas uses float64 as default for newly created empty column, bf uses Float64 + pd_result.not_a_col = pd_result.not_a_col.astype(pandas.Float64Dtype()) + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_reindex_like(scalars_df_index, scalars_pandas_df_index): + reindex_target_bf = scalars_df_index.reindex( + columns=["not_a_col", "int64_col", "int64_too"], index=[5, 1, 3, 99, 1] + ) + bf_result = scalars_df_index.reindex_like(reindex_target_bf).to_pandas() + + reindex_target_pd = scalars_pandas_df_index.reindex( + columns=["not_a_col", "int64_col", "int64_too"], index=[5, 1, 3, 99, 1] + ) + pd_result = scalars_pandas_df_index.reindex_like(reindex_target_pd) + + # Pandas uses float64 as default for newly created empty column, bf uses Float64 + # Pandas uses int64 instead of Int64 (nullable) dtype. 
+ pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + # Pandas uses float64 as default for newly created empty column, bf uses Float64 + pd_result.not_a_col = pd_result.not_a_col.astype(pandas.Float64Dtype()) + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + def test_df_values(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.values @@ -2035,6 +2437,93 @@ def test_df_duplicated(scalars_df_index, scalars_pandas_df_index, keep, subset): pd.testing.assert_series_equal(pd_series, bf_series, check_dtype=False) +def test_df_to_dict(scalars_df_index, scalars_pandas_df_index): + unsupported = ["numeric_col"] # formatted differently + bf_result = scalars_df_index.drop(columns=unsupported).to_dict() + pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_dict() + + assert bf_result == pd_result + + +def test_df_to_excel(scalars_df_index, scalars_pandas_df_index): + unsupported = ["timestamp_col"] + with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file: + scalars_df_index.drop(columns=unsupported).to_excel(bf_result_file) + scalars_pandas_df_index.drop(columns=unsupported).to_excel(pd_result_file) + bf_result = bf_result_file.read() + pd_result = bf_result_file.read() + + assert bf_result == pd_result + + +def test_df_to_latex(scalars_df_index, scalars_pandas_df_index): + unsupported = ["numeric_col"] # formatted differently + bf_result = scalars_df_index.drop(columns=unsupported).to_latex() + pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_latex() + + assert bf_result == pd_result + + +def test_df_to_records(scalars_df_index, scalars_pandas_df_index): + unsupported = ["numeric_col"] + bf_result = scalars_df_index.drop(columns=unsupported).to_records() + pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_records() + + for bfi, pdi in zip(bf_result, pd_result): + for bfj, pdj in zip(bfi, pdi): + assert pd.isna(bfj) and pd.isna(pdj) or bfj == pdj + + +def test_df_to_string(scalars_df_index, scalars_pandas_df_index): + unsupported = ["numeric_col"] # formatted differently + + bf_result = scalars_df_index.drop(columns=unsupported).to_string() + pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_string() + + assert bf_result == pd_result + + +def test_df_to_markdown(scalars_df_index, scalars_pandas_df_index): + # Nulls have bug from tabulate https://github.com/astanin/python-tabulate/issues/231 + bf_result = scalars_df_index.dropna().to_markdown() + pd_result = scalars_pandas_df_index.dropna().to_markdown() + + assert bf_result == pd_result + + +def test_df_to_pickle(scalars_df_index, scalars_pandas_df_index): + with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file: + scalars_df_index.to_pickle(bf_result_file) + scalars_pandas_df_index.to_pickle(pd_result_file) + bf_result = bf_result_file.read() + pd_result = bf_result_file.read() + + assert bf_result == pd_result + + +def test_df_to_orc(scalars_df_index, scalars_pandas_df_index): + unsupported = [ + "numeric_col", + "bytes_col", + "date_col", + "datetime_col", + "time_col", + "timestamp_col", + "geography_col", + ] + + bf_result_file = tempfile.TemporaryFile() + pd_result_file = tempfile.TemporaryFile() + scalars_df_index.drop(columns=unsupported).to_orc(bf_result_file) + scalars_pandas_df_index.drop(columns=unsupported).reset_index().to_orc( + pd_result_file + ) + bf_result = bf_result_file.read() + pd_result = bf_result_file.read() + + assert bf_result == pd_result + + 
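# Editor's sketch, not part of the patch: in test_df_to_excel, test_df_to_pickle and
# test_df_to_orc above, ``pd_result`` is read back from ``bf_result_file`` rather than
# ``pd_result_file``, and the handles are never rewound after writing, so each assertion
# compares two empty byte strings. The intended round trip, using the same fixtures,
# would look roughly like this (comparing the unpickled frames is more robust than
# comparing raw bytes):
def _pickle_round_trip_sketch(scalars_df_index, scalars_pandas_df_index):
    with tempfile.TemporaryFile() as bf_file, tempfile.TemporaryFile() as pd_file:
        scalars_df_index.to_pickle(bf_file)
        scalars_pandas_df_index.to_pickle(pd_file)
        bf_file.seek(0)  # rewind each handle before reading back what was just written
        pd_file.seek(0)
        pd.testing.assert_frame_equal(pd.read_pickle(bf_file), pd.read_pickle(pd_file))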
@pytest.mark.parametrize( ("subset", "normalize", "ascending", "dropna"), [ diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 987368ce77..18741468c5 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -210,12 +210,14 @@ def test_dataframe_groupby_multi_sum( (lambda x: x.cummax(numeric_only=True)), (lambda x: x.cummin(numeric_only=True)), (lambda x: x.cumprod()), + (lambda x: x.shift(periods=2)), ], ids=[ "cumsum", "cummax", "cummin", "cumprod", + "shift", ], ) def test_dataframe_groupby_analytic( @@ -229,6 +231,30 @@ def test_dataframe_groupby_analytic( pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) +def test_series_groupby_skew(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.groupby("bool_col")["int64_too"].skew().to_pandas() + pd_result = scalars_pandas_df_index.groupby("bool_col")["int64_too"].skew() + + pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + + +def test_dataframe_groupby_skew(scalars_df_index, scalars_pandas_df_index): + col_names = ["float64_col", "int64_col", "bool_col"] + bf_result = scalars_df_index[col_names].groupby("bool_col").skew().to_pandas() + pd_result = scalars_pandas_df_index[col_names].groupby("bool_col").skew() + + pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + + +def test_dataframe_groupby_diff(scalars_df_index, scalars_pandas_df_index): + col_names = ["float64_col", "int64_col", "string_col"] + bf_result = scalars_df_index[col_names].groupby("string_col").diff(-1) + pd_result = scalars_pandas_df_index[col_names].groupby("string_col").diff(-1) + bf_result_computed = bf_result.to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) + + def test_dataframe_groupby_getitem( scalars_df_index, scalars_pandas_df_index, diff --git a/tests/system/small/test_ipython.py b/tests/system/small/test_ipython.py index 6725805d9a..be98ce0067 100644 --- a/tests/system/small/test_ipython.py +++ b/tests/system/small/test_ipython.py @@ -22,7 +22,8 @@ def test_repr_cache(scalars_df_index): # Make sure the df has a new block that the method return value # is not already cached. 
test_df = scalars_df_index.head() + test_df._block.retrieve_repr_request_results.cache_clear() results = display_formatter.format(test_df) assert results[0].keys() == {"text/plain", "text/html"} - assert test_df._block.retrieve_repr_request_results.cache_info().misses == 1 - assert test_df._block.retrieve_repr_request_results.cache_info().hits == 1 + assert test_df._block.retrieve_repr_request_results.cache_info().misses >= 1 + assert test_df._block.retrieve_repr_request_results.cache_info().hits >= 1 diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py index 25d1e2ad49..1e38b47b4c 100644 --- a/tests/system/small/test_multiindex.py +++ b/tests/system/small/test_multiindex.py @@ -157,7 +157,7 @@ def test_multi_index_getitem_bool(scalars_df_index, scalars_pandas_df_index): ], ids=["level_num", "level_name", "list", "mixed_list"], ) -def test_multi_index_droplevel(scalars_df_index, scalars_pandas_df_index, level): +def test_df_multi_index_droplevel(scalars_df_index, scalars_pandas_df_index, level): bf_frame = scalars_df_index.set_index(["int64_too", "bool_col", "int64_col"]) pd_frame = scalars_pandas_df_index.set_index(["int64_too", "bool_col", "int64_col"]) @@ -167,6 +167,26 @@ def test_multi_index_droplevel(scalars_df_index, scalars_pandas_df_index, level) pandas.testing.assert_frame_equal(bf_result, pd_result) +@pytest.mark.parametrize( + ("level"), + [ + (1), + ("int64_too"), + ([0, 2]), + ([2, "bool_col"]), + ], + ids=["level_num", "level_name", "list", "mixed_list"], +) +def test_series_multi_index_droplevel(scalars_df_index, scalars_pandas_df_index, level): + bf_frame = scalars_df_index.set_index(["int64_too", "bool_col", "int64_col"]) + pd_frame = scalars_pandas_df_index.set_index(["int64_too", "bool_col", "int64_col"]) + + bf_result = bf_frame["string_col"].droplevel(level).to_pandas() + pd_result = pd_frame["string_col"].droplevel(level) + + pandas.testing.assert_series_equal(bf_result, pd_result) + + @pytest.mark.parametrize( ("labels", "level"), [ @@ -198,7 +218,9 @@ def test_multi_index_drop(scalars_df_index, scalars_pandas_df_index, labels, lev "num_names_mixed", ], ) -def test_multi_index_reorder_levels(scalars_df_index, scalars_pandas_df_index, order): +def test_df_multi_index_reorder_levels( + scalars_df_index, scalars_pandas_df_index, order +): bf_frame = scalars_df_index.set_index(["int64_too", "bool_col", "int64_col"]) pd_frame = scalars_pandas_df_index.set_index(["int64_too", "bool_col", "int64_col"]) @@ -208,6 +230,51 @@ def test_multi_index_reorder_levels(scalars_df_index, scalars_pandas_df_index, o pandas.testing.assert_frame_equal(bf_result, pd_result) +@pytest.mark.parametrize( + ("order"), + [ + (1, 0, 2), + (["int64_col", "bool_col", "int64_too"]), + (["int64_col", "bool_col", 0]), + ], + ids=[ + "level_nums", + "level_names", + "num_names_mixed", + ], +) +def test_series_multi_index_reorder_levels( + scalars_df_index, scalars_pandas_df_index, order +): + bf_frame = scalars_df_index.set_index(["int64_too", "bool_col", "int64_col"]) + pd_frame = scalars_pandas_df_index.set_index(["int64_too", "bool_col", "int64_col"]) + + bf_result = bf_frame["string_col"].reorder_levels(order).to_pandas() + pd_result = pd_frame["string_col"].reorder_levels(order) + + pandas.testing.assert_series_equal(bf_result, pd_result) + + +def test_df_multi_index_swaplevel(scalars_df_index, scalars_pandas_df_index): + bf_frame = scalars_df_index.set_index(["int64_too", "bool_col", "int64_col"]) + pd_frame = scalars_pandas_df_index.set_index(["int64_too", 
"bool_col", "int64_col"]) + + bf_result = bf_frame.swaplevel().to_pandas() + pd_result = pd_frame.swaplevel() + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_series_multi_index_swaplevel(scalars_df_index, scalars_pandas_df_index): + bf_frame = scalars_df_index.set_index(["int64_too", "bool_col", "int64_col"]) + pd_frame = scalars_pandas_df_index.set_index(["int64_too", "bool_col", "int64_col"]) + + bf_result = bf_frame["string_col"].swaplevel(0, 2).to_pandas() + pd_result = pd_frame["string_col"].swaplevel(0, 2) + + pandas.testing.assert_series_equal(bf_result, pd_result) + + def test_multi_index_series_groupby(scalars_df_index, scalars_pandas_df_index): bf_frame = scalars_df_index.set_index(["int64_too", "bool_col"]) bf_result = ( @@ -446,6 +513,24 @@ def test_multi_index_series_rename_dict_same_type( ) +def test_multi_index_df_reindex(scalars_df_index, scalars_pandas_df_index): + new_index = pandas.MultiIndex.from_tuples( + [(4, "Hello, World!"), (99, "some_new_string")], + names=["new_index1", "new_index2"], + ) + bf_result = ( + scalars_df_index.set_index(["rowindex_2", "string_col"]) + .reindex(index=new_index) + .to_pandas() + ) + pd_result = scalars_pandas_df_index.set_index(["rowindex_2", "string_col"]).reindex( + index=new_index + ) + pandas.testing.assert_frame_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + # Column Multi-index tests @@ -722,3 +807,76 @@ def test_is_monotonic_decreasing_extra(indexes): bf_result.index.is_monotonic_decreasing == pd_result.index.is_monotonic_decreasing ) + + +def test_column_multi_index_droplevel(scalars_df_index, scalars_pandas_df_index): + columns = ["int64_too", "string_col", "bool_col"] + multi_columns = pandas.MultiIndex.from_tuples( + zip(["a", "b", "a"], ["c", "d", "e"], ["f", "g", "f"]) + ) + bf_df = scalars_df_index[columns].copy() + bf_df.columns = multi_columns + pd_df = scalars_pandas_df_index[columns].copy() + pd_df.columns = multi_columns + + bf_result = bf_df.droplevel(1, axis=1).to_pandas() + pd_result = pd_df.droplevel(1, axis=1) + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_column_multi_index_reindex(scalars_df_index, scalars_pandas_df_index): + columns = ["int64_too", "int64_col", "rowindex_2"] + multi_columns = pandas.MultiIndex.from_tuples(zip(["a", "b", "a"], ["a", "b", "b"])) + bf_df = scalars_df_index[columns].copy() + bf_df.columns = multi_columns + pd_df = scalars_pandas_df_index[columns].copy() + pd_df.columns = multi_columns + + new_index = pandas.MultiIndex.from_tuples( + [("z", "a"), ("a", "a")], names=["newname1", "newname2"] + ) + + bf_result = bf_df.reindex(columns=new_index).to_pandas() + + pd_result = pd_df.reindex(columns=new_index) + + # Pandas uses float64 as default for newly created empty column, bf uses Float64 + pd_result[("z", "a")] = pd_result[("z", "a")].astype(pandas.Float64Dtype()) + + pandas.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_column_multi_index_reorder_levels(scalars_df_index, scalars_pandas_df_index): + columns = ["int64_too", "string_col", "bool_col"] + multi_columns = pandas.MultiIndex.from_tuples( + zip(["a", "b", "a"], ["c", "d", "e"], ["f", "g", "f"]) + ) + bf_df = scalars_df_index[columns].copy() + bf_df.columns = multi_columns + pd_df = scalars_pandas_df_index[columns].copy() + pd_df.columns = multi_columns + + bf_result = bf_df.reorder_levels([-2, -1, 0], axis=1).to_pandas() + pd_result = pd_df.reorder_levels([-2, -1, 0], axis=1) + + 
pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_column_multi_index_swaplevel(scalars_df_index, scalars_pandas_df_index): + columns = ["int64_too", "string_col", "bool_col"] + multi_columns = pandas.MultiIndex.from_tuples( + zip(["a", "b", "a"], ["c", "d", "e"], ["f", "g", "f"]) + ) + bf_df = scalars_df_index[columns].copy() + bf_df.columns = multi_columns + pd_df = scalars_pandas_df_index[columns].copy() + pd_df.columns = multi_columns + + bf_result = bf_df.swaplevel(-3, -1, axis=1).to_pandas() + pd_result = pd_df.swaplevel(-3, -1, axis=1) + + pandas.testing.assert_frame_equal(bf_result, pd_result) diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index e451d5c3a2..a429c6551d 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -209,3 +209,17 @@ def test_merge_series(scalars_dfs, merge_how): ) assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + + +def test_cut(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + pd_result = pd.cut(scalars_pandas_df["float64_col"], 5, labels=False) + bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=False) + + # make sure the result is a supported dtype + assert bf_result.dtype == bpd.Int64Dtype() + + bf_result = bf_result.to_pandas() + pd_result = pd_result.astype("Int64") + pd.testing.assert_series_equal(bf_result, pd_result) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 07dc892ddc..d3560540cc 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -186,6 +186,54 @@ def test_fillna(scalars_dfs): ) +def test_series_replace_scalar_scalar(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name = "string_col" + bf_result = ( + scalars_df[col_name].replace("Hello, World!", "Howdy, Planet!").to_pandas() + ) + pd_result = scalars_pandas_df[col_name].replace("Hello, World!", "Howdy, Planet!") + + pd.testing.assert_series_equal( + pd_result, + bf_result, + ) + + +def test_series_replace_regex_scalar(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name = "string_col" + bf_result = ( + scalars_df[col_name].replace("^H.l", "Howdy, Planet!", regex=True).to_pandas() + ) + pd_result = scalars_pandas_df[col_name].replace( + "^H.l", "Howdy, Planet!", regex=True + ) + + pd.testing.assert_series_equal( + pd_result, + bf_result, + ) + + +def test_series_replace_list_scalar(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name = "string_col" + bf_result = ( + scalars_df[col_name] + .replace(["Hello, World!", "T"], "Howdy, Planet!") + .to_pandas() + ) + pd_result = scalars_pandas_df[col_name].replace( + ["Hello, World!", "T"], "Howdy, Planet!" 
+ ) + + pd.testing.assert_series_equal( + pd_result, + bf_result, + ) + + @pytest.mark.parametrize( ("ignore_index",), ( @@ -759,7 +807,6 @@ def test_isin_raise_error(scalars_df_index, scalars_pandas_df_index): ) def test_isin(scalars_dfs, col_name, test_set): scalars_df, scalars_pandas_df = scalars_dfs - print(type(scalars_pandas_df["datetime_col"].iloc[0])) bf_result = scalars_df[col_name].isin(test_set).to_pandas() pd_result = scalars_pandas_df[col_name].isin(test_set).astype("boolean") pd.testing.assert_series_equal( @@ -1506,6 +1553,28 @@ def test_shift(scalars_df_index, scalars_pandas_df_index): ) +def test_series_ffill(scalars_df_index, scalars_pandas_df_index): + col_name = "numeric_col" + bf_result = scalars_df_index[col_name].ffill(limit=1).to_pandas() + pd_result = scalars_pandas_df_index[col_name].ffill(limit=1) + + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + +def test_series_bfill(scalars_df_index, scalars_pandas_df_index): + col_name = "numeric_col" + bf_result = scalars_df_index[col_name].bfill(limit=2).to_pandas() + pd_result = scalars_pandas_df_index[col_name].bfill(limit=2) + + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + def test_cumsum_int(scalars_df_index, scalars_pandas_df_index): if pd.__version__.startswith("1."): pytest.skip("Series.cumsum NA mask are different in pandas 1.x.") @@ -1588,7 +1657,7 @@ def test_rank_with_nulls(scalars_df_index, scalars_pandas_df_index, na_option, m ("all",), ], ) -def test_nlargest(scalars_df_index, scalars_pandas_df_index, keep): +def test_series_nlargest(scalars_df_index, scalars_pandas_df_index, keep): col_name = "bool_col" bf_result = scalars_df_index[col_name].nlargest(4, keep=keep).to_pandas() pd_result = scalars_pandas_df_index[col_name].nlargest(4, keep=keep) @@ -1622,6 +1691,25 @@ def test_diff(scalars_df_index, scalars_pandas_df_index, periods): ) +@pytest.mark.parametrize( + ("periods",), + [ + (1,), + (2,), + (-1,), + ], +) +def test_series_pct_change(scalars_df_index, scalars_pandas_df_index, periods): + bf_result = scalars_df_index["int64_col"].pct_change(periods=periods).to_pandas() + # cumsum does not behave well on nullable ints in pandas, produces object type and never ignores NA + pd_result = scalars_pandas_df_index["int64_col"].pct_change(periods=periods) + + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + @pytest.mark.parametrize( ("keep",), [ @@ -1630,7 +1718,7 @@ def test_diff(scalars_df_index, scalars_pandas_df_index, periods): ("all",), ], ) -def test_nsmallest(scalars_df_index, scalars_pandas_df_index, keep): +def test_series_nsmallest(scalars_df_index, scalars_pandas_df_index, keep): col_name = "bool_col" bf_result = scalars_df_index[col_name].nsmallest(2, keep=keep).to_pandas() pd_result = scalars_pandas_df_index[col_name].nsmallest(2, keep=keep) @@ -1853,6 +1941,91 @@ def test_series_add_suffix(scalars_df_index, scalars_pandas_df_index): ) +def test_series_filter_items(scalars_df_index, scalars_pandas_df_index): + if pd.__version__.startswith("2.0") or pd.__version__.startswith("1."): + pytest.skip("pandas filter items behavior different pre-2.1") + bf_result = scalars_df_index["float64_col"].filter(items=[5, 1, 3]).to_pandas() + + pd_result = scalars_pandas_df_index["float64_col"].filter(items=[5, 1, 3]) + + # Pandas uses int64 instead of Int64 (nullable) dtype. 
+ pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + +def test_series_filter_like(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.copy().set_index("string_col") + scalars_pandas_df_index = scalars_pandas_df_index.copy().set_index("string_col") + + bf_result = scalars_df_index["float64_col"].filter(like="ello").to_pandas() + + pd_result = scalars_pandas_df_index["float64_col"].filter(like="ello") + + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + +def test_series_filter_regex(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.copy().set_index("string_col") + scalars_pandas_df_index = scalars_pandas_df_index.copy().set_index("string_col") + + bf_result = scalars_df_index["float64_col"].filter(regex="^[GH].*").to_pandas() + + pd_result = scalars_pandas_df_index["float64_col"].filter(regex="^[GH].*") + + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + +def test_series_reindex(scalars_df_index, scalars_pandas_df_index): + bf_result = ( + scalars_df_index["float64_col"].reindex(index=[5, 1, 3, 99, 1]).to_pandas() + ) + + pd_result = scalars_pandas_df_index["float64_col"].reindex(index=[5, 1, 3, 99, 1]) + + # Pandas uses int64 instead of Int64 (nullable) dtype. + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + +def test_series_reindex_nonunique(scalars_df_index): + with pytest.raises(ValueError): + # int64_too is non-unique + scalars_df_index.set_index("int64_too")["float64_col"].reindex( + index=[5, 1, 3, 99, 1], validate=True + ) + + +def test_series_reindex_like(scalars_df_index, scalars_pandas_df_index): + bf_reindex_target = scalars_df_index["float64_col"].reindex(index=[5, 1, 3, 99, 1]) + bf_result = ( + scalars_df_index["int64_too"].reindex_like(bf_reindex_target).to_pandas() + ) + + pd_reindex_target = scalars_pandas_df_index["float64_col"].reindex( + index=[5, 1, 3, 99, 1] + ) + pd_result = scalars_pandas_df_index["int64_too"].reindex_like(pd_reindex_target) + + # Pandas uses int64 instead of Int64 (nullable) dtype. + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + def test_where_with_series(scalars_df_index, scalars_pandas_df_index): bf_result = ( scalars_df_index["int64_col"] diff --git a/tests/unit/ml/test_golden_sql.py b/tests/unit/ml/test_golden_sql.py new file mode 100644 index 0000000000..8d4932a3c3 --- /dev/null +++ b/tests/unit/ml/test_golden_sql.py @@ -0,0 +1,47 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest import mock + +import pytest_mock + +import bigframes +from bigframes.ml import linear_model +import bigframes.pandas as bpd + + +def test_linear_regression_default_fit(mocker: pytest_mock.MockerFixture): + mock_session = mock.create_autospec(spec=bigframes.Session) + + mock_X = mock.create_autospec(spec=bpd.DataFrame) + mock_X._get_block().expr._session = mock_session + + mock_y = mock.create_autospec(spec=bpd.DataFrame) + mock_y.columns.tolist.return_value = ["input_label_column"] + + mock_X.join(mock_y).sql = "input_dataframe_sql" + + # return values we don't care about, but need to provide to continue the program + mock_session._start_query.return_value = (None, mock.MagicMock()) + + mocker.patch( + "bigframes.ml.core._create_temp_model_name", return_value="temp_model_name" + ) + + model = linear_model.LinearRegression() + model.fit(mock_X, mock_y) + + mock_session._start_query.assert_called_once_with( + 'CREATE TEMP MODEL `temp_model_name`\nOPTIONS(\n model_type="LINEAR_REG",\n data_split_method="NO_SPLIT",\n fit_intercept=True,\n INPUT_LABEL_COLS=["input_label_column"])\nAS input_dataframe_sql' + ) diff --git a/tests/unit/ml/test_pipeline.py b/tests/unit/ml/test_pipeline.py index 27706a1a07..ed5c621b1d 100644 --- a/tests/unit/ml/test_pipeline.py +++ b/tests/unit/ml/test_pipeline.py @@ -18,38 +18,35 @@ import sklearn.pipeline as sklearn_pipeline # type: ignore import sklearn.preprocessing as sklearn_preprocessing # type: ignore -import bigframes.ml.compose -import bigframes.ml.linear_model -import bigframes.ml.pipeline -import bigframes.ml.preprocessing +from bigframes.ml import compose, forecasting, linear_model, pipeline, preprocessing def test_pipeline_repr(): - pipeline = bigframes.ml.pipeline.Pipeline( + pl = pipeline.Pipeline( [ ( "preproc", - bigframes.ml.compose.ColumnTransformer( + compose.ColumnTransformer( [ ( "onehot", - bigframes.ml.preprocessing.OneHotEncoder(), + preprocessing.OneHotEncoder(), "species", ), ( "scale", - bigframes.ml.preprocessing.StandardScaler(), + preprocessing.StandardScaler(), ["culmen_length_mm", "flipper_length_mm"], ), ] ), ), - ("linreg", bigframes.ml.linear_model.LinearRegression()), + ("linreg", linear_model.LinearRegression()), ] ) assert ( - pipeline.__repr__() + pl.__repr__() == """Pipeline(steps=[('preproc', ColumnTransformer(transformers=[('onehot', OneHotEncoder(), 'species'), @@ -62,29 +59,29 @@ def test_pipeline_repr(): @pytest.mark.skipif(sklearn_pipeline is None, reason="requires sklearn") def test_pipeline_repr_matches_sklearn(): - bf_pipeline = bigframes.ml.pipeline.Pipeline( + bf_pl = pipeline.Pipeline( [ ( "preproc", - bigframes.ml.compose.ColumnTransformer( + compose.ColumnTransformer( [ ( "onehot", - bigframes.ml.preprocessing.OneHotEncoder(), + preprocessing.OneHotEncoder(), "species", ), ( "scale", - bigframes.ml.preprocessing.StandardScaler(), + preprocessing.StandardScaler(), ["culmen_length_mm", "flipper_length_mm"], ), ] ), ), - ("linreg", bigframes.ml.linear_model.LinearRegression()), + ("linreg", linear_model.LinearRegression()), ] ) - sk_pipeline = sklearn_pipeline.Pipeline( + sk_pl = sklearn_pipeline.Pipeline( [ ( "preproc", @@ -107,4 +104,17 @@ def test_pipeline_repr_matches_sklearn(): ] ) - assert bf_pipeline.__repr__() == sk_pipeline.__repr__() + assert bf_pl.__repr__() == sk_pl.__repr__() + + +def test_pipeline_arima_plus_not_implemented(): + with pytest.raises(NotImplementedError): + pipeline.Pipeline( + [ + ( + "transform", + preprocessing.StandardScaler(), + ), + ("estimator", 
forecasting.ARIMAPlus()), + ] + ) diff --git a/tests/unit/test_core.py b/tests/unit/test_core.py index e01638e22e..8f3e0beb0e 100644 --- a/tests/unit/test_core.py +++ b/tests/unit/test_core.py @@ -13,9 +13,12 @@ # limitations under the License. import ibis +import ibis.expr.types as ibis_types import pandas import bigframes.core as core +import bigframes.operations as ops +import bigframes.operations.aggregations as agg_ops from . import resources @@ -46,6 +49,42 @@ def test_arrayvalue_constructor_from_ibis_table_adds_all_columns(): assert len(actual.columns) == 3 +def test_arrayvalue_with_get_column_type(): + value = resources.create_arrayvalue( + pandas.DataFrame( + { + "col1": [1, 2, 3], + "col2": ["a", "b", "c"], + "col3": [0.1, 0.2, 0.3], + } + ), + total_ordering_columns=["col1"], + ) + col1_type = value.get_column_type("col1") + col2_type = value.get_column_type("col2") + col3_type = value.get_column_type("col3") + assert isinstance(col1_type, pandas.Int64Dtype) + assert isinstance(col2_type, pandas.StringDtype) + assert isinstance(col3_type, pandas.Float64Dtype) + + +def test_arrayvalue_with_get_column(): + value = resources.create_arrayvalue( + pandas.DataFrame( + { + "col1": [1, 2, 3], + "col2": ["a", "b", "c"], + "col3": [0.1, 0.2, 0.3], + } + ), + total_ordering_columns=["col1"], + ) + col1 = value.get_column("col1") + assert isinstance(col1, ibis_types.Value) + assert col1.get_name() == "col1" + assert col1.type().is_int64() + + def test_arrayvalue_to_ibis_expr_with_projection(): value = resources.create_arrayvalue( pandas.DataFrame( @@ -69,3 +108,133 @@ def test_arrayvalue_to_ibis_expr_with_projection(): assert actual.columns[0] == "int64_col" assert actual.columns[1] == "literals" assert actual.columns[2] == "string_col" + + +def test_arrayvalues_to_ibis_expr_with_get_column(): + value = resources.create_arrayvalue( + pandas.DataFrame( + { + "col1": [1, 2, 3], + "col2": ["a", "b", "c"], + "col3": [0.1, 0.2, 0.3], + } + ), + total_ordering_columns=["col1"], + ) + expr = value.get_column("col1") + assert expr.get_name() == "col1" + assert expr.type().is_int64() + + +def test_arrayvalues_to_ibis_expr_with_concat(): + value = resources.create_arrayvalue( + pandas.DataFrame( + { + "col1": [1, 2, 3], + "col2": ["a", "b", "c"], + "col3": [0.1, 0.2, 0.3], + } + ), + total_ordering_columns=["col1"], + ) + expr = value.concat([value]) + actual = expr.to_ibis_expr() + assert len(actual.columns) == 3 + # TODO(ashleyxu, b/299631930): test out the union expression + assert actual.columns[0] == "column_0" + assert actual.columns[1] == "column_1" + assert actual.columns[2] == "column_2" + + +def test_arrayvalues_to_ibis_expr_with_project_unary_op(): + value = resources.create_arrayvalue( + pandas.DataFrame( + { + "col1": [1, 2, 3], + "col2": ["a", "b", "c"], + "col3": [0.1, 0.2, 0.3], + } + ), + total_ordering_columns=["col1"], + ) + expr = value.project_unary_op("col1", ops.AsTypeOp("string")) + assert value.columns[0].type().is_int64() + assert expr.columns[0].type().is_string() + + +def test_arrayvalues_to_ibis_expr_with_project_binary_op(): + value = resources.create_arrayvalue( + pandas.DataFrame( + { + "col1": [1, 2, 3], + "col2": [0.2, 0.3, 0.4], + "col3": [0.1, 0.2, 0.3], + } + ), + total_ordering_columns=["col1"], + ) + expr = value.project_binary_op("col2", "col3", ops.add_op, "col4") + assert expr.columns[3].type().is_float64() + actual = expr.to_ibis_expr() + assert len(expr.columns) == 4 + assert actual.columns[3] == "col4" + + +def 
test_arrayvalues_to_ibis_expr_with_project_ternary_op(): + value = resources.create_arrayvalue( + pandas.DataFrame( + { + "col1": [1, 2, 3], + "col2": [0.2, 0.3, 0.4], + "col3": [True, False, False], + "col4": [0.1, 0.2, 0.3], + } + ), + total_ordering_columns=["col1"], + ) + expr = value.project_ternary_op("col2", "col3", "col4", ops.where_op, "col5") + assert expr.columns[4].type().is_float64() + actual = expr.to_ibis_expr() + assert len(expr.columns) == 5 + assert actual.columns[4] == "col5" + + +def test_arrayvalue_to_ibis_expr_with_aggregate(): + value = resources.create_arrayvalue( + pandas.DataFrame( + { + "col1": [1, 2, 3], + "col2": ["a", "b", "c"], + "col3": [0.1, 0.2, 0.3], + } + ), + total_ordering_columns=["col1"], + ) + expr = value.aggregate( + aggregations=(("col1", agg_ops.sum_op, "col4"),), + by_column_ids=["col1"], + dropna=False, + ) + actual = expr.to_ibis_expr() + assert len(expr.columns) == 2 + assert actual.columns[0] == "col1" + assert actual.columns[1] == "col4" + assert expr.columns[1].type().is_int64() + + +def test_arrayvalue_to_ibis_expr_with_corr_aggregate(): + value = resources.create_arrayvalue( + pandas.DataFrame( + { + "col1": [1, 2, 3], + "col2": ["a", "b", "c"], + "col3": [0.1, 0.2, 0.3], + } + ), + total_ordering_columns=["col1"], + ) + expr = value.corr_aggregate(corr_aggregations=[("col1", "col3", "col4")]) + actual = expr.to_ibis_expr() + assert len(expr.columns) == 1 + assert actual.columns[0] == "col4" + assert expr.columns[0].type().is_float64() diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py index d209284ab7..a4e61ca0f9 100644 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py +++ b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py @@ -12,8 +12,20 @@ def _approx_quantiles(translator, op: vendored_ibis_ops.ApproximateMultiQuantile return f"APPROX_QUANTILES({arg}, {num_bins})" +def _first_non_null_value(translator, op: vendored_ibis_ops.FirstNonNullValue): + arg = translator.translate(op.arg) + return f"FIRST_VALUE({arg} IGNORE NULLS)" + + +def _last_non_null_value(translator, op: vendored_ibis_ops.LastNonNullValue): + arg = translator.translate(op.arg) + return f"LAST_VALUE({arg} IGNORE NULLS)" + + patched_ops = { vendored_ibis_ops.ApproximateMultiQuantile: _approx_quantiles, + vendored_ibis_ops.FirstNonNullValue: _first_non_null_value, + vendored_ibis_ops.LastNonNullValue: _last_non_null_value, } OPERATION_REGISTRY.update(patched_ops) diff --git a/third_party/bigframes_vendored/ibis/expr/operations/__init__.py b/third_party/bigframes_vendored/ibis/expr/operations/__init__.py index f3ab753a3b..1612d9c12e 100644 --- a/third_party/bigframes_vendored/ibis/expr/operations/__init__.py +++ b/third_party/bigframes_vendored/ibis/expr/operations/__init__.py @@ -1,4 +1,5 @@ # Contains code from https://github.com/ibis-project/ibis/blob/master/ibis/expr/operations/__init__.py from __future__ import annotations +from third_party.bigframes_vendored.ibis.expr.operations.analytic import * # noqa: F403 from third_party.bigframes_vendored.ibis.expr.operations.reductions import * # noqa: F403 diff --git a/third_party/bigframes_vendored/ibis/expr/operations/analytic.py b/third_party/bigframes_vendored/ibis/expr/operations/analytic.py new file mode 100644 index 0000000000..038987cac9 --- /dev/null +++ b/third_party/bigframes_vendored/ibis/expr/operations/analytic.py @@ -0,0 +1,26 @@ +# Contains code from 
https://github.com/ibis-project/ibis/blob/master/ibis/expr/operations/analytic.py + +from __future__ import annotations + +from ibis.expr.operations.analytic import Analytic +import ibis.expr.rules as rlz + + +class FirstNonNullValue(Analytic): + """Retrieve the first element.""" + + arg = rlz.column(rlz.any) + output_dtype = rlz.dtype_like("arg") + + +class LastNonNullValue(Analytic): + """Retrieve the last element.""" + + arg = rlz.column(rlz.any) + output_dtype = rlz.dtype_like("arg") + + +__all__ = [ + "FirstNonNullValue", + "LastNonNullValue", +] diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 8c81b23b6c..113c6547a0 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -13,7 +13,7 @@ from typing import Iterable, Literal, Mapping, Optional, Sequence, Union -import numpy +import numpy as np from bigframes import constants from third_party.bigframes_vendored.pandas.core.generic import NDFrame @@ -56,7 +56,7 @@ def axes(self) -> list: return [self.index, self.columns] @property - def values(self) -> numpy.ndarray: + def values(self) -> np.ndarray: """Return the values of DataFrame in the form of a NumPy array. Args: @@ -72,9 +72,7 @@ def values(self) -> numpy.ndarray: # ---------------------------------------------------------------------- # IO methods (to / from other formats) - def to_numpy( - self, dtype=None, copy=False, na_value=None, **kwargs - ) -> numpy.ndarray: + def to_numpy(self, dtype=None, copy=False, na_value=None, **kwargs) -> np.ndarray: """ Convert the DataFrame to a NumPy array. @@ -154,6 +152,250 @@ def to_parquet( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def to_dict( + self, + orient: Literal[ + "dict", "list", "series", "split", "tight", "records", "index" + ] = "dict", + into: type[dict] = dict, + **kwargs, + ) -> dict | list[dict]: + """ + Convert the DataFrame to a dictionary. + + The type of the key-value pairs can be customized with the parameters + (see below). + + Args: + orient (str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}): + Determines the type of the values of the dictionary. + 'dict' (default) : dict like {column -> {index -> value}}. + 'list' : dict like {column -> [values]}. + 'series' : dict like {column -> Series(values)}. + split' : dict like {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}. + 'tight' : dict like {'index' -> [index], 'columns' -> [columns], 'data' -> [values], + 'index_names' -> [index.names], 'column_names' -> [column.names]}. + 'records' : list like [{column -> value}, ... , {column -> value}]. + 'index' : dict like {index -> {column -> value}}. + into (class, default dict): + The collections.abc.Mapping subclass used for all Mappings + in the return value. Can be the actual class or an empty + instance of the mapping type you want. If you want a + collections.defaultdict, you must pass it initialized. + + index (bool, default True): + Whether to include the index item (and index_names item if `orient` + is 'tight') in the returned dictionary. Can only be ``False`` + when `orient` is 'split' or 'tight'. + + Returns: + dict or list of dict: Return a collections.abc.Mapping object representing the DataFrame. + The resulting transformation depends on the `orient` parameter. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def to_excel(self, excel_writer, sheet_name: str = "Sheet1", **kwargs) -> None: + """ + Write DataFrame to an Excel sheet. + + To write a single DataFrame to an Excel .xlsx file it is only necessary to + specify a target file name. To write to multiple sheets it is necessary to + create an `ExcelWriter` object with a target file name, and specify a sheet + in the file to write to. + + Multiple sheets may be written to by specifying unique `sheet_name`. + With all data written to the file it is necessary to save the changes. + Note that creating an `ExcelWriter` object with a file name that already + exists will result in the contents of the existing file being erased. + + Args: + excel_writer (path-like, file-like, or ExcelWriter object): + File path or existing ExcelWriter. + sheet_name (str, default 'Sheet1'): + Name of sheet which will contain DataFrame. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def to_latex( + self, buf=None, columns=None, header=True, index=True, **kwargs + ) -> str | None: + r""" + Render object to a LaTeX tabular, longtable, or nested table. + + Requires ``\usepackage{{booktabs}}``. The output can be copy/pasted + into a main LaTeX document or read from an external file + with ``\input{{table.tex}}``. + + Args: + buf (str, Path or StringIO-like, optional, default None): + Buffer to write to. If None, the output is returned as a string. + columns (list of label, optional): + The subset of columns to write. Writes all columns by default. + header (bool or list of str, default True): + Write out the column names. If a list of strings is given, + it is assumed to be aliases for the column names. + index (bool, default True): + Write row names (index). + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def to_records( + self, index: bool = True, column_dtypes=None, index_dtypes=None + ) -> np.recarray: + """ + Convert DataFrame to a NumPy record array. + + Index will be included as the first field of the record array if + requested. + + Args: + index (bool, default True): + Include index in resulting record array, stored in 'index' + field or using the index label, if set. + column_dtypes (str, type, dict, default None): + If a string or type, the data type to store all columns. If + a dictionary, a mapping of column names and indices (zero-indexed) + to specific data types. + index_dtypes (str, type, dict, default None): + If a string or type, the data type to store all index levels. If + a dictionary, a mapping of index level names and indices + (zero-indexed) to specific data types. + + This mapping is applied only if `index=True`. + + Returns: + np.recarray: NumPy ndarray with the DataFrame labels as fields and each row + of the DataFrame as entries. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def to_string( + self, + buf=None, + columns: Sequence[str] | None = None, + col_space=None, + header: bool | Sequence[str] = True, + index: bool = True, + na_rep: str = "NaN", + formatters=None, + float_format=None, + sparsify: bool | None = None, + index_names: bool = True, + justify: str | None = None, + max_rows: int | None = None, + max_cols: int | None = None, + show_dimensions: bool = False, + decimal: str = ".", + line_width: int | None = None, + min_rows: int | None = None, + max_colwidth: int | None = None, + encoding: str | None = None, + ): + """Render a DataFrame to a console-friendly tabular output. + + Args: + buf (str, Path or StringIO-like, optional, default None): + Buffer to write to. If None, the output is returned as a string. + columns (sequence, optional, default None): + The subset of columns to write. Writes all columns by default. + col_space (int, list or dict of int, optional): + The minimum width of each column. + header (bool or sequence, optional): + Write out the column names. If a list of strings is given, it is assumed to be aliases for the column names. + index (bool, optional, default True): + Whether to print index (row) labels. + na_rep (str, optional, default 'NaN'): + String representation of NAN to use. + formatters (list, tuple or dict of one-param. functions, optional): + Formatter functions to apply to columns' elements by position or + name. + The result of each function must be a unicode string. + List/tuple must be of length equal to the number of columns. + float_format (one-parameter function, optional, default None): + Formatter function to apply to columns' elements if they are + floats. The result of this function must be a unicode string. + sparsify (bool, optional, default True): + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. + index_names (bool, optional, default True): + Prints the names of the indexes. + justify (str, default None): + How to justify the column labels. If None uses the option from + the print configuration (controlled by set_option), 'right' out + of the box. Valid values are, 'left', 'right', 'center', 'justify', + 'justify-all', 'start', 'end', 'inherit', 'match-parent', 'initial', + 'unset'. + max_rows (int, optional): + Maximum number of rows to display in the console. + min_rows (int, optional): + The number of rows to display in the console in a truncated repr + (when number of rows is above `max_rows`). + max_cols (int, optional): + Maximum number of columns to display in the console. + show_dimensions (bool, default False): + Display DataFrame dimensions (number of rows by number of columns). + decimal (str, default '.'): + Character recognized as decimal separator, e.g. ',' in Europe. + line_width (int, optional): + Width to wrap a line in characters. + max_colwidth (int, optional): + Max width to truncate each column in characters. By default, no limit. + encoding (str, default "utf-8"): + Set character encoding. + + Returns: + str or None: If buf is None, returns the result as a string. Otherwise returns + None. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def to_markdown( + self, + buf=None, + mode: str = "wt", + index: bool = True, + **kwargs, + ): + """Print DataFrame in Markdown-friendly format. + + Args: + buf (str, Path or StringIO-like, optional, default None): + Buffer to write to. If None, the output is returned as a string. 
+ mode (str, optional): + Mode in which file is opened. + index (bool, optional, default True): + Add index (row) labels. + **kwargs + These parameters will be passed to `tabulate `_. + + Returns: + DataFrame in Markdown-friendly format. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def to_pickle(self, path, **kwargs) -> None: + """Pickle (serialize) object to file. + + Args: + path (str): + File path where the pickled object will be stored. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def to_orc(self, path=None, **kwargs) -> bytes | None: + """ + Write a DataFrame to the ORC format. + + Args: + path (str, file-like object or None, default None): + If a string, it will be used as Root Directory path + when writing a partitioned dataset. By file-like object, + we refer to objects with a write() method, such as a file handle + (e.g. via builtin open function). If path is None, + a bytes object is returned. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + # ---------------------------------------------------------------------- # Unsorted @@ -185,6 +427,53 @@ def assign(self, **kwargs) -> DataFrame: # ---------------------------------------------------------------------- # Reindexing and alignment + def reindex( + self, + labels=None, + *, + index=None, + columns=None, + axis=None, + ): + """Conform DataFrame to new index with optional filling logic. + + Places NA in locations having no value in the previous index. A new object + is produced. + + Args: + labels (array-like, optional): + New labels / index to conform the axis specified by 'axis' to. + index (array-like, optional): + New labels for the index. Preferably an Index object to avoid + duplicating data. + columns (array-like, optional): + New labels for the columns. Preferably an Index object to avoid + duplicating data. + axis (int or str, optional): + Axis to target. Can be either the axis name ('index', 'columns') + or number (0, 1). + Returns: + DataFrame: DataFrame with changed index. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def reindex_like(self, other): + """Return an object with matching indices as other object. + + Conform the object to the same index on all axes. Optional + filling logic, placing Null in locations having no value + in the previous index. + + Args: + other (Object of the same data type): + Its row and column indices are used to define the new indices + of this object. + + Returns: + Series or DataFrame: Same type as caller, but with changed indices on each axis. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def drop( self, labels=None, *, axis=0, index=None, columns=None, level=None ) -> DataFrame | None: @@ -276,7 +565,9 @@ def set_index( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def reorder_levels(self, order: Sequence[int | str]) -> DataFrame: + def reorder_levels( + self, order: Sequence[int | str], axis: str | int = 0 + ) -> DataFrame: """ Rearrange index levels using input order. May not drop or duplicate levels. @@ -284,13 +575,33 @@ def reorder_levels(self, order: Sequence[int | str]) -> DataFrame: order (list of int or list of str): List representing new level order. Reference level by number (position) or by key (label). + axis ({0 or 'index', 1 or 'columns'}, default 0): + Where to reorder levels. Returns: DataFrame: DataFrame of rearranged index. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def droplevel(self, level): + def swaplevel(self, i, j, axis: str | int = 0) -> DataFrame: + """ + Swap levels i and j in a :class:`MultiIndex`. + + Default is to swap the two innermost levels of the index. + + Args: + i, j (int or str): + Levels of the indices to be swapped. Can pass level name as string. + axis ({0 or 'index', 1 or 'columns'}, default 0): + The axis to swap levels on. 0 or 'index' for row-wise, 1 or + 'columns' for column-wise. + + Returns: + DataFrame: DataFrame with levels swapped in MultiIndex. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def droplevel(self, level, axis: str | int = 0): """ Return DataFrame with requested index / column level(s) removed. @@ -299,6 +610,11 @@ def droplevel(self, level): If a string is given, must be the name of a level If list-like, elements must be names or positional indexes of levels. + axis ({0 or 'index', 1 or 'columns'}, default 0): + Axis along which the level(s) is removed: + + * 0 or 'index': remove level(s) in column. + * 1 or 'columns': remove level(s) in row. Returns: DataFrame: DataFrame with requested index / column level(s) removed. """ @@ -889,6 +1205,54 @@ def rpow(self, other, axis: str | int = "columns") -> DataFrame: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def combine( + self, other, func, fill_value=None, overwrite: bool = True + ) -> DataFrame: + """Perform column-wise combine with another DataFrame. + + Combines a DataFrame with `other` DataFrame using `func` + to element-wise combine columns. The row and column indexes of the + resulting DataFrame will be the union of the two. + + Args: + other (DataFrame): + The DataFrame to merge column-wise. + func (function): + Function that takes two series as inputs and return a Series or a + scalar. Used to merge the two dataframes column by columns. + fill_value (scalar value, default None): + The value to fill NaNs with prior to passing any column to the + merge func. + overwrite (bool, default True): + If True, columns in `self` that do not exist in `other` will be + overwritten with NaNs. + + Returns: + DataFrame: Combination of the provided DataFrames. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def combine_first(self, other) -> DataFrame: + """ + Update null elements with value in the same location in `other`. + + Combine two DataFrame objects by filling null values in one DataFrame + with non-null values from other DataFrame. The row and column indexes + of the resulting DataFrame will be the union of the two. The resulting + dataframe contains the 'first' dataframe values and overrides the + second one values where both first.loc[index, col] and + second.loc[index, col] are not missing values, upon calling + first.combine_first(second). + + Args: + other (DataFrame): + Provided DataFrame to use to fill null values. + + Returns: + DataFrame: The result of combining the provided DataFrame with the other object. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + # ---------------------------------------------------------------------- # Data reshaping @@ -1191,6 +1555,20 @@ def var(self, *, numeric_only: bool = False): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def skew(self, *, numeric_only: bool = False): + """Return unbiased skew over requested axis. + + Normalized by N-1. 
+ + Args: + numeric_only (bool, default False): + Include only float, int, boolean columns. + + Returns: + Series + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def std(self, *, numeric_only: bool = False): """Return sample standard deviation over requested axis. @@ -1222,6 +1600,76 @@ def count(self, *, numeric_only: bool = False): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def nlargest(self, n: int, columns, keep: str = "first"): + """ + Return the first `n` rows ordered by `columns` in descending order. + + Return the first `n` rows with the largest values in `columns`, in + descending order. The columns that are not specified are returned as + well, but not used for ordering. + + This method is equivalent to + ``df.sort_values(columns, ascending=False).head(n)``, but more + performant. + + Args: + n (int): + Number of rows to return. + columns (label or list of labels): + Column label(s) to order by. + keep ({'first', 'last', 'all'}, default 'first'): + Where there are duplicate values: + + - ``first`` : prioritize the first occurrence(s) + - ``last`` : prioritize the last occurrence(s) + - ``all`` : do not drop any duplicates, even it means + selecting more than `n` items. + + Returns: + DataFrame: The first `n` rows ordered by the given columns in descending order. + + .. note:: + This function cannot be used with all column types. For example, when + specifying columns with `object` or `category` dtypes, ``TypeError`` is + raised. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def nsmallest(self, n: int, columns, keep: str = "first"): + """ + Return the first `n` rows ordered by `columns` in ascending order. + + Return the first `n` rows with the smallest values in `columns`, in + ascending order. The columns that are not specified are returned as + well, but not used for ordering. + + This method is equivalent to + ``df.sort_values(columns, ascending=True).head(n)``, but more + performant. + + Args: + n (int): + Number of rows to return. + columns (label or list of labels): + Column label(s) to order by. + keep ({'first', 'last', 'all'}, default 'first'): + Where there are duplicate values: + + - ``first`` : prioritize the first occurrence(s) + - ``last`` : prioritize the last occurrence(s) + - ``all`` : do not drop any duplicates, even it means + selecting more than `n` items. + + Returns: + DataFrame: The first `n` rows ordered by the given columns in ascending order. + + .. note:: + This function cannot be used with all column types. For example, when + specifying columns with `object` or `category` dtypes, ``TypeError`` is + raised. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def nunique(self): """ Count number of distinct elements in specified axis. @@ -1271,6 +1719,25 @@ def cumprod(self) -> DataFrame: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def diff( + self, + periods: int = 1, + ) -> NDFrame: + """First discrete difference of element. + + Calculates the difference of a DataFrame element compared with another + element in the DataFrame (default is element in previous row). + + Args: + periods (int, default 1): + Periods to shift for calculating difference, accepts negative + values. + + Returns: + bigframes.dataframe.DataFrame: First differences of the Series. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def agg(self, func): """ Aggregate using one or more operations over the specified axis. diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 56d3b2434f..7d496891b0 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -335,6 +335,41 @@ def copy(self): # ---------------------------------------------------------------------- # Action Methods + def ffill(self, *, limit: Optional[int] = None): + """Fill NA/NaN values by propagating the last valid observation to next valid. + + Args: + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + + + Returns: + Series/DataFrame or None: Object with missing values filled. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def bfill(self, *, limit: Optional[int] = None): + """Fill NA/NaN values by using the next valid observation to fill the gap. + + Args: + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + + Returns: + Series/DataFrame or None: Object with missing values filled. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def isna(self) -> NDFrame: """Detect missing values. @@ -367,6 +402,36 @@ def notna(self) -> NDFrame: notnull = notna + def filter( + self, + items=None, + like: str | None = None, + regex: str | None = None, + axis=None, + ) -> NDFrame: + """ + Subset the dataframe rows or columns according to the specified index labels. + + Note that this routine does not filter a dataframe on its + contents. The filter is applied to the labels of the index. + + Args: + items (list-like): + Keep labels from axis which are in items. + like (str): + Keep labels from axis for which "like in label == True". + regex (str (regular expression)): + Keep labels from axis for which re.search(regex, label) == True. + axis ({0 or 'index', 1 or 'columns', None}, default None): + The axis to filter on, expressed either as an index (int) + or axis name (str). By default this is the info axis, 'columns' for + DataFrame. For `Series` this parameter is unused and defaults to `None`. + + Returns: + same type as input object + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def shift( self, periods: int = 1, @@ -384,6 +449,30 @@ def shift( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def pct_change(self, periods: int = 1): + """ + Fractional change between the current and a prior element. + + Computes the fractional change from the immediately previous row by + default. This is useful in comparing the fraction of change in a time + series of elements. + + .. 
note:: + + Despite the name of this method, it calculates fractional change + (also known as per unit change or relative change) and not + percentage change. If you need the percentage change, multiply + these values by 100. + + Args: + periods (int, default 1): + Periods to shift for forming percent change. + + Returns: + Series or DataFrame: The same type as the calling object. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def rank( self, axis=0, diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index 9271da8a5e..7849a3afd5 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -124,6 +124,26 @@ def var( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def skew( + self, + *, + numeric_only: bool = False, + ): + """ + Return unbiased skew within groups. + + Normalized by N-1. + + Args: + numeric_only (bool, default False): + Include only `float`, `int` or `boolean` data. + + Returns: + Series or DataFrame + Variance of values within each group. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def sum( self, numeric_only: bool = False, diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py b/third_party/bigframes_vendored/pandas/core/indexes/base.py index 404a99809c..864007b774 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -21,6 +21,16 @@ def shape(self): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + @property + def nlevels(self) -> int: + """Number of levels.""" + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def is_unique(self) -> bool: + """Return if the index has unique values.""" + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def to_numpy(self, dtype): """ A NumPy ndarray representing the values in this Series or Index. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 76fb46a700..c6dd973372 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -758,6 +758,41 @@ def groupby( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def reindex(self, index=None): + """ + Conform Series to new index with optional filling logic. + + Places NA/NaN in locations having no value in the previous index. A new object + is produced unless the new index is equivalent to the current one and + ``copy=False``. + + Args: + index (array-like, optional): + New labels for the index. Preferably an Index object to avoid + duplicating data. + + Returns: + Series: Series with changed index. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def reindex_like(self, other): + """Return an object with matching indices as other object. + + Conform the object to the same index on all axes. Optional + filling logic, placing Null in locations having no value + in the previous index. + + Args: + other (Object of the same data type): + Its row and column indices are used to define the new indices + of this object. + + Returns: + Series or DataFrame: Same type as caller, but with changed indices on each axis. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def drop( self, labels=None, *, axis=0, index=None, columns=None, level=None ) -> Series | None: @@ -790,7 +825,7 @@ def drop( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def reorder_levels(self, order: Sequence) -> Series: + def reorder_levels(self, order: Sequence, axis) -> Series: """ Rearrange index levels using input order. @@ -800,12 +835,31 @@ def reorder_levels(self, order: Sequence) -> Series: order (list of int representing new level order): Reference level by number or key. + axis ({0 or 'index', 1 or 'columns'}, default 0): + For `Series` this parameter is unused and defaults to 0. + + Returns: type of caller (new object) """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def droplevel(self, level): + def swaplevel(self, i, j): + """ + Swap levels i and j in a `MultiIndex`. + + Default is to swap the two innermost levels of the index. + + Args: + i, j (int or str): + Levels of the indices to be swapped. Can pass level name as string. + + Returns: + Series: Series with levels swapped in MultiIndex + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def droplevel(self, level, axis): """ Return Series with requested index / column level(s) removed. @@ -815,6 +869,9 @@ def droplevel(self, level): If list-like, elements must be names or positional indexes of levels. + axis ({0 or 'index', 1 or 'columns'}, default 0): + For `Series` this parameter is unused and defaults to 0. + Returns: Series with requested index / column level(s) removed. """ @@ -836,6 +893,69 @@ def fillna( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def replace( + self, + to_replace, + value=None, + ) -> Series | None: + """ + Replace values given in `to_replace` with `value`. + + Values of the Series/DataFrame are replaced with other values dynamically. + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value. + + Args: + to_replace (str, regex, list, int, float or None): + How to find the values that will be replaced. + + * numeric, str or regex: + + - numeric: numeric values equal to `to_replace` will be + replaced with `value` + - str: string exactly matching `to_replace` will be replaced + with `value` + - regex: regexs matching `to_replace` will be replaced with + `value` + + * list of str, regex, or numeric: + + - First, if `to_replace` and `value` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for `value` since there + are only a few possible substitution regexes you can use. + - str, regex and numeric rules apply as above. + + value (scalar, default None): + Value to replace any values matching `to_replace` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). Regular expressions, strings and lists or dicts of such + objects are also allowed. + regex (bool, default False): + Whether to interpret `to_replace` and/or `value` as regular + expressions. If this is ``True`` then `to_replace` *must* be a + string. + + Returns: + Series/DataFrame: Object after replacement. 
+ + Raises: + TypeError: + * If `to_replace` is not a scalar, array-like, ``dict``, or ``None`` + * If `to_replace` is a ``dict`` and `value` is not a ``list``, + ``dict``, ``ndarray``, or ``Series`` + * If `to_replace` is ``None`` and `regex` is not compilable + into a regular expression or is a list, dict, ndarray, or + Series. + * When replacing multiple ``bool`` or ``datetime64`` objects and + the arguments to `to_replace` does not match the type of the + value being replaced + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def dropna(self, *, axis=0, inplace: bool = False, how=None) -> Series: """ Return a new Series with missing values removed. diff --git a/third_party/bigframes_vendored/sklearn/base.py b/third_party/bigframes_vendored/sklearn/base.py index 847ad06f75..42868ce51f 100644 --- a/third_party/bigframes_vendored/sklearn/base.py +++ b/third_party/bigframes_vendored/sklearn/base.py @@ -144,6 +144,7 @@ def fit_transform(self, X, y=None): bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_features_new) Transformed DataFrame. """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) class MetaEstimatorMixin: diff --git a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py index ff1c04edbe..ece62dc147 100644 --- a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py +++ b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py @@ -49,7 +49,6 @@ def fit( self, X, y=None, - transforms: Optional[List[str]] = None, ): """Compute k-means clustering. @@ -58,10 +57,6 @@ def fit( DataFrame of shape (n_samples, n_features). Training data. y (default None): Not used, present here for API consistency by convention. - transforms (Optional[List[str]], default None): - Do not use. Internal param to be deprecated. - Use bigframes.ml.pipeline instead. - Returns: KMeans: Fitted Estimator. diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py index 85feab0024..97fee5a501 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py @@ -49,10 +49,6 @@ def fit(self, X, y=None): y (default None): Ignored. - transforms (Optional[List[str]], default None): - Do not use. Internal param to be deprecated. - Use bigframes.ml.pipeline instead. - Returns: PCA: Fitted estimator. """ diff --git a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py index 79224a772d..6be41bf9aa 100644 --- a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py +++ b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py @@ -54,10 +54,6 @@ def fit(self, X, y): Series or DataFrame of shape (n_samples,) or (n_samples, n_targets). Target values. Will be cast to X's dtype if necessary. - transforms (Optional[List[str]], default None): - Do not use. Internal param to be deprecated. - Use bigframes.ml.pipeline instead. - Returns: Fitted Estimator. diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_base.py b/third_party/bigframes_vendored/sklearn/linear_model/_base.py index 8141da4e3b..81b4fca157 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_base.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_base.py @@ -74,7 +74,6 @@ def fit( self, X, y, - transforms: Optional[List[str]] = None, ): """Fit linear model. 
@@ -86,10 +85,6 @@ def fit( Series or DataFrame of shape (n_samples,) or (n_samples, n_targets). Target values. Will be cast to X's dtype if necessary. - transforms (Optional[List[str]], default None): - Do not use. Internal param to be deprecated. - Use bigframes.ml.pipeline instead. - Returns: LinearRegression: Fitted Estimator. """ diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py index a06035eef6..133dc4498e 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py @@ -37,7 +37,6 @@ def fit( self, X, y, - transforms: Optional[List[str]] = None, ): """Fit the model according to the given training data. @@ -50,10 +49,6 @@ def fit( y (bigframes.dataframe.DataFrame or bigframes.series.Series): DataFrame of shape (n_samples,). Target vector relative to X. - transforms (Optional[List[str]], default None): - Do not use. Internal param to be deprecated. - Use bigframes.ml.pipeline instead. - Returns: LogisticRegression: Fitted Estimator. diff --git a/third_party/bigframes_vendored/sklearn/preprocessing/_data.py b/third_party/bigframes_vendored/sklearn/preprocessing/_data.py index d013043467..89981e34c0 100644 --- a/third_party/bigframes_vendored/sklearn/preprocessing/_data.py +++ b/third_party/bigframes_vendored/sklearn/preprocessing/_data.py @@ -8,10 +8,10 @@ # License: BSD 3 clause from bigframes import constants -from third_party.bigframes_vendored.sklearn.base import BaseEstimator +from third_party.bigframes_vendored.sklearn.base import BaseEstimator, TransformerMixin -class StandardScaler(BaseEstimator): +class StandardScaler(BaseEstimator, TransformerMixin): """Standardize features by removing the mean and scaling to unit variance. The standard score of a sample `x` is calculated as:z = (x - u) / s @@ -28,30 +28,23 @@ class StandardScaler(BaseEstimator): machine learning estimators: they might behave badly if the individual features do not more or less look like standard normally distributed data (e.g. Gaussian with 0 mean and unit variance). - """ - - def fit(self, X): - """Compute the mean and std to be used for later scaling. - Examples: + Examples: .. code-block:: from bigframes.ml.preprocessing import StandardScaler + import bigframes.pandas as bpd - enc = StandardScaler() - X = [['Male', 1], ['Female', 3], ['Female', 2]] - enc.fit(X) - - Examples: - - .. code-block:: - - from bigframes.ml import StandardScaler + scaler = StandardScaler() + data = bpd.DataFrame({"a": [0, 0, 1, 1], "b":[0, 0, 1, 1]}) + scaler.fit(data) + print(scaler.transform(data)) + print(scaler.transform(bpd.DataFrame({"a": [2], "b":[2]}))) + """ - enc = StandardScaler() - X = [['Male', 1], ['Female', 3], ['Female', 2]] - enc.fit(X) + def fit(self, X): + """Compute the mean and std to be used for later scaling. Args: X (bigframes.dataframe.DataFrame or bigframes.series.Series): diff --git a/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py b/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py index b1cf17e539..b0f0df8e15 100644 --- a/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py +++ b/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py @@ -37,12 +37,8 @@ class OneHotEncoder(BaseEstimator): when considering infrequent categories. 
If there are infrequent categories, max_categories includes the category representing the infrequent categories along with the frequent categories. Default None, set limit to 1,000,000. - """ - - def fit(self, X): - """Fit OneHotEncoder to X. - Examples: + Examples: Given a dataset with two features, we let the encoder find the unique values per feature and transform the data to a binary one-hot encoding. @@ -50,10 +46,16 @@ def fit(self, X): .. code-block:: from bigframes.ml.preprocessing import OneHotEncoder + import bigframes.pandas as bpd enc = OneHotEncoder() - X = [['Male', 1], ['Female', 3], ['Female', 2]] + X = bpd.DataFrame({"a": ["Male", "Female", "Female"], "b": ["1", "3", "2"]}) enc.fit(X) + print(enc.transform(bpd.DataFrame({"a": ["Female", "Male"], "b": ["1", "4"]}))) + """ + + def fit(self, X): + """Fit OneHotEncoder to X. Args: X (bigframes.dataframe.DataFrame or bigframes.series.Series): diff --git a/third_party/bigframes_vendored/xgboost/sklearn.py b/third_party/bigframes_vendored/xgboost/sklearn.py index 620c87fa3d..b7b43b85a3 100644 --- a/third_party/bigframes_vendored/xgboost/sklearn.py +++ b/third_party/bigframes_vendored/xgboost/sklearn.py @@ -37,10 +37,6 @@ def fit(self, X, y): DataFrame of shape (n_samples,) or (n_samples, n_targets). Target values. Will be cast to X's dtype if necessary. - transforms (Optional[List[str]], default None): - Do not use. Internal param to be deprecated. - Use bigframes.ml.pipeline instead. - Returns: XGBModel: Fitted Estimator. """
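For reference, a minimal usage sketch of the frame-level methods documented in this change (``nlargest``/``nsmallest``, ``pct_change``, ``combine_first``). This is illustrative only and not part of the patch: it assumes a configured BigQuery DataFrames session (project and location set as described in the README), uses arbitrary column names and data, and expects the concrete implementations to follow the vendored pandas docstrings above.

.. code-block::

    import bigframes.pandas as bpd

    df = bpd.DataFrame({"a": [1, 2, 3, 4], "b": [4.0, 2.0, 2.0, 1.0]})

    # Largest two rows ordered by "b"; keep="all" also keeps ties beyond n.
    df.nlargest(2, "b", keep="all")

    # Fractional (not percentage) change from the previous row.
    df.pct_change()

    # Fill nulls in `left` from `right`; non-null values in `left` take precedence.
    left = bpd.DataFrame({"a": [None, 2.0], "b": [3.0, None]})
    right = bpd.DataFrame({"a": [10.0, 20.0], "b": [30.0, 40.0]})
    left.combine_first(right)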
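A similar sketch for the newly documented Series and GroupBy pieces (``ffill``, ``replace``, ``reindex``, ``GroupBy.skew``), under the same assumptions: a configured session, illustrative data, and behavior matching the docstrings vendored above rather than a verified implementation.

.. code-block::

    import bigframes.pandas as bpd

    s = bpd.Series([1.0, None, None, 4.0])

    # Propagate the last valid observation forward; `limit` caps consecutive fills.
    s.ffill(limit=1)

    # Replace values equal to `to_replace` with `value`.
    s.replace(4.0, 40.0)

    # Conform to a new index; labels absent from the original index become NA.
    s.reindex([0, 1, 5])

    # Unbiased skew of the values within each group.
    df = bpd.DataFrame(
        {
            "key": ["x", "x", "x", "y", "y", "y"],
            "val": [1.0, 2.0, 10.0, 3.0, 3.5, 4.0],
        }
    )
    df.groupby("key").skew()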