migrate Experimental (#1)

* x * x * x
langchain-ai · Sep 26, 2024 · 9819006 · 9819006
1 parent 2214ef6
commit 9819006
Show file tree

Hide file tree

Showing 223 changed files with 25,286 additions and 60 deletions.
diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py
@@ -2,7 +2,7 @@
 import sys
 from typing import Dict
 
-LIB_DIRS = ["libs/{lib}"]
+LIB_DIRS = ["libs/experimental"]
 
 if __name__ == "__main__":
     files = sys.argv[1:]

diff --git a/.github/workflows/_release.yml b/.github/workflows/_release.yml
@@ -12,7 +12,7 @@ on:
       working-directory:
         required: true
         type: string
-        default: 'libs/{lib}'
+        default: 'libs/experimental'
 
 env:
   PYTHON_VERSION: "3.11"
@@ -158,8 +158,6 @@ jobs:
         working-directory: ${{ inputs.working-directory }}
 
       - name: Run integration tests
-        env:
-          PARTNER_API_KEY: ${{ secrets.PARTNER_API_KEY }}
         run: make integration_tests
         working-directory: ${{ inputs.working-directory }}
 

diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,7 @@
+__pycache__
+.mypy_cache
+.pytest_cache
+.ruff_cache
+.mypy_cache_test
+.env
+.venv*
diff --git a/README.md b/README.md
@@ -1,59 +1,11 @@
-# 🦜️🔗 LangChain {partner}
+# 🦜️🔗 LangChain Experimental
 
-This repository contains 1 package with {partner} integrations with LangChain:
+This repository contains 1 package with experimental features of LangChain:
 
-- [langchain-{package_lower}](https://pypi.org/project/langchain-{package_lower}/)
+- [langchain-experimental](https://pypi.org/project/langchain-experimental/)
 
-## Initial Repo Checklist (Remove this section after completing)
-
-This setup assumes that the partner package is already split. For those instructions,
-see [these docs](https://python.langchain.com/docs/contributing/integrations#partner-packages).
-
-Code (auto ecli)
-
-- [ ] Fill out the readme above (for folks that follow pypi link)
-- [ ] Copy package into /libs folder
-- [ ] Update these fields in /libs/*/pyproject.toml
-
-    - `tool.poetry.repository`
-    - `tool.poetry.urls["Source Code"]`
-
-Workflow code (auto ecli)
-
-- [ ] Populate .github/workflows/_release.yml with `on.workflow_dispatch.inputs.working-directory.default`
-- [ ] Configure `LIB_DIRS` in .github/scripts/check_diff.py
-
-Workflow code (manual)
-
-- [ ] Add secrets as env vars in .github/workflows/_release.yml
-
-In github (manual)
-
-- [ ] Add integration testing secrets in Github (ask Erick for help)
-- [ ] Add partner collaborators in Github (ask Erick for help)
-- [ ] "Allow auto-merge" in General Settings 
-- [ ] Only "Allow squash merging" in General Settings
-- [ ] Set up ruleset matching CI build (ask Erick for help)
-    - name: ci build
-    - enforcement: active
-    - bypass: write
-    - target: default branch
-    - rules: restrict deletions, require status checks ("CI Success"), block force pushes
-- [ ] Set up ruleset
-    - name: require prs
-    - enforcement: active
-    - bypass: none
-    - target: default branch
-    - rules: restrict deletions, require a pull request before merging (0 approvals, no boxes), block force pushes
-
-Pypi (manual)
-
-- [ ] Add new repo to test-pypi and pypi trusted publishing (ask Erick for help)
-
-Slack
-
-- [ ] Set up release alerting in Slack (ask Erick for help)
-
-release:
-/github subscribe langchain-ai/langchain-{partner_lower} releases workflows:{name:"release"}
-/github unsubscribe langchain-ai/langchain-{partner_lower} issues pulls commits deployments
+> [!WARNING]
+> Portions of the code in this package may be dangerous if not properly deployed
+> in a sandboxed environment. Please be wary of deploying experimental code
+> to production unless you've taken appropriate precautions and
+> have already discussed it with your security team.
diff --git a/libs/experimental/LICENSE b/libs/experimental/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) LangChain, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/libs/experimental/Makefile b/libs/experimental/Makefile
@@ -0,0 +1,67 @@
+# None of these targets produces a file named after itself, so they are declared
+# phony; otherwise a file or directory with the same name (the `tests` directory,
+# for instance) would make the target look up to date and skip its recipe.
+# `docker_tests` is kept from the original list; no rule for it is defined here.
+.PHONY: all format format_diff lint lint_diff lint_package lint_tests \
+    test tests test_watch integration_tests docker_tests extended_tests \
+    check_imports spell_check spell_fix help
+
+# Default target executed when no arguments are given to make.
+all: help
+
+# Location pytest is pointed at by `test` and `tests`; override on the command
+# line with TEST_FILE=<path>.
+TEST_FILE ?= tests/unit_tests/
+
+# `test` and `tests` are two names for the same unit-test run.
+test tests:
+	poetry run pytest $(TEST_FILE)
+
+# Run the unit tests in watch mode through ptw.
+test_watch:
+	poetry run ptw --now . -- tests/unit_tests
+
+# Unit tests invoked with pytest's --only-extended flag.
+extended_tests:
+	poetry run pytest --only-extended tests/unit_tests
+
+# Tests located under tests/integration_tests.
+integration_tests:
+	poetry run pytest tests/integration_tests
+
+# Hand every .py file under langchain_experimental to scripts/check_imports.py.
+# The file list is gathered by `find` while the Makefile is being read and is
+# passed to the script through the prerequisite list ($^).
+check_imports: $(shell find langchain_experimental -name '*.py')
+	poetry run python ./scripts/check_imports.py $^
+
+
+######################
+# LINTING AND FORMATTING
+######################
+
+# Define a variable for Python and notebook files.
+# PYTHON_FILES is what the lint/format recipes operate on and MYPY_CACHE is the
+# directory given to mypy's --cache-dir; the target-specific assignments below
+# override these defaults per target.
+PYTHON_FILES=.
+MYPY_CACHE=.mypy_cache
+lint format: PYTHON_FILES=.
+# *_diff targets: only .py/.ipynb files that differ from `master`, excluding
+# deleted files (--diff-filter=d), with paths relative to libs/experimental.
+# The result can be empty when nothing relevant changed.
+lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/experimental --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
+lint_package: PYTHON_FILES=langchain_experimental
+lint_tests: PYTHON_FILES=tests
+# lint_tests gets its own mypy cache directory, separate from the default one.
+lint_tests: MYPY_CACHE=.mypy_cache_test
+
+# Run ruff (check + format --diff) and mypy over $(PYTHON_FILES).
+# Every step is skipped when PYTHON_FILES is empty (e.g. lint_diff with no
+# changed files). The mypy step is grouped in a subshell because the shell parses
+# `a || b && c` as `(a || b) && c`; without the grouping mypy would still be
+# invoked with no files and fail.
+lint lint_diff lint_package lint_tests:
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
+	[ "$(PYTHON_FILES)" = "" ] || (mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE))
+
+# Rewrite $(PYTHON_FILES) in place with ruff's formatter, then apply ruff's
+# fixes for the `I` rule selection. Both steps are skipped when PYTHON_FILES is
+# empty.
+format format_diff:
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I --fix $(PYTHON_FILES)
+
+# Run codespell with pyproject.toml passed as its --toml configuration file.
+spell_check:
+	poetry run codespell --toml pyproject.toml
+
+# Same codespell invocation with -w added.
+spell_fix:
+	poetry run codespell --toml pyproject.toml -w
+
+######################
+# HELP
+######################
+
+# Print a short summary of the main targets. This is what a bare `make` runs,
+# since the default `all` target depends on `help`.
+help:
+	@echo '----'
+	@echo 'format                       - run code formatters'
+	@echo 'lint                         - run linters'
+	@echo 'test                         - run unit tests'
+	@echo 'tests                        - run unit tests'
+	@echo 'test TEST_FILE=<test_file>   - run all tests in file'
+	@echo 'test_watch                   - run unit tests in watch mode'
diff --git a/libs/experimental/README.md b/libs/experimental/README.md
@@ -0,0 +1,16 @@
+# 🦜️🧪 LangChain Experimental
+
+This package holds experimental LangChain code, intended for research and experimental
+uses.
+
+> [!WARNING]
+> Portions of the code in this package may be dangerous if not properly deployed
+> in a sandboxed environment. Please be wary of deploying experimental code
+> to production unless you've taken appropriate precautions and
+> have already discussed it with your security team.
+
+Some of the code here may be marked with security notices. However,
+given the exploratory and experimental nature of the code in this package,
+the lack of a security notice on a piece of code does not mean that
+the code in question does not require additional security considerations
+in order to be safe to use.
diff --git a/libs/experimental/extended_testing_deps.txt b/libs/experimental/extended_testing_deps.txt
@@ -0,0 +1,8 @@
+presidio-anonymizer>=2.2.352,<3
+presidio-analyzer>=2.2.352,<3
+faker>=19.3.1,<20
+vowpal-wabbit-next==0.7.0
+sentence-transformers>=2,<3
+jinja2>=3,<4
+pandas>=2.0.1,<3
+tabulate>=0.9.0,<1
diff --git a/libs/experimental/langchain_experimental/__init__.py b/libs/experimental/langchain_experimental/__init__.py
@@ -0,0 +1,8 @@
+from importlib import metadata
+
+# Expose the installed distribution's version as ``__version__``, looked up
+# under this package's own name (``__package__``).
+try:
+    __version__ = metadata.version(__package__)
+except metadata.PackageNotFoundError:
+    # Case where package metadata is not available.
+    __version__ = ""
+del metadata  # optional, avoids polluting the results of dir(__package__)
diff --git a/libs/experimental/langchain_experimental/agents/__init__.py b/libs/experimental/langchain_experimental/agents/__init__.py
@@ -0,0 +1,23 @@
+"""**Agent** is a class that uses an LLM to choose
+a sequence of actions to take.
+
+In Chains, a sequence of actions is hardcoded. In Agents,
+a language model is used as a reasoning engine to determine which actions
+to take and in which order.
+
+Agents select and use **Tools** and **Toolkits** for actions.
+"""
+
+from langchain_experimental.agents.agent_toolkits import (
+    create_csv_agent,
+    create_pandas_dataframe_agent,
+    create_spark_dataframe_agent,
+    create_xorbits_agent,
+)
+
+__all__ = [
+    "create_csv_agent",
+    "create_pandas_dataframe_agent",
+    "create_spark_dataframe_agent",
+    "create_xorbits_agent",
+]
diff --git a/libs/experimental/langchain_experimental/agents/agent_toolkits/__init__.py b/libs/experimental/langchain_experimental/agents/agent_toolkits/__init__.py
@@ -0,0 +1,19 @@
+from langchain_experimental.agents.agent_toolkits.csv.base import create_csv_agent
+from langchain_experimental.agents.agent_toolkits.pandas.base import (
+    create_pandas_dataframe_agent,
+)
+from langchain_experimental.agents.agent_toolkits.python.base import create_python_agent
+from langchain_experimental.agents.agent_toolkits.spark.base import (
+    create_spark_dataframe_agent,
+)
+from langchain_experimental.agents.agent_toolkits.xorbits.base import (
+    create_xorbits_agent,
+)
+
+__all__ = [
+    "create_xorbits_agent",
+    "create_pandas_dataframe_agent",
+    "create_spark_dataframe_agent",
+    "create_python_agent",
+    "create_csv_agent",
+]
diff --git a/libs/experimental/langchain_experimental/agents/agent_toolkits/csv/__init__.py b/libs/experimental/langchain_experimental/agents/agent_toolkits/csv/__init__.py
@@ -0,0 +1 @@
+"""CSV toolkit."""
diff --git a/libs/experimental/langchain_experimental/agents/agent_toolkits/csv/base.py b/libs/experimental/langchain_experimental/agents/agent_toolkits/csv/base.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+from io import IOBase
+from typing import TYPE_CHECKING, Any, List, Optional, Union
+
+from langchain_experimental.agents.agent_toolkits.pandas.base import (
+    create_pandas_dataframe_agent,
+)
+
+if TYPE_CHECKING:
+    from langchain.agents.agent import AgentExecutor
+    from langchain_core.language_models import LanguageModelLike
+
+
+def create_csv_agent(
+    llm: LanguageModelLike,
+    path: Union[str, IOBase, List[Union[str, IOBase]]],
+    pandas_kwargs: Optional[dict] = None,
+    **kwargs: Any,
+) -> AgentExecutor:
+    """Create pandas dataframe agent by loading csv to a dataframe.
+
+    Args:
+        llm: Language model to use for the agent.
+        path: A string path, file-like object or a list of string paths/file-like
+            objects that can be read in as pandas DataFrames with pd.read_csv().
+        pandas_kwargs: Named arguments to pass to pd.read_csv().
+        kwargs: Additional kwargs to pass to langchain_experimental.agents.agent_toolkits.pandas.base.create_pandas_dataframe_agent().
+
+    Returns:
+        An AgentExecutor with the specified agent_type agent and access to
+        a PythonAstREPLTool with the loaded DataFrame(s) and any user-provided extra_tools.
+
+    Raises:
+        ImportError: If pandas is not installed.
+        ValueError: If ``path`` is not a string, a file-like object, or a list
+            made up only of strings and file-like objects.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_openai import ChatOpenAI
+            from langchain_experimental.agents import create_csv_agent
+
+            llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
+            agent_executor = create_csv_agent(
+                llm,
+                "titanic.csv",
+                agent_type="openai-tools",
+                verbose=True
+            )
+    """  # noqa: E501
+    # pandas is imported lazily so the module can be imported without it.
+    try:
+        import pandas as pd
+    except ImportError:
+        raise ImportError(
+            "pandas package not found, please install with `pip install pandas`."
+        )
+
+    _kwargs = pandas_kwargs or {}
+    if isinstance(path, (str, IOBase)):
+        # Single source -> a single DataFrame.
+        df = pd.read_csv(path, **_kwargs)
+    elif isinstance(path, list):
+        # Several sources -> a list of DataFrames, one per entry.
+        df = []
+        for item in path:
+            if not isinstance(item, (str, IOBase)):
+                # Report the offending element's type, not the enclosing list's.
+                raise ValueError(f"Expected str or file-like object, got {type(item)}")
+            df.append(pd.read_csv(item, **_kwargs))
+    else:
+        raise ValueError(f"Expected str, list, or file-like object, got {type(path)}")
+    return create_pandas_dataframe_agent(llm, df, **kwargs)
diff --git a/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/__init__.py b/libs/experimental/langchain_experimental/agents/agent_toolkits/pandas/__init__.py
@@ -0,0 +1 @@
+"""Pandas toolkit."""