diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..3c15b7be6 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,53 @@ +name: docs_pages_workflow + +on: [pull_request] + +permissions: + pull-requests: write + +jobs: + build_docs_job: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: 3.8 + + - name: Get pip cache dir + id: pip-cache + run: | + python -m pip install --upgrade pip + echo "dir={$(pip cache dir)}" >> $GITHUB_OUTPUT + + - name: pip cache + uses: actions/cache@v3 + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/docs/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install docs requirements + run: | + python -m pip install -r docs/requirements.txt + + - name: make the sphinx docs + run: | + make -C docs clean + make -C docs html + + - uses: readthedocs/actions/preview@v1 + with: + project-slug: "trlx" + project-language: "en" + # see: https://github.com/readthedocs/actions/tree/main/preview + # message-template (optional): Text message to be injected by the action in the Pull Request description. It supports the following placeholders to be replaced: + # {docs-pr-index-url}: URL to the root of the documentation for the Pull Request preview. + # platform (optional): Read the Docs Community (community) or Read the Docs for Business (business). (default: community) + # single-version (optional): Set this to 'true' if your project is single version, so we can link to the correct URL. (default: 'false') diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d94d14f2a..cbf9fa775 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: -- repo: https://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: - id: check-case-conflict @@ -18,17 +18,24 @@ repos: args: [--fix=lf] - id: requirements-txt-fixer - id: trailing-whitespace -- repo: https://github.com/psf/black + - repo: https://github.com/psf/black rev: 23.1.0 hooks: - - id: black + - id: black files: ^(trlx|examples|tests|setup.py)/ -- repo: https://github.com/pycqa/isort + - repo: https://github.com/pycqa/isort rev: 5.12.0 hooks: - - id: isort + - id: isort name: isort (python) -- repo: https://github.com/pycqa/flake8 + - repo: https://github.com/pycqa/flake8 rev: 6.0.0 hooks: - - id: flake8 + - id: flake8 + - repo: https://github.com/codespell-project/codespell + rev: v2.2.2 + hooks: + - id: codespell + args: [--ignore-words, dictionary.txt] + additional_dependencies: + - tomli diff --git a/.readthedocs.yml b/.readthedocs.yml index c8f03ab0a..d5f60f2e8 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,9 +1,25 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required version: 2 +build: + os: "ubuntu-20.04" + tools: + python: "3.8" + +# Build documentation in the docs/ directory with Sphinx sphinx: - configuration: docs/source/conf.py + configuration: docs/conf.py + fail_on_warning: false + +# Optionally build your docs in additional formats such as PDF and ePub +formats: + - htmlzip +# 
Optionally set the version of Python and requirements required to build your docs python: - version: 3.9 install: - - requirements: docs/requirements.txt + - requirements: docs/requirements.txt diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..6f9ee79d6 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,11 @@ +# Change log + +Best viewed on [trlx.readthedocs.io](https://trlx.readthedocs.io/en/latest/changelog.html). + + + +## trlx 0.4.0 (2022-12-05) + +- python 3.8 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0c5169ed5..8f97f071d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,11 @@ Looking to improve `trlX`? Thanks for considering! -There are many ways to contribute, from writing tutorials in [Colab notebooks](https://colab.research.google.com) to improving the project's [documentation](https://trlx.readthedocs.io), submitting bug reports and feature requests, or even implementing new features themselves. See the outstanding [issues](https://github.com/CarperAI/trlx/issues) for ideas on where to begin. +There are many ways to contribute, from writing tutorials in [Colab notebooks](https://colab.research.google.com) to improving the project's [documentation](https://trlx.readthedocs.io), to submitting bug reports and feature requests, or even implementing new features themselves. See the outstanding [issues](https://github.com/CarperAI/trlx/issues) for ideas on where to begin. + +- [Documentation Issues](https://github.com/CarperAI/trlx/issues?q=is%3Aissue+is%3Aopen+label%3Adocumentation) +- [Bug Fixes](https://github.com/CarperAI/trlx/issues?q=is%3Aissue+is%3Aopen+label%3Abug) +- [Feature Requests](https://github.com/CarperAI/trlx/issues?q=is%3Aissue+is%3Aopen+label%3A%22feature+request%22) Here are some guidelines to help you get started 🚀. @@ -16,40 +20,83 @@ To submit a bug report or a feature request, please open an [issue](https://gith Follow these steps to start contributing code: +1. Set up your environment: + +```bash +conda create -n trlx python=3.8 pytorch pytorch-cuda=11.7 -c pytorch -c nvidia +conda activate trlx +git clone https://github.com/CarperAI/trlx +cd trlx +pip install -e ".[dev]" +pre-commit install +``` + 1. Create your own [fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo#forking-a-repository) of the repository and clone it to your local machine. + ```bash git clone https://github.com/<your-username>/trlx.git cd trlx git remote add upstream https://github.com/CarperAI/trlx.git ``` -2. Create a new branch for your changes and give it a concise name that reflects your contribution. + +1. Create a new branch for your changes and give it a concise name that reflects your contribution. + ```bash git checkout -b <branch-name> ``` -2. Install the development dependencies in a Python environment. + +1. Install the development dependencies in a Python environment. + ```bash pip install -e ".[dev]" pre-commit install ``` -4. Implement your changes. Make small, independent, and well documented commits along the way (check out [these](https://cbea.ms/git-commit/) tips). -5. Add unit tests whenever appropriate and ensure that the tests pass. To run the entire test suite, use the following command from within the project root directory. + +Install `pre-commit`: + +```bash +pip install pre-commit +pre-commit install +``` + +Bonus: force run `pre-commit` on all the files: + +```bash +pre-commit run --all-files +``` + +1. Implement your changes. Make small, independent, and well documented commits along the way (check out [these](https://cbea.ms/git-commit/) tips). + +1.
Add unit tests whenever appropriate and ensure that the tests pass. To run the entire test suite, use the following command from within the project root directory. + ```bash pytest ``` + For changes with minimal project scope (e.g. a simple bug fix), you might want to run the unit tests for just a specific test file instead: + ```bash pytest -vv -k "" ``` -5. Commit your final changes. Our `pre-commit` hooks will automatically run before each commit and will prevent you from committing code that does not pass our style and linter checks. They'll also automatically format your code! To run these manually, use the following command: + +1. Commit your final changes. Our `pre-commit` hooks will automatically run before each commit and will prevent you from committing code that does not pass our style and linter checks. They'll also automatically format your code! To run these manually, use the following command: + ```bash pre-commit run --all-files ``` -6. Push the changes to your fork. +1. Push the changes to your fork. Finally ... 🥁 ... Create a [pull request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request) to the `trlX` repository! Make sure to include a description of your changes and link to any relevant issues. -> __Tip__: If you're looking to introduce an experimental feature, we suggest testing the behavior of your proposed feature on some of the existing [examples](https://github.com/CarperAI/trlx/tree/master/examples), such as [random walks](https://github.com/CarperAI/trlx/blob/master/examples/randomwalks). This will help you get a better sense of how the feature would work in practice and will also help you identify any potential flaws in the implementation. +> **Tip**: If you're looking to introduce an experimental feature, we suggest testing the behavior of your proposed feature on some of the existing [examples](https://github.com/CarperAI/trlx/tree/master/examples), such as [random walks](https://github.com/CarperAI/trlx/blob/master/examples/randomwalks). This will help you get a better sense of how the feature would work in practice and will also help you identify any potential flaws in the implementation. + +## Tips & Tricks + +Set transformers verbosity level + +```bash +TRANSFORMERS_VERBOSITY=error +``` ## Asking questions @@ -63,4 +110,4 @@ This project adheres to the [Contributor Covenant Code of Conduct](https://githu By contributing, you agree that your contributions will be licensed under its MIT License. -# Thank you for your contribution 🐠! +## Thank you for your contribution! 🐠 diff --git a/LICENSE b/LICENSE index bb73c1d7e..787925dff 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -MIT License +# MIT License Copyright (c) 2022 CarperAI diff --git a/README.md b/README.md index da9ba405d..e598cc6a3 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +![TRLX](./docs/_static/apple-touch-icon-114x114.png) + +[docs-image]: https://readthedocs.org/projects/trlX/badge/?version=latest +[docs-url]: https://trlX.readthedocs.io/en/latest/?badge=latest [![DOI](https://zenodo.org/badge/545104023.svg)](https://zenodo.org/badge/latestdoi/545104023) @@ -39,25 +43,25 @@ For more usage see [examples](./examples). You can also try the colab notebooks You can train a model using a reward function or a reward-labeled dataset. 
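Both paths go through `trlx.train`. As a rough sketch (the standalone function name below is illustrative), a reward function is simply a callable that maps a batch of generated samples to a list of scalar scores, which the inline lambda in the first snippet below is a compact form of:

```python
from typing import List

def reward_fn(samples: List[str], **kwargs) -> List[float]:
    # Score each generated sample; here, count occurrences of the word "cats",
    # mirroring the inline lambda used in the example that follows.
    return [float(sample.count("cats")) for sample in samples]
```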
-#### Using a reward function +### Using a reward function ```python trainer = trlx.train('gpt2', reward_fn=lambda samples, **kwargs: [sample.count('cats') for sample in samples]) ``` -#### Using a reward-labeled dataset +### Using a reward-labeled dataset ```python trainer = trlx.train('EleutherAI/gpt-j-6B', dataset=[('dolphins', 'geese'), (1.0, 100.0)]) ``` -#### Trainers provide a wrapper over their underlying model +### Trainers provide a wrapper over their underlying model ```python trainer.generate(**tokenizer('Q: Who rules the world? A:', return_tensors='pt'), do_sample=True) ``` -#### Save the resulting model to a Hugging Face pretrained language model. (Ready to upload to the Hub!) +### Save the resulting model to a Hugging Face pretrained language model. (Ready to upload to the Hub!) ```python trainer.save_pretrained('/path/to/output/folder/') diff --git a/dictionary.txt b/dictionary.txt new file mode 100644 index 000000000..212e125ae --- /dev/null +++ b/dictionary.txt @@ -0,0 +1,2 @@ +rouge +sart diff --git a/docs/Makefile b/docs/Makefile index d0c3cbf10..ed8809902 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -5,7 +5,7 @@ # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build -SOURCEDIR = source +SOURCEDIR = . BUILDDIR = build # Put it first so that "make" without argument is like "make help". diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..62a4ae956 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,5 @@ +# How to build the documentation + +```bash +make -C docs html +``` diff --git a/docs/_static/apple-touch-icon-114x114.png b/docs/_static/apple-touch-icon-114x114.png new file mode 100644 index 000000000..f41c09a09 Binary files /dev/null and b/docs/_static/apple-touch-icon-114x114.png differ diff --git a/docs/_static/apple-touch-icon-120x120.png b/docs/_static/apple-touch-icon-120x120.png new file mode 100644 index 000000000..6ff0b418e Binary files /dev/null and b/docs/_static/apple-touch-icon-120x120.png differ diff --git a/docs/_static/apple-touch-icon-144x144.png b/docs/_static/apple-touch-icon-144x144.png new file mode 100644 index 000000000..4657719af Binary files /dev/null and b/docs/_static/apple-touch-icon-144x144.png differ diff --git a/docs/_static/apple-touch-icon-152x152.png b/docs/_static/apple-touch-icon-152x152.png new file mode 100644 index 000000000..7bf86af1c Binary files /dev/null and b/docs/_static/apple-touch-icon-152x152.png differ diff --git a/docs/_static/apple-touch-icon-167x167.png b/docs/_static/apple-touch-icon-167x167.png new file mode 100644 index 000000000..7e2945647 Binary files /dev/null and b/docs/_static/apple-touch-icon-167x167.png differ diff --git a/docs/_static/apple-touch-icon-180x180.png b/docs/_static/apple-touch-icon-180x180.png new file mode 100644 index 000000000..31c726d0f Binary files /dev/null and b/docs/_static/apple-touch-icon-180x180.png differ diff --git a/docs/_static/apple-touch-icon-57x57.png b/docs/_static/apple-touch-icon-57x57.png new file mode 100644 index 000000000..25b5a2d8a Binary files /dev/null and b/docs/_static/apple-touch-icon-57x57.png differ diff --git a/docs/_static/apple-touch-icon-60x60.png b/docs/_static/apple-touch-icon-60x60.png new file mode 100644 index 000000000..eb7bfdc4f Binary files /dev/null and b/docs/_static/apple-touch-icon-60x60.png differ diff --git a/docs/_static/apple-touch-icon-72x72.png b/docs/_static/apple-touch-icon-72x72.png new file mode 100644 index 000000000..0562e6de7 Binary files /dev/null and
b/docs/_static/apple-touch-icon-72x72.png differ diff --git a/docs/_static/apple-touch-icon-76x76.png b/docs/_static/apple-touch-icon-76x76.png new file mode 100644 index 000000000..084ad067c Binary files /dev/null and b/docs/_static/apple-touch-icon-76x76.png differ diff --git a/docs/_static/favicon-128x128.png b/docs/_static/favicon-128x128.png new file mode 100644 index 000000000..4e43cc31f Binary files /dev/null and b/docs/_static/favicon-128x128.png differ diff --git a/docs/_static/favicon-16x16.png b/docs/_static/favicon-16x16.png new file mode 100644 index 000000000..e06e67ffc Binary files /dev/null and b/docs/_static/favicon-16x16.png differ diff --git a/docs/_static/favicon-196x196.png b/docs/_static/favicon-196x196.png new file mode 100644 index 000000000..fcea049fc Binary files /dev/null and b/docs/_static/favicon-196x196.png differ diff --git a/docs/_static/favicon-32x32.png b/docs/_static/favicon-32x32.png new file mode 100644 index 000000000..5008598c0 Binary files /dev/null and b/docs/_static/favicon-32x32.png differ diff --git a/docs/_static/favicon-96x96.png b/docs/_static/favicon-96x96.png new file mode 100644 index 000000000..9d11839a5 Binary files /dev/null and b/docs/_static/favicon-96x96.png differ diff --git a/docs/_static/style.css b/docs/_static/style.css new file mode 100644 index 000000000..2fac0848d --- /dev/null +++ b/docs/_static/style.css @@ -0,0 +1,26 @@ +@import url("theme.css"); + +:root { + --block-bg-opacity: .5; +} + +.wy-side-nav-search { + background-color: #fff; +} + +.getting-started { + background-color: rgba(78, 150, 253, var(--block-bg-opacity)); +} + +.user-guides { + background-color: rgba(0, 169, 154, var(--block-bg-opacity)); +} + +.developer-docs { + background-color: rgba(171, 0, 182, var(--block-bg-opacity)); +} + +.key-ideas +{ + border: 0px +} diff --git a/docs/_static/trlx-logo-512x512.png b/docs/_static/trlx-logo-512x512.png new file mode 100644 index 000000000..d8bdb400c Binary files /dev/null and b/docs/_static/trlx-logo-512x512.png differ diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html new file mode 100644 index 000000000..4c57ba830 --- /dev/null +++ b/docs/_templates/layout.html @@ -0,0 +1,2 @@ +{% extends "!layout.html" %} +{% set css_files = css_files + ["_static/style.css"] %} diff --git a/docs/build.sh b/docs/build.sh new file mode 100755 index 000000000..147ebab99 --- /dev/null +++ b/docs/build.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +`which sphinx-build` -T -E -b html -d _build/doctrees-readthedocs -D language=en . _build/html diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 000000000..66efc0fec --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,2 @@ +```{include} ../CHANGELOG.md +``` diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 000000000..32a8c2df3 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,187 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +import os +import sys + +sys.path.insert(0, os.path.abspath("..")) + + +# -- Project information ----------------------------------------------------- + +project = "trlX" +copyright = "2023, CarperAI" +author = "CarperAI" + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", + "sphinx.ext.mathjax", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "matplotlib.sphinxext.plot_directive", + "sphinx_autodoc_typehints", + "myst_nb", + # "myst_parser", + "sphinx_remove_toctrees", + "sphinx_copybutton", + "sphinx_design", +] + +intersphinx_mapping = { + "python": ("https://docs.python.org/3/", None), + "numpy": ("https://docs.scipy.org/doc/numpy/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None), + "pytorch": ("https://pytorch.readthedocs.io/", None), +} + +autodoc_preserve_defaults = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +source_suffix = [".rst", ".md"] + +# The master toctree document. +main_doc = "index" + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [ + # Sometimes sphinx reads its own outputs as inputs! + "build/html", +] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = None + +autosummary_generate = True +napolean_use_rtype = False + +# -- Options for nbsphinx ----------------------------------------------------- + +# Execute notebooks before conversion: 'always', 'never', 'auto' (default) +# We execute all notebooks, exclude the slow ones using 'exclude_patterns' +nbsphinx_execute = "always" + +# Use this kernel instead of the one stored in the notebook metadata: +# nbsphinx_kernel_name = 'python3' + +# List of arguments to be passed to the kernel that executes the notebooks: +# nbsphinx_execute_arguments = [] + +# If True, the build process is continued even if an exception occurs: +# nbsphinx_allow_errors = True + + +# Controls when a cell will time out (defaults to 30; use -1 for no timeout): +nbsphinx_timeout = 180 + +# Default Pygments lexer for syntax highlighting in code cells: +# nbsphinx_codecell_lexer = 'ipython3' + +# Width of input/output prompts used in CSS: +# nbsphinx_prompt_width = '8ex' + +# If window is narrower than this, input/output prompts are on separate lines: +# nbsphinx_responsive_width = '700px' + +# This is processed by Jinja2 and inserted before each notebook +nbsphinx_prolog = r""" # noqa: E501 +{% set docname = 'docs/' + env.doc2path(env.docname, base=None) %} +.. only:: html + .. role:: raw-html(raw) + :format: html + .. nbinfo:: + Interactive online version: + :raw-html:`Open In Colab` + __ https://github.com/CarperAI/trlx/blob/ + {{ env.config.release }}/{{ docname }} +""" + +# This is processed by Jinja2 and inserted after each notebook +# nbsphinx_epilog = r""" +# """ + +# Input prompt for code cells. "%s" is replaced by the execution count. +# nbsphinx_input_prompt = 'In [%s]:' + +# Output prompt for code cells. "%s" is replaced by the execution count. 
+# nbsphinx_output_prompt = 'Out[%s]:' + +# Specify conversion functions for custom notebook formats: +# import jupytext +# nbsphinx_custom_formats = { +# '.Rmd': lambda s: jupytext.reads(s, '.Rmd'), +# } + +# Link or path to require.js, set to empty string to disable +# nbsphinx_requirejs_path = '' + +# Options for loading require.js +# nbsphinx_requirejs_options = {'async': 'async'} + +# mathjax_config = { +# 'TeX': {'equationNumbers': {'autoNumber': 'AMS', 'useLabelIds': True}}, +# } + +# Additional files needed for generating LaTeX/PDF output: +# latex_additional_files = ['references.bib'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_book_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +# Output file base name for HTML help builder. +htmlhelp_basename = "TRLXdoc" + +# -- Extension configuration ------------------------------------------------- + +# Tell sphinx-autodoc-typehints to generate stub parameter annotations including +# types, even if the parameters aren't explicitly documented. +always_document_param_types = True + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +html_theme_options = { + # "logo_only": True, + "show_toc_level": 2, + "repository_url": "https://github.com/CarperAI/trlx", + "use_repository_button": True, # add a "link to repository" button +} + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +html_logo = "_static/apple-touch-icon-144x144.png" + +html_favicon = "_static/favicon-16x16.png" diff --git a/docs/source/configs.rst b/docs/configs.rst similarity index 86% rename from docs/source/configs.rst rename to docs/configs.rst index da5e1f2e6..0e2abd369 100644 --- a/docs/source/configs.rst +++ b/docs/configs.rst @@ -25,10 +25,10 @@ the specific method being used (i.e. ILQL or PPO) **PPO** -.. autoclass:: trlx.data.method_configs.PPOConfig +.. autoclass:: trlx.trainer.nn.ppo_models.MethodConfig :members: **ILQL** -.. autoclass:: trlx.data.method_configs.ILQLConfig +.. autoclass:: trlx.trainer.nn.ilql_models.ILQLConfig :members: diff --git a/docs/source/data.rst b/docs/data.rst similarity index 100% rename from docs/source/data.rst rename to docs/data.rst diff --git a/docs/examples.md b/docs/examples.md new file mode 100644 index 000000000..3f518b7d9 --- /dev/null +++ b/docs/examples.md @@ -0,0 +1,26 @@ +# Examples + +In the ``examples`` folder you can find several example training tasks. + +Check the configs folder for the associated configs files. + +## [randomwalks](/examples.randomwalks) + +does offline reinforcement on a set of graph random walks to stitch shortest paths +to some destination. + +## simulacra + +optimizes prompts by using [prompts-ratings dataset](https://github.com/JD-P/simulacra-aesthetic-captions). + +## architext + +tries to optimize designs represented textually by minimizing number of rooms (pre-trained model is under a license on hf). 
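Each of the example tasks above (and the sentiment examples described next) is a self-contained training script. A typical way to launch one locally is shown below; the exact file path is illustrative, so check the `examples` folder and its configs for the real script names:

```bash
# single-process run of an example script
python examples/randomwalks/ppo_randomwalks.py
# or distributed training via Accelerate
accelerate launch examples/randomwalks/ppo_randomwalks.py
```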
+ +## ilql_sentiments and ppo_sentiments + +train to generate movie reviews with a positive sentiment: in the offline setting by fitting to sentiment +scores over the IMDB dataset, and in the online setting by sampling from a model finetuned on IMDB +and scoring the samples with a learned sentiment reward model. You can tweak +these scripts to your liking and tune the hyperparameters to your own problem if you +wish to use trlx for some custom task. diff --git a/docs/faq.md b/docs/faq.md new file mode 100644 index 000000000..e663db2d2 --- /dev/null +++ b/docs/faq.md @@ -0,0 +1,8 @@ +# Frequently Asked Questions + +```{admonition} How to add a new page to the documentation? +RST primer for Sphinx: https://thomas-cokelaer.info/tutorials/sphinx/rest_syntax.html +``` + +We are collecting answers to frequently asked questions here. +Contributions welcome! diff --git a/docs/glossary.md b/docs/glossary.md new file mode 100644 index 000000000..aef6f47bb --- /dev/null +++ b/docs/glossary.md @@ -0,0 +1,81 @@ +# Glossary of Terms + +```{glossary} +[Agent]() + An agent in reinforcement learning is the entity that interacts with the {term}`Environment` to learn how to maximize its {term}`Reward`. + +[Action]() + An action in reinforcement learning is the signal that the {term}`Agent` provides to the {term}`Environment` to indicate what it wants to do. + + In other words, an action is a value that the agent provides to the environment to indicate what it wants to do. The agent's goal is to maximize the total reward it receives over a sequence of {term}`Steps`. + +[CPU](https://en.wikipedia.org/wiki/Central_processing_unit) + Short for *Central Processing Unit*, CPUs are the standard computational architecture + available in most computers. trlX can run computations on CPUs, but often can achieve + much better performance on a {term}`GPU`. + + +[Device](https://en.wikipedia.org/wiki/Device_computing) + The generic name used to refer to the {term}`CPU` or {term}`GPU` that trlX uses + to perform computations. + +[Environment]() + An environment in reinforcement learning is the system that the agent interacts with. It is the source of {term}`State`, {term}`Action`, and {term}`Reward`. + + In other words, an environment is a system that defines the agent's observation space, action space, and reward function. It is the source of the agent's experience, and the goal of the agent is to maximize the total reward it receives over a sequence of {term}`Steps`. + +[GPU](https://en.wikipedia.org/wiki/Graphics_processing_unit) + Short for *Graphical Processing Unit*, GPUs were originally specialized for operations + related to rendering of images on screen, but now are much more general-purpose. trlX is + able to target GPUs for fast operations on arrays (see also {term}`CPU`). + +[Policy]() + A policy in reinforcement learning is a function that maps {term}`State` to {term}`Action`. + + In other words, a policy is a function that maps the agent's current state to the action it should take. The agent's goal is to maximize the total reward it receives over a sequence of {term}`Steps`. + +[PPO](https://arxiv.org/abs/1707.06347) + Short for *Proximal Policy Optimization*, PPO is a {term}`Policy Gradient` algorithm + that is able to learn policies in high-dimensional, continuous action spaces. + +[Policy Gradient](https://spinningup.openai.com/en/latest/spinningup/rl_intro3.html#policy-gradients) + Policy gradient methods are a class of reinforcement learning algorithms that are able to + learn policies in high-dimensional, continuous action spaces.
+ +[Reinforcement Learning](https://en.wikipedia.org/wiki/Reinforcement_learning) + Reinforcement learning (RL) is a machine learning paradigm that trains an agent to maximize its + {term}`Reward` by interacting with an {term}`Environment`. + +[Reward]() + A reward in reinforcement learning is the signal that the {term}`Environment` provides to the {term}`Agent` to indicate how well it is performing. + + In other words, a reward is a scalar value that the environment provides to the agent to indicate how well it is performing. The agent's goal is to maximize the total reward it receives over a sequence of {term}`Steps`. + +[Rollout]() + A rollout in reinforcement learning is the process of executing a {term}`Policy`, starting from a specific state in the {term}`Environment`, and following it to the end to obtain a complete trajectory of {term}`State`, {term}`Action`, and {term}`Reward`. + + In other words, a Rollout is a simulation of a policy's behavior in the environment over a fixed number of {term}`Steps` or until a terminal state is reached. It provides a means of evaluating the {term}`Policy`'s performance, as the total reward collected over the trajectory can be used as a measure of its effectiveness. + +[State]() + A state in reinforcement learning is the observation that the {term}`Environment` provides to the {term}`Agent`. + +[Steps]() + A step in reinforcement learning is the process of taking a single {term}`Action` in the {term}`Environment`, and observing the resulting {term}`State` and {term}`Reward`. + + In other words, a step is a single iteration of the environment's dynamics, where the agent takes an action and receives a reward and a new state. The agent's goal is to maximize the total reward it receives over a sequence of steps. + +[Trajectory] + + In a {term}`PPO` (Proximal Policy Optimization) setup, a fixed-length trajectory + segment refers to a fixed number of time steps in an episode of an + environment.At each time step, the agent takes an action based on the current + state and receives a reward from the environment. By using fixed-length + trajectory segments, the agent's behavior is divided into chunks of a fixed + length, and each chunk is used for a single PPO update. This allows for more + efficient use of the {term}`Agent`'s experience by breaking it into smaller pieces, and + it also helps to stabilize the learning process by making the training updates + less sensitive to the length of the episode. Fixed-length trajectory segments + are often used in Reinforcement Learning (RL) algorithms, including {term}`PPO`, to + update the policy network. + +``` diff --git a/docs/source/index.rst b/docs/index.rst similarity index 76% rename from docs/source/index.rst rename to docs/index.rst index 1b2947593..547191c2f 100644 --- a/docs/source/index.rst +++ b/docs/index.rst @@ -8,16 +8,36 @@ Welcome to trlX's documentation! trlX is a library made for training large language models using reinforcement learning. It currently supports training using PPO or ILQL for models up to 20B using Accelerate. +Installation +------------ +.. code-block:: bash + + pip install "trlx" + + .. toctree:: :maxdepth: 2 :caption: Contents: + index data models + orchestrator configs pipeline + trainer examples +.. 
toctree:: + :hidden: + :maxdepth: 1 + :caption: Resources + + changelog + faq + glossary + + Indices and tables ================== diff --git a/docs/models.md b/docs/models.md new file mode 100644 index 000000000..91361720e --- /dev/null +++ b/docs/models.md @@ -0,0 +1 @@ +# Models diff --git a/docs/orchestrator.rst b/docs/orchestrator.rst new file mode 100644 index 000000000..0a8a6a059 --- /dev/null +++ b/docs/orchestrator.rst @@ -0,0 +1,23 @@ +.. _orchestrator: + +Orchestrators +******************* + +Orchestrators manage reading data from a pipeline and creating RL data elements (i.e. ``trlx.data.RLElement``) +to push to a models rollout storage. Use the ``trlx.orchestrator.register_orchestrator`` decorator when creating +new orchestrators. + +**General** + +.. autoclass:: trlx.orchestrator.Orchestrator + :members: + +**PPO** + +.. autoclass:: trlx.orchestrator.ppo_orchestrator.PPOOrchestrator + :members: + +**ILQL** + +.. autoclass:: trlx.orchestrator.offline_orchestrator.OfflineOrchestrator + :members: diff --git a/docs/pipeline.md b/docs/pipeline.md new file mode 100644 index 000000000..066a28de5 --- /dev/null +++ b/docs/pipeline.md @@ -0,0 +1,30 @@ +# Pipelines and Rollout Store + +## Pipelines + +Pipelines in trlX provide a way to read from a dataset. They are used to fetch data from the dataset and feed it to the models for training or inference. The pipelines allow for efficient processing of the data and ensure that the models have access to the data they need for their tasks. + +## Rollout Stores + +Rollout stores in trlX are used to store experiences created for the models by the orchestrator. The experiences in the rollout stores serve as the training data for the models. The models use the experiences stored in their rollout stores to learn and improve their behavior. The rollout stores provide a convenient and efficient way for the models to access the experiences they need for training. + +## General + +.. autoclass:: trlx.pipeline.BasePipeline + :members: + +.. autoclass:: trlx.pipeline.BaseRolloutStore + :members: + +## PPO + +.. autoclass:: trlx.pipeline.ppo_pipeline.PPORolloutStorage + :members: + +## ILQL + +.. autoclass:: trlx.pipeline.offline_pipeline.PromptPipeline + :members: + +.. autoclass:: trlx.pipeline.offline_pipeline.ILQLRolloutStorage + :members: diff --git a/docs/requirements.txt b/docs/requirements.txt index 7a33f300e..3052a2f0c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,11 +1,20 @@ -accelerate==0.12.0 -datasets==2.4.0 -deepspeed==0.7.3 -einops==0.4.1 -numpy==1.23.2 -sphinx==4.0.0 -sphinx_rtd_theme +accelerate +commonmark +datasets +deepspeed +docutils +jupyter-sphinx +matplotlib +myst-nb +nbsphinx +Pygments +ray +readthedocs-sphinx-ext +rich +sphinx-autodoc-typehints +sphinx-book-theme +sphinx-copybutton +sphinx-design +sphinx-remove-toctrees torchtyping -tqdm==4.64.0 -transformers==4.21.2 -wandb==0.13.2 +transformers diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 0a9a11c86..000000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,54 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys - -import sphinx_rtd_theme - -sys.path.insert(0, os.path.abspath('../..')) - - -# -- Project information ----------------------------------------------------- - -project = 'trlX' -copyright = '2022, CarperAI' -author = 'CarperAI' - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. - -extensions = ['sphinx_rtd_theme', 'sphinx.ext.todo', 'sphinx.ext.viewcode', 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.autosectionlabel'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'sphinx_rtd_theme' - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] diff --git a/docs/source/examples.rst b/docs/source/examples.rst deleted file mode 100644 index 6f5db49d1..000000000 --- a/docs/source/examples.rst +++ /dev/null @@ -1,18 +0,0 @@ -.. _examples: - -Examples -************************ - -In the ``examples`` folder you can find several example training tasks. Check -the configs folder for the associated configs files. ``examples.randomwalks`` -does offline reinforcement on a set of graph random walks to stitch shortest -paths to some destination. ``examples.simulacra`` optimizes prompts by using -prompts-ratings dataset (https://github.com/JD-P/simulacra-aesthetic-captions). -``examples.architext`` tries to optimize designs represented textually by -minimazing number of rooms (pretrained model is under a license on hf). -``examples.ilql_sentiments`` and ``examples.ppo_sentiments`` train to generate -movie reviews with a positive sentiment, in offline setting – by fitting to IMDB -dataset sentiment scores, and in online setting – by sampling finetuned on IMDB -model and rating samples with learned sentiment reward model, You can tweak -these scripts to your liking and tune hyperparameters to your problem if you -wish to use trlx for some custom task. diff --git a/docs/source/pipeline.rst b/docs/source/pipeline.rst deleted file mode 100644 index 68279d889..000000000 --- a/docs/source/pipeline.rst +++ /dev/null @@ -1,28 +0,0 @@ -.. _pipeline: - -Pipelines -************************ - -Pipelines are how you read from a dataset with trlX. Rollout stores are how models store experiences created -for them. It is these experiences in their rollout store that they are trained on. - -**General** - -.. autoclass:: trlx.pipeline.BasePipeline - :members: - -.. autoclass:: trlx.pipeline.BaseRolloutStore - :members: - -**PPO** - -.. autoclass:: trlx.pipeline.ppo_pipeline.PPORolloutStorage - :members: - -**ILQL** - -.. autoclass:: trlx.pipeline.offline_pipeline.PromptPipeline - :members: - -.. 
autoclass:: trlx.pipeline.offline_pipeline.ILQLRolloutStorage - :members: diff --git a/docs/source/trainer.rst b/docs/trainer.rst similarity index 100% rename from docs/source/trainer.rst rename to docs/trainer.rst diff --git a/examples/experiments/grounded_program_synthesis/lang.py b/examples/experiments/grounded_program_synthesis/lang.py index d2436c3f6..9c3f076c0 100644 --- a/examples/experiments/grounded_program_synthesis/lang.py +++ b/examples/experiments/grounded_program_synthesis/lang.py @@ -109,7 +109,7 @@ def __call__(self, statement_string: str): # This is used to store the input, output and the function template. # Input : List given as an input to the function. # function_template : The atomic function in a given DSL Grammar -# Output : Transformed outut by applying function on the input. +# Output : Transformed output by applying function on the input. generation_template = {"function_template": "NONE", "output": "NONE", "input": []} diff --git a/examples/experiments/grounded_program_synthesis/train_trlx.py b/examples/experiments/grounded_program_synthesis/train_trlx.py index 8071fc210..6cfe793a0 100644 --- a/examples/experiments/grounded_program_synthesis/train_trlx.py +++ b/examples/experiments/grounded_program_synthesis/train_trlx.py @@ -17,7 +17,7 @@ def __init__(self): self.train_data = json.load(f) with open("dataset/test.json", "r") as f: self.test_data = json.load(f) - logger.info("Sucessfully loaded the dataset") + logger.info("Successfully loaded the dataset") def load_datapoints(self, split="train"): if split == "train": @@ -74,7 +74,7 @@ def main(hparams={}): if __name__ == "__main__": - # TEST REWARD FUNTION + # TEST REWARD FUNCTION assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -4]),1)"])) == [1] assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -a]),1)"])) == [-1] assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -3]),1)"])) == [-0.5] diff --git a/pyproject.toml b/pyproject.toml index f6ec87f55..40e261241 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,3 +8,8 @@ profile = "black" [tool.black] line-length = 120 + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = "-ra -q --doctest-modules -vv --cov=trlx/ " +testpaths = ["tests"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..da17a1871 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +accelerate +datasets +deepspeed +docutils +matplotlib +Pygments +ray +rich +torchtyping +transformers diff --git a/setup.cfg b/setup.cfg index 4a54f7747..700745f98 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,38 +1,66 @@ [metadata] -name = trlx -author = Alex Havrilla -version = 0.3.0 -url = https://github.com/CarperAI/trlx -description = A repo for distributed training of language models with Reinforcement Learning via Human Feedback (RLHF) +author = The CarperAI team +description = Transformer Reinforcement Learning X: A repo for distributed training of language models with Reinforcement Learning via Human Feedback (RLHF) +keywords = + Deep Learning + Reinforcement Learning +license = MIT long_description = file: README.md long_description_content_type = text/markdown -license = MIT +name = trlx +url = https://github.com/CarperAI/trlx +version = v0.4.0-dev + +classifiers = + Development Status :: 3 - Alpha + Intended Audience :: Developers + Intended Audience :: Education + Intended Audience :: Science/Research + License :: OSI Approved :: MIT>=0.13.5 + Operating System :: OS 
Independent + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.8 + Topic :: Scientific/Engineering :: Artificial Intelligence + [options] +python_requires = + >=3.8.0 packages = find: install_requires = - accelerate>=0.12.0 + accelerate~=0.15.0 datasets deepspeed>=0.7.3 einops>=0.4.1 + networkx numpy>=1.23.2 - torchtyping - transformers>=4.21.2 - tqdm - rich - wandb>=0.13.5 + packaging>=20.0 + psutil + pyyaml + ray ray>=2.0.1 + rich tabulate>=0.9.0 - networkx + torch~=1.13.0 + torchtyping~=0.1.4 + tqdm + transformers>=4.21.2 + typing-extensions~=3.10.0 + + + [options.extras_require] -bnb = bitsandbytes +bnb = + bitsandbytes +wandb = + wandb dev = black isort flake8 - pre-commit - pytest + pre-commit>= 2.21.0 + pytest>=6.0 pytest-cov [options.packages.find] diff --git a/setup.py b/setup.py index 606849326..482112223 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ from setuptools import setup -setup() +e +if __name__ == "__main__": + setup() diff --git a/trlx/trainer/__init__.py b/trlx/trainer/__init__.py index e1c469e21..969e38ab3 100644 --- a/trlx/trainer/__init__.py +++ b/trlx/trainer/__init__.py @@ -68,7 +68,7 @@ def sample(self, prompts: Iterable[str], length: int, n_samples: int) -> Iterabl :param prompts: List of prompts to tokenize and use as context - :param length: How many new tokens to genrate for each prompt + :param length: How many new tokens to generate for each prompt :type length: int :param n_samples: Default behavior is to take number of prompts as this diff --git a/trlx/trainer/accelerate_ilql_trainer.py b/trlx/trainer/accelerate_ilql_trainer.py index 231b2c059..1a0d53cdc 100644 --- a/trlx/trainer/accelerate_ilql_trainer.py +++ b/trlx/trainer/accelerate_ilql_trainer.py @@ -79,7 +79,7 @@ def save_pretrained(self, directory: Optional[str] = None): of the Trainer config checkpoint dir named "hf_model" (e.g. `/ckpts/hf_model`). """ # TODO: Support saving with `transformers.PreTrainedModel.save_pretrained`. - # This is currently not supported becasue `nn.ilql_models.CausalLMWithValueHeads` + # This is currently not supported because `nn.ilql_models.CausalLMWithValueHeads` # requires a custom `generate` method using its (value/q) heads to steer # sampling - something that is not possible with the default # `transformers.PreTrainedModel.generate`. 
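To make the constraint described in that comment concrete: ILQL-style decoding has to adjust the language model's logits with its value/Q heads at every sampling step, which the stock `generate` loop provides no hook for. A minimal illustrative sketch of the idea (names and details are hypothetical, not the actual `CausalLMWithValueHeads` implementation):

```python
import torch

def ilql_steered_logits(lm_logits: torch.Tensor, q_values: torch.Tensor,
                        v_values: torch.Tensor, beta: float = 1.0) -> torch.Tensor:
    # Shift each token's logit by its estimated advantage (Q - V), so tokens
    # with higher expected return become more likely during sampling.
    return lm_logits + beta * (q_values - v_values)
```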
diff --git a/trlx/trainer/accelerate_ppo_trainer.py b/trlx/trainer/accelerate_ppo_trainer.py index 4b4f31e62..007ba1546 100644 --- a/trlx/trainer/accelerate_ppo_trainer.py +++ b/trlx/trainer/accelerate_ppo_trainer.py @@ -270,7 +270,7 @@ def make_experience(self, num_rollouts: int = 1024, iter_count: int = 0): # noq while len(ppo_rl_elements) < num_rollouts: # Get next batch in prompt dataset and refresh if exhausted - # TOOD (jon-tow): Make `prompt_dataloader` a cyclic/infinite DataLoader to not require manually + # TODO (jon-tow): Make `prompt_dataloader` a cyclic/infinite DataLoader to not require manually # "refreshing" the contents of the `prompt_iterator` try: batch: PromptBatch = next(self.prompt_iterator) diff --git a/trlx/trainer/nemo/gpt.py b/trlx/trainer/nemo/gpt.py index 89eb2554b..ca1abc51f 100644 --- a/trlx/trainer/nemo/gpt.py +++ b/trlx/trainer/nemo/gpt.py @@ -666,7 +666,7 @@ def fwd_output_and_loss_func(batch: List[torch.Tensor], model, checkpoint_activa ) else: # In-between stages are given data via the pipeline engine - # Still need to specify thes arguments to avoid errors + # Still need to specify these arguments to avoid errors model_output = model(input_ids=None, position_ids=None, attention_mask=None) def gather_ntc(t: torch.Tensor):
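On the `TODO` in `accelerate_ppo_trainer.py` above: one way to avoid manually refreshing `prompt_iterator` is to wrap the prompt dataloader in a generator that restarts it whenever it is exhausted. A small sketch of that idea (illustrative only, not the trlx implementation):

```python
from typing import Iterable, Iterator

def infinite_dataloader(dataloader: Iterable) -> Iterator:
    # Re-iterate the underlying DataLoader forever so callers can simply
    # call next(...) without catching StopIteration to refresh it.
    while True:
        for batch in dataloader:
            yield batch
```

With such a wrapper, `batch = next(self.prompt_iterator)` would never raise `StopIteration`, and the try/except refresh block becomes unnecessary.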