From b0730f4db0b08baebbe343d6f2ffb4ca302fc1ed Mon Sep 17 00:00:00 2001
From: Steven Liu <59462357+stevhliu@users.noreply.github.com>
Date: Wed, 21 Feb 2024 11:32:32 -0800
Subject: [PATCH] structure, install (#1072)
---
docs/source/_toctree.yml | 34 ++++++--------
docs/source/compiling.mdx | 50 --------------------
docs/source/errors.mdx | 2 +-
docs/source/installation.mdx | 86 +++++++++++++++++++++++++---------
docs/source/integrations.mdx | 6 ++-
docs/source/nonpytorchcuda.mdx | 46 ------------------
6 files changed, 85 insertions(+), 139 deletions(-)
delete mode 100644 docs/source/compiling.mdx
delete mode 100644 docs/source/nonpytorchcuda.mdx
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index ede41bb6c..7584207d0 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -1,34 +1,30 @@
- title: Get started
sections:
- local: index
- title: Index
+ title: bitsandbytes
- local: quickstart
title: Quickstart
- local: installation
title: Installation
-- title: Features & Integrations
+- title: Guides
sections:
- - local: quantization
- title: Quantization
- local: optimizers
- title: Optimizers
- - local: integrations
- title: Integrations
+ title: 8-bit optimizers
- local: algorithms
title: Algorithms
-- title: Support & Learning
+ - local: integrations
+ title: Integrations
+ - local: errors
+ title: Troubleshoot
+ - local: contributing
+ title: Contribute
+ - local: faqs
+ title: FAQs
+- title: Explanation
sections:
- local: resources
title: Papers, resources & how to cite
- - local: errors
- title: Errors & Solutions
- - local: nonpytorchcuda
- title: Non-PyTorch CUDA
- - local: compiling
- title: Compilation from Source (extended)
- - local: faqs
- title: FAQs (Frequently Asked Questions)
-- title: Contributors Guidelines
+- title: API reference
sections:
- - local: contributing
- title: Contributing
+ - local: quantization
+ title: Quantization
diff --git a/docs/source/compiling.mdx b/docs/source/compiling.mdx
deleted file mode 100644
index 39e277e71..000000000
--- a/docs/source/compiling.mdx
+++ /dev/null
@@ -1,50 +0,0 @@
-# Compiling from Source[[compiling]]
-
-## Linux
-
-To compile from source, you need the following:
-
-* The ability to compile C++ (gcc, make, headers, etc)
-* CMake (version 3.22.1 or newer)
-* Python 3.10 or newer
-* [The CUDA toolkit](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) (nvcc)
-
-On Ubuntu, install the first two with `apt-get install -y build-essential cmake`.
-
-To install the CUDA toolkit, follow the [instructions from your distro](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html).
-
-
-
-To install the package from source, then run
-
-```
-pip install -r requirements-dev.txt
-cmake -DCOMPUTE_BACKEND=cuda -S .
-make
-pip install .
-```
-
-If you have multiple versions of CUDA installed, or have installed it in a non-standard location, please refer to [cmake CUDA documentation](https://cliutils.gitlab.io/modern-cmake/chapters/packages/CUDA.html) for how to configure the CUDA compiler used.
-
-## Windows
-
-The following is required to install from source on Windows
-
-* [Microsoft Visual Studio](https://visualstudio.microsoft.com/downloads/) with C++ support
-* CMake (version 3.22.1 or newer)
-* Python 3.10 or newer
-* [The CUDA toolkit](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html) (nvcc)
-
-To install the CUDA toolkit, follow the [instructions for Windows](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
-
-To install the package from source, then run
-```
-pip install -r requirements-dev.txt
-cmake -DCOMPUTE_BACKEND=cuda -S .
-cmake --build . --config Release
-pip install .
-```
-
-## Compilation for Kepler Architecture
-
-From version 0.39.1, bitsandbytes no longer includes Kepler binaries in pip installations, requiring manual compilation. Follow the general steps and use `cuda11x_nomatmul_kepler` for Kepler-targeted compilation.
diff --git a/docs/source/errors.mdx b/docs/source/errors.mdx
index 293017173..95594ea11 100644
--- a/docs/source/errors.mdx
+++ b/docs/source/errors.mdx
@@ -1,4 +1,4 @@
-# Errors & Solutions
+# Troubleshoot
## No kernel image available
diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx
index af65a3c7f..f055e44f0 100644
--- a/docs/source/installation.mdx
+++ b/docs/source/installation.mdx
@@ -1,27 +1,35 @@
# Installation
-Note currently `bitsandbytes` is only supported on CUDA GPU hardwares, support for AMD GPUs and M1 chips (MacOS) is coming soon.
+bitsandbytes is only supported on CUDA GPUs for CUDA versions **10.2 - 12.0**. Select your operating system below to see the installation instructions.
-## Hardware requirements:
- - LLM.int8(): NVIDIA Turing (RTX 20xx; T4) or Ampere GPU (RTX 30xx; A4-A100); (a GPU from 2018 or newer).
- - 8-bit optimizers and quantization: NVIDIA Kepler GPU or newer (>=GTX 78X).
+For Linux systems, make sure your hardware meets the following requirements to use bitsandbytes features.
-Supported CUDA versions: 10.2 - 12.0 #TODO: check currently supported versions
+| **Feature** | **Hardware requirement** |
+|---|---|
+| LLM.int8() | NVIDIA Turing (RTX 20 series, T4) or Ampere (RTX 30 series, A4-A100) GPUs |
+| 8-bit optimizers/quantization | NVIDIA Kepler (GTX 780 or newer) |
-## Linux
+> [!WARNING]
+> bitsandbytes >= 0.39.1 no longer includes Kepler binaries in pip installations. This requires manual compilation, and you should follow the general steps and use `cuda11x_nomatmul_kepler` for Kepler-targeted compilation.
-### From Pypi
+To install bitsandbytes from PyPI:
```bash
pip install bitsandbytes
```
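The hardware requirements in the table above amount to a minimum CUDA compute capability per feature. The following sketch makes that concrete; the capability thresholds (Turing = 7.5, Kepler = 3.5) are assumptions drawn from NVIDIA's architecture numbering, not values taken from bitsandbytes itself:

```python
# Minimum CUDA compute capability per feature, per the table above.
# Turing GPUs are compute capability 7.5+, Kepler GPUs are 3.5+
# (assumed from NVIDIA architecture numbering, not from bitsandbytes).
MIN_COMPUTE_CAPABILITY = {
    "llm_int8": (7, 5),        # Turing (RTX 20 series, T4) or newer
    "8bit_optimizers": (3, 5), # Kepler (GTX 780) or newer
}

def supports(feature: str, capability: tuple[int, int]) -> bool:
    """Return True if a GPU with the given (major, minor) compute
    capability meets the minimum requirement for the feature."""
    return capability >= MIN_COMPUTE_CAPABILITY[feature]

# A Kepler GPU (3.5) can run the 8-bit optimizers but not LLM.int8():
print(supports("8bit_optimizers", (3, 5)))  # True
print(supports("llm_int8", (3, 5)))         # False
```

On a machine with PyTorch installed, you could feed `torch.cuda.get_device_capability()` into `supports()` to check your own GPU.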
-### From source
+To compile from source, you need CMake >= **3.22.1**, Python >= **3.10**, and a C++ compiler toolchain (gcc, make, headers, etc.). For example, to install the compiler and CMake on Ubuntu:
-You need CMake and Python installed. For Linux, make sure to install a compiler (`apt install build-essential`, for example).
+```bash
+apt-get install -y build-essential cmake
+```
+
+You should also install the CUDA Toolkit by following the [NVIDIA CUDA Installation Guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html).
+
+Now to install the bitsandbytes package from source, run the following commands:
```bash
git clone https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/
@@ -30,17 +38,16 @@ cmake -DCOMPUTE_BACKEND=cuda -S .
make
pip install .
```
-Note support for non-CUDA GPUs (e.g. AMD, Intel, Apple Silicon), is also coming soon.
-For a more detailed compilation guide, head to the [dedicated page on the topic](./compiling)
+
+> [!TIP]
+> If you have multiple versions of CUDA installed or installed it in a non-standard location, please refer to the [CMake CUDA documentation](https://cliutils.gitlab.io/modern-cmake/chapters/packages/CUDA.html) for how to configure the CUDA compiler.
-## Windows
-
-Windows builds require Visual Studio with C++ support, as well as the Cuda SDK installed.
+Windows systems require Visual Studio with C++ support.
-Currently for Windows users, you need to build bitsandbytes from source:
+You'll need to build bitsandbytes from source. To compile from source, you need CMake >= **3.22.1** and Python >= **3.10** installed. You should also install the CUDA Toolkit by following the [CUDA Installation Guide for Windows](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
```bash
git clone https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/
@@ -52,15 +59,52 @@ python -m build --wheel
Big thanks to [wkpark](https://github.com/wkpark), [Jamezo97](https://github.com/Jamezo97), [rickardp](https://github.com/rickardp), [akx](https://github.com/akx) for their amazing contributions to make bitsandbytes compatible with Windows.
-For a more detailed compilation guide, head to the [dedicated page on the topic](./compiling)
-
-## MacOS
-
-Mac support is still a work in progress. Please make sure to check out the [Apple Silicon implementation coordination issue](https://github.com/TimDettmers/bitsandbytes/issues/1020) to get notified about the discussions and progress with respect to MacOS integration.
+> [!TIP]
+> MacOS support is still a work in progress! Subscribe to this [issue](https://github.com/TimDettmers/bitsandbytes/issues/1020) to get notified about discussions and to track the integration progress.
-
+
+## PyTorch CUDA versions
+
+Some bitsandbytes features may need a newer CUDA version than the one currently supported by PyTorch binaries from Conda and pip. In this case, you should follow these instructions to load a precompiled bitsandbytes binary.
+
+1. Determine the path of the CUDA version you want to use. Common paths include:
+
+* `/usr/local/cuda`
+* `/usr/local/cuda-XX.X` where `XX.X` is the CUDA version number
+
+If you don't already have CUDA installed, you can install the version you need locally with this script provided by bitsandbytes:
+
+```bash
+wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/install_cuda.sh
+# Syntax: bash install_cuda.sh CUDA_VERSION INSTALL_PREFIX EXPORT_TO_BASH
+# CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122}
+# EXPORT_TO_BASH in {0, 1} with 0=False and 1=True
+
+# For example, the following installs CUDA 11.7 to ~/local/cuda-11.7 and exports the path to your .bashrc
+
+bash install_cuda.sh 117 ~/local 1
+```
+
+2. Set the `BNB_CUDA_VERSION` and `LD_LIBRARY_PATH` environment variables to manually override the CUDA version used by PyTorch.
+
+> [!TIP]
+> It is recommended to add the following lines to your `.bashrc` file to make them permanent.
+
+```bash
+export BNB_CUDA_VERSION=
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:
+```
+
+For example, to use a local install path:
+
+```bash
+export BNB_CUDA_VERSION=117
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/tim/local/cuda-11.7
+```
+
+3. Now when you launch bitsandbytes with these environment variables, the PyTorch CUDA version is overridden by the new CUDA version (in this example, version 11.7) and a different bitsandbytes library is loaded.
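To make the steps above concrete, here is a small illustrative sketch of how a version code like `117` maps to a dotted CUDA version and how the `BNB_CUDA_VERSION` override selects a precompiled binary. The filename scheme and selection logic are assumptions for illustration, not the library's actual loader code:

```python
import os

def dotted_version(code: str) -> str:
    """'117' -> '11.7': the last digit is the minor version."""
    return f"{code[:-1]}.{code[-1]}"

def select_library(default_code: str) -> str:
    """Pick the bitsandbytes CUDA binary to load, honoring the
    BNB_CUDA_VERSION override if set (filename scheme assumed)."""
    code = os.environ.get("BNB_CUDA_VERSION") or default_code
    return f"libbitsandbytes_cuda{code}.so"

# Step 1 installed CUDA 11.7 under ~/local/cuda-11.7:
print(dotted_version("117"))  # 11.7

# Step 2 exported BNB_CUDA_VERSION=117, so even if PyTorch ships
# with a different CUDA version, the 11.7 binary is selected:
os.environ["BNB_CUDA_VERSION"] = "117"
print(select_library("121"))  # libbitsandbytes_cuda117.so
```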
diff --git a/docs/source/integrations.mdx b/docs/source/integrations.mdx
index 0df7efb72..0e37765c5 100644
--- a/docs/source/integrations.mdx
+++ b/docs/source/integrations.mdx
@@ -6,8 +6,10 @@ Please review the [bitsandbytes section in the Accelerate docs](https://huggingf
Details about the BitsAndBytesConfig can be found [here](https://huggingface.co/docs/transformers/v4.37.2/en/main_classes/quantization#transformers.BitsAndBytesConfig).
-## Beware: bf16 is optional compute data type
-If your hardware supports it, `bf16` is the optimal compute dtype. The default is `float32` for backward compatibility and numerical stability. `float16` often leads to numerical instabilities, but `bfloat16` provides the benefits of both worlds: numerical stability and significant computation speedup. Therefore, be sure to check if your hardware supports `bf16` and configure it using the `bnb_4bit_compute_dtype` parameter in BitsAndBytesConfig:
+> [!WARNING]
+> **Beware: bf16 is the optimal compute data type!**
+>
+> If your hardware supports it, `bf16` is the optimal compute dtype. The default is `float32` for backward compatibility and numerical stability. `float16` often leads to numerical instabilities, but `bfloat16` provides the best of both worlds: numerical stability equivalent to `float32`, combined with the memory footprint and significant computation speedup of a 16-bit data type. Therefore, be sure to check if your hardware supports `bf16` and configure it using the `bnb_4bit_compute_dtype` parameter in BitsAndBytesConfig:
```py
import torch
diff --git a/docs/source/nonpytorchcuda.mdx b/docs/source/nonpytorchcuda.mdx
deleted file mode 100644
index 099a6961b..000000000
--- a/docs/source/nonpytorchcuda.mdx
+++ /dev/null
@@ -1,46 +0,0 @@
-# How to use a CUDA version that is different from PyTorch
-
-Some features of `bitsandbytes` may need a newer CUDA version than regularly supported by PyTorch binaries from conda / pip. In that case you can use the following instructions to load a precompiled `bitsandbytes` binary that works for you.
-
-## Installing or determining the CUDA installation
-
-Determine the path of the CUDA version that you want to use. Common paths paths are:
-```bash
-/usr/local/cuda
-/usr/local/cuda-XX.X
-```
-
-where XX.X is the CUDA version number.
-
-You can also install CUDA version that you need locally with a script provided by `bitsandbytes` as follows:
-
-```bash
-wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/install_cuda.sh
-# Syntax cuda_install CUDA_VERSION INSTALL_PREFIX EXPORT_TO_BASH
-# CUDA_VERSION in {110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122}
-# EXPORT_TO_BASH in {0, 1} with 0=False and 1=True
-
-# For example, the following installs CUDA 11.7 to ~/local/cuda-11.7 and exports the path to your .bashrc
-
-bash cuda_install.sh 117 ~/local 1
-```
-
-## Setting the environmental variables `BNB_CUDA_VERSION`, and `LD_LIBRARY_PATH`
-
-To manually override the PyTorch installed CUDA version you need to set to variable, like so:
-
-```bash
-export BNB_CUDA_VERSION=
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:
-```
-
-For example, to use the local install path from above:
-
-```bash
-export BNB_CUDA_VERSION=117
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/tim/local/cuda-11.7
-```
-
-It is best to add these lines to the `.bashrc` file to make them permanent.
-
-If you now launch bitsandbytes with these environmental variables the PyTorch CUDA version will be overridden by the new CUDA version and a different bitsandbytes library is loaded (in this case version 117).