♨

FlagAI-Open · Jul 30, 2023 · b6eddba · b6eddba
2 parents 02e6a51 + 7aeb339
commit b6eddba
Show file tree

Hide file tree

Showing 328 changed files with 519,982 additions and 559 deletions.
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -1,6 +1,7 @@
 # This workflow will install Python dependencies, run tests and lint with a single version of Python
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 
+
 name: Python application
 
 on:
@@ -19,10 +20,10 @@ jobs:
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python 3.10
+    - name: Set up Python 3.9
       uses: actions/setup-python@v3
       with:
-        python-version: "3.10"
+        python-version: "3.9"
     - name: Install dependencies
       run: |
         python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple

diff --git a/.gitignore b/.gitignore
@@ -19,6 +19,8 @@ test_report
 /data/
 /tests/*/data
 checkpoints
+checkpoints_in
+checkpoints_out
 state_dict
 checkpoints*
 vocabs
@@ -28,3 +30,9 @@ qqp
 glm_large_qqp_pytorch
 wandb
 clip_benchmark_datasets
+examples/AltCLIP/clip_benchmark_datasets
+examples/glm_pretrain/data.lazy
+examples/glm_pretrain/examples/glm_pretrain/data.lazy
+examples/vit_cifar100/cifar100
+examples/vit_cifar100/data
+output/
diff --git a/BAAI_Aquila_Model_License.pdf b/BAAI_Aquila_Model_License.pdf
diff --git a/CHANGELOG.md b/CHANGELOG.md
diff --git a/CLA.md b/CLA.md
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
diff --git a/COMMITTERS.csv b/COMMITTERS.csv
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
diff --git a/Dockerfile b/Dockerfile
@@ -1,53 +1,61 @@
-#Change to your base image, such as pytorch1.11+py38
-#https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_21-02.html#rel_21-02
-FROM nvcr.io/nvidia/pytorch:21.06-py3
-#You can set available pypi sources
-RUN /bin/bash -c "pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple"
-
-ENV STAGE_DIR=/tmp
-RUN mkdir -p ${STAGE_DIR}
-#Ubuntu
-RUN apt-get update && apt-get install -y openssh-server && apt-get install -y git
-ARG SSH_PORT=6001
-#Client Liveness & Uncomment Port 22 for SSH Daemon
-RUN echo "ClientAliveInterval 30" >> /etc/ssh/sshd_config
-RUN mkdir -p /var/run/sshd && cp /etc/ssh/sshd_config ${STAGE_DIR}/sshd_config && \
-    sed "0,/^#Port 22/s//Port 22/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config
-RUN cat /etc/ssh/sshd_config > ${STAGE_DIR}/sshd_config && \
-    sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config && \
-    sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/g' /etc/ssh/sshd_config
-EXPOSE ${SSH_PORT}
-
-#Set SSH KEY
-RUN mkdir /root/.ssh
-RUN printf "#StrictHostKeyChecking no\n#UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
- ssh-keygen -t rsa -f /root/.ssh/id_rsa -N "" && cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys && \
-   chmod og-wx /root/.ssh/authorized_keys
-
-RUN echo $'Host 127.0.0.1 \n\
-    Hostname 127.0.0.1 \n\
-    Port 6001 \n\
-    StrictHostKeyChecking no \n\
-    User root' > /root/.ssh/config
-RUN echo $'Host localhost \n\
-    Hostname localhost \n\
-    Port 6001 \n\
-    StrictHostKeyChecking no \n\
-    User root' >> /root/.ssh/config
-
-RUN echo "service ssh start" >> /root/.bashrc
-
-#Main deps
-RUN pip install tensorboard
-RUN pip install sentencepiece
-RUN pip install boto3
-RUN pip install jieba
-RUN pip install ftfy
-RUN pip install deepspeed==0.7.7
-RUN pip install bmtrain
-
-RUN pip install flagai
-#For development usage, you can change as follows
-#RUN git clone https://github.com/FlagAI-Open/FlagAI.git && cd FlagAI && python setup.py install
+# Base image: CUDA 11.7 "devel" flavour on Ubuntu 20.04 (presumably chosen so the CUDA extension builds below have a compiler toolchain).
+FROM nvcr.io/nvidia/cuda:11.7.0-devel-ubuntu20.04
+# LABEL takes key=value pairs; a bare word is rejected by the Dockerfile parser ("LABEL must have two arguments").
+LABEL maintainer="zhanglu0704"
+# Runtime environment: the container time zone and the workspace path that later steps reference.
+ENV TZ=Asia/Shanghai \
+    WORK_DID=/workspace
+# NOTE(review): marks /etc/localtime as a volume mount point, presumably so the host's file can be mounted in; confirm intended.
+VOLUME /etc/localtime
+# Relative paths in the following instructions (git clones, pip installs of ".") resolve under the workspace.
+WORKDIR ${WORK_DID}
+# OS packages: compilers, utilities, the SSH server and Python 3.9. apt-get has the stable scripting CLI; noninteractive mode keeps package configuration from prompting.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        g++ gcc cmake curl wget vim unzip git openssh-server net-tools python3-packaging \
+        python3.9 python3.9-dev python3-pip && \
+    apt-get clean -y && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
+# Point both `python3` and `python` at python3.9, then upgrade pip for that interpreter. `ln -sf` replaces an existing link instead of failing.
+# NOTE(review): system tools that invoke /usr/bin/python3 will now run under 3.9 as well; confirm nothing needs the distro default.
+RUN ln -sf /usr/bin/python3.9 /usr/bin/python3 && \
+    ln -sf /usr/bin/python3 /usr/bin/python && \
+    python -m pip install --no-cache-dir --upgrade pip
+# PyTorch 1.13.0 / torchvision 0.14.0 / torchaudio 0.13.0 from the cu117 wheel index; --no-cache-dir keeps pip's download cache out of the layer.
+RUN pip install --no-cache-dir torch==1.13.0+cu117 torchvision==0.14.0+cu117 torchaudio==0.13.0 \
+    --extra-index-url https://download.pytorch.org/whl/cu117
+# Copy only requirements.txt from the build context so this dependency layer is rebuilt only when that file changes.
+COPY requirements.txt ${WORK_DID}/
+# Install the listed Python dependencies; --no-cache-dir keeps pip's download cache out of the layer.
+RUN python -m pip install --no-cache-dir -r ${WORK_DID}/requirements.txt
+# Build NVIDIA apex from source with its C++ and CUDA extensions; the checkout is removed in the same layer.
+# Disabled alternatives kept for reference: `git checkout -f 23.05`, and the older flag spelling
+#   pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
+RUN git clone https://github.com/NVIDIA/apex && \
+    ( cd apex && \
+      pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" . ) && \
+    rm -rf apex
+# Build BMTrain from source at revision 0.2.2; the checkout is removed in the same layer.
+# Disabled alternative kept for reference: python setup.py install --prefix=/usr/local/
+RUN git clone https://github.com/OpenBMB/BMTrain && \
+    ( cd BMTrain && \
+      git checkout -f 0.2.2 && \
+      pip install -v --no-cache-dir . ) && \
+    rm -rf BMTrain
+# Install FlagAI from a fresh clone of the upstream repository (not from the build context); the checkout is removed in the same layer.
+RUN git clone https://github.com/FlagAI-Open/FlagAI.git && \
+    ( cd FlagAI && \
+      pip install -v --no-cache-dir . ) && \
+    rm -rf FlagAI
+# SSH daemon: send a keep-alive probe every 30 s and allow root logins.
+# SSH client: skip host-key verification and do not record known hosts.
+RUN echo "ClientAliveInterval 30" >> /etc/ssh/sshd_config && \
+    sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/g' /etc/ssh/sshd_config && \
+    { echo "StrictHostKeyChecking no"; echo "UserKnownHostsFile /dev/null"; } >> /etc/ssh/ssh_config
+# Create a passphrase-less RSA key pair for root and authorize its public half (presumably so containers of this image can SSH to each other).
+# NOTE(review): the private key is baked into the image, so anyone holding the image can log in as root; confirm this is acceptable.
+RUN ssh-keygen -t rsa -N "" -f /root/.ssh/id_rsa && \
+    cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys && chmod go-wx /root/.ssh/authorized_keys
 
 CMD ["/bin/sh", "-c", "service ssh start && tail -f /dev/null"]
+# The CMD starts the SSH service and then blocks on tail so the container keeps running. Example build (the context "." must contain requirements.txt):
+# sudo docker build -f Dockerfile --shm-size='120g' -t flagai:dev-ubuntu20-cuda11.7-py39 .
diff --git a/GOVERNANCE.md b/GOVERNANCE.md
diff --git a/LICENSE b/LICENSE
diff --git a/README.md b/README.md
@@ -8,7 +8,8 @@
 --------------------------------------------------------------------------------
 
 
-FlagAI (Fast LArge-scale General AI models) is a fast, easy-to-use and extensible toolkit for large-scale model. Our goal is to support training, fine-tuning, and deployment of large-scale models on various downstream tasks with multi-modality. 
+FlagAI (Fast LArge-scale General AI models) is a fast, easy-to-use and extensible toolkit for large-scale models. Our goal is to support training, fine-tuning, and deployment of large-scale models on various downstream tasks with multi-modality.
+
 
 
 ## Why should I use FlagAI?
@@ -18,7 +19,7 @@ FlagAI (Fast LArge-scale General AI models) is a fast, easy-to-use and extensibl
 
     FlagAI provides an API that allows you to quickly download pre-trained models and fine-tune them on a wide range of datasets collected from [SuperGLUE](https://super.gluebenchmark.com/) and [CLUE](https://github.com/CLUEbenchmark/CLUE) benchmarks for both Chinese and English text.
 
-    FlagAI now supports over 30 mainstream models, including multilingual text and image representation model [**AltCLIP**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/AltCLIP), text-to-image generation model [**AltDiffusion**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/AltDiffusion) [![Huggingface space](https://img.shields.io/badge/🤗-Huggingface%20Space-cyan.svg)](https://huggingface.co/spaces/BAAI/bilingual_stable_diffusion), [**WuDao GLM**](/docs/GLM.md) (with a maximum of 10 billion parameters), [**EVA-CLIP**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/EVA_CLIP), **OPT**, **BERT**, **RoBERTa**, **GPT2**, **T5**, **ALM**, and models from **Huggingface Transformers**, etc.
+    FlagAI now supports over 30 mainstream models, including Language Model [**Aquila**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/Aquila), multilingual text and image representation model [**AltCLIP**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/AltCLIP), text-to-image generation model [**AltDiffusion**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/AltDiffusion) [![Huggingface space](https://img.shields.io/badge/🤗-Huggingface%20Space-cyan.svg)](https://huggingface.co/spaces/BAAI/bilingual_stable_diffusion), [**WuDao GLM**](/docs/GLM.md) (with a maximum of 10 billion parameters), [**EVA-CLIP**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/EVA_CLIP), **OPT**, **BERT**, **RoBERTa**, **GPT2**, **T5**, **ALM**, and models from **Huggingface Transformers**, etc.
 
 
 2. **Parallel train with fewer than 10 lines of code**
@@ -55,6 +56,7 @@ FlagAI (Fast LArge-scale General AI models) is a fast, easy-to-use and extensibl
 
 |   Model          |  Task    | Train | Finetune | Inference/Generate | Examples       |                                                         
 | :---------------- | :------- | :-- |:-- | :-- | :--------------------------------------------- |
+| Aquila      | Natural Language Processing  | ✅  | ✅  | ✅  | [README.md](examples/Aquila/README.md) |
 | ALM          | Arabic Text Generation  |  ✅  | ❌  | ✅  | [README.md](/examples/ALM/README.md)  |                         
 | AltCLIP       | Image-Text Matching  | ✅  | ✅  | ✅  | [README.md](/examples/AltCLIP/README.md)   |  
 | AltCLIP-m18      | Image-Text Matching  | ✅  | ✅  | ✅  | [README.md](examples/AltCLIP-m18/README.md)   |                             
@@ -140,20 +142,30 @@ git clone https://github.com/NVIDIA/apex
 cd apex
 pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
 ```
-- [Optional] For ZeRO optimizers, install [DEEPSPEED](https://github.com/microsoft/DeepSpeed)
+- [Optional] For ZeRO optimizers, install [DEEPSPEED](https://github.com/microsoft/DeepSpeed) (>= 0.7.7)
 ```
 git clone https://github.com/microsoft/DeepSpeed
 cd DeepSpeed
 DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 pip install -e .
 ds_report # check the deepspeed status
 ```
-- [Optional] For BMTrain training, install [BMTrain](https://github.com/OpenBMB/BMTrain)
+- [Optional] For BMTrain training, install [BMTrain](https://github.com/OpenBMB/BMTrain) (>= 0.2.2)
 ```
 git clone https://github.com/OpenBMB/BMTrain
 cd BMTrain
 python setup.py install
 ```
-- [Tips] For single-node docker environments, we need to set up ports for your ssh. e.g., root@127.0.0.1 with port 7110
+- [Optional] For BMInf low-resource inference, install [BMInf](https://github.com/OpenBMB/BMInf)
+```
+pip install bminf
+
+```
+- [Optional] For Flash Attention, install [Flash-attention](https://github.com/HazyResearch/flash-attention) (>=1.0.2)
+```
+pip install flash-attn
+```
+
+- [Tips] For single-node docker environments, we need to set up ports for your ssh. e.g., root@127.0.0.1 with port 7110
 ```
 >>> vim ~/.ssh/config
 Host 127.0.0.1
@@ -272,7 +284,7 @@ for text_pair in test_data:
 
 ## LICENSE
 
-The majority of FlagAI is licensed under the [Apache 2.0 license](LICENSE), however portions of the project are available under separate license terms:
+The majority of FlagAI is licensed under the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0), however portions of the project are available under separate license terms:
 
 * Megatron-LM is licensed under the [Megatron-LM license](https://github.com/NVIDIA/Megatron-LM/blob/main/LICENSE)
 * GLM is licensed under the [MIT license](https://github.com/THUDM/GLM/blob/main/LICENSE)
@@ -281,6 +293,7 @@ The majority of FlagAI is licensed under the [Apache 2.0 license](LICENSE), howe
 
 
 ## News
+- [9 June 2023] release v1.7.0, Support Aquila [#324](https://github.com/FlagAI-Open/FlagAI/pull/324);
 - [31 Mar 2023] release v1.6.3, Support AltCLIP-m18 [#303](https://github.com/FlagAI-Open/FlagAI/pull/303) and AltDiffusion-m18 [#302](https://github.com/FlagAI-Open/FlagAI/pull/302); 
 - [17 Mar 2023] release v1.6.2, Support application of new optimizers [#266](https://github.com/FlagAI-Open/FlagAI/pull/266), and added a new gpt model name 'GPT2-base-en' for English; 
 - [2 Mar 2023] release v1.6.1, Support Galactica model [#234](https://github.com/FlagAI-Open/FlagAI/pull/234); BMInf, a low-resource inference package [#238](https://github.com/FlagAI-Open/FlagAI/pull/238), and examples for p-tuning [#227](https://github.com/FlagAI-Open/FlagAI/pull/238)
@@ -292,6 +305,12 @@ The majority of FlagAI is licensed under the [Apache 2.0 license](LICENSE), howe
 - [29 Jun 2022] release v1.1.0, support OPTs downloading and inference/fine-tuning [#63](https://github.com/FlagAI-Open/FlagAI/pull/63)
 - [17 May 2022] made our first contribution in [#1](https://github.com/FlagAI-Open/FlagAI/pull/1)
 
+## Platforms supported
+
+<div  align="center">    
+<img src="./examples/Aquila/img/merged_platform.jpg" height = "100" align=center />
+</div>
+
 
 
 ## Misc