
Commit

xxaier committed Jul 30, 2023
2 parents 02e6a51 + 7aeb339 commit b6eddba
Showing 328 changed files with 519,982 additions and 559 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/python-app.yml
@@ -1,6 +1,7 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions


name: Python application

on:
@@ -19,10 +20,10 @@ jobs:

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.10"
python-version: "3.9"
- name: Install dependencies
run: |
python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
8 changes: 8 additions & 0 deletions .gitignore
100644 → 100755
@@ -19,6 +19,8 @@ test_report
/data/
/tests/*/data
checkpoints
checkpoints_in
checkpoints_out
state_dict
checkpoints*
vocabs
@@ -28,3 +30,9 @@ qqp
glm_large_qqp_pytorch
wandb
clip_benchmark_datasets
examples/AltCLIP/clip_benchmark_datasets
examples/glm_pretrain/data.lazy
examples/glm_pretrain/examples/glm_pretrain/data.lazy
examples/vit_cifar100/cifar100
examples/vit_cifar100/data
output/
Binary file added BAAI_Aquila_Model_License.pdf
Binary file not shown.
Empty file modified CHANGELOG.md
100644 → 100755
Empty file.
Empty file modified CLA.md
100644 → 100755
Empty file.
Empty file modified CODE_OF_CONDUCT.md
100644 → 100755
Empty file.
Empty file modified COMMITTERS.csv
100644 → 100755
Empty file.
Empty file modified CONTRIBUTING.md
100644 → 100755
Empty file.
110 changes: 59 additions & 51 deletions Dockerfile
100644 → 100755
@@ -1,53 +1,61 @@
#Change to your base image, such as pytorch1.11+py38
#https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_21-02.html#rel_21-02
FROM nvcr.io/nvidia/pytorch:21.06-py3
#You can set available pypi sources
RUN /bin/bash -c "pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple"

ENV STAGE_DIR=/tmp
RUN mkdir -p ${STAGE_DIR}
#Ubuntu
RUN apt-get update && apt-get install -y openssh-server && apt-get install -y git
ARG SSH_PORT=6001
#Client Liveness & Uncomment Port 22 for SSH Daemon
RUN echo "ClientAliveInterval 30" >> /etc/ssh/sshd_config
RUN mkdir -p /var/run/sshd && cp /etc/ssh/sshd_config ${STAGE_DIR}/sshd_config && \
sed "0,/^#Port 22/s//Port 22/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config
RUN cat /etc/ssh/sshd_config > ${STAGE_DIR}/sshd_config && \
sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config && \
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/g' /etc/ssh/sshd_config
EXPOSE ${SSH_PORT}

#Set SSH KEY
RUN mkdir /root/.ssh
RUN printf "#StrictHostKeyChecking no\n#UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
ssh-keygen -t rsa -f /root/.ssh/id_rsa -N "" && cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys && \
chmod og-wx /root/.ssh/authorized_keys

RUN echo $'Host 127.0.0.1 \n\
Hostname 127.0.0.1 \n\
Port 6001 \n\
StrictHostKeyChecking no \n\
User root' > /root/.ssh/config
RUN echo $'Host localhost \n\
Hostname localhost \n\
Port 6001 \n\
StrictHostKeyChecking no \n\
User root' >> /root/.ssh/config

RUN echo "service ssh start" >> /root/.bashrc

#Main deps
RUN pip install tensorboard
RUN pip install sentencepiece
RUN pip install boto3
RUN pip install jieba
RUN pip install ftfy
RUN pip install deepspeed==0.7.7
RUN pip install bmtrain

RUN pip install flagai
#For development usage, you can change as follows
#RUN git clone https://github.com/FlagAI-Open/FlagAI.git && cd FlagAI && python setup.py install
FROM nvcr.io/nvidia/cuda:11.7.0-devel-ubuntu20.04

LABEL zhanglu0704

ENV TZ=Asia/Shanghai

VOLUME /etc/localtime

ENV WORK_DID=/workspace

WORKDIR ${WORK_DID}

RUN apt update && \
apt install -y g++ gcc cmake curl wget vim unzip git openssh-server net-tools python3-packaging && \
apt install -y python3.9 python3.9-dev python3-pip && \
apt clean -y && \
rm -rf /var/cache/apt/archives

RUN rm /usr/bin/python3 && \
ln -s /usr/bin/python3.9 /usr/bin/python3 && \
ln -s /usr/bin/python3 /usr/bin/python && \
python -m pip install --upgrade pip

RUN pip install torch==1.13.0+cu117 torchvision==0.14.0+cu117 torchaudio==0.13.0 \
--extra-index-url https://download.pytorch.org/whl/cu117

COPY requirements.txt ${WORK_DID}/

RUN python -m pip install -r ${WORK_DID}/requirements.txt

RUN git clone https://github.com/NVIDIA/apex && \
cd apex && \
# git checkout -f 23.05 && \
# pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . && \
pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" . && \
cd ../ && rm -rf apex

RUN git clone https://github.com/OpenBMB/BMTrain && \
cd BMTrain && \
git checkout -f 0.2.2 && \
# python setup.py install --prefix=/usr/local/
pip install -v . && \
cd ../ && rm -rf BMTrain

RUN git clone https://github.com/FlagAI-Open/FlagAI.git && \
cd FlagAI && \
pip install -v . && \
cd ../ && rm -rf FlagAI

RUN echo "ClientAliveInterval 30" >> /etc/ssh/sshd_config && \
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/g' /etc/ssh/sshd_config && \
echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config && \
echo "UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config

RUN ssh-keygen -t rsa -f /root/.ssh/id_rsa -N "" && \
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys && \
chmod og-wx /root/.ssh/authorized_keys

CMD service ssh start && tail -f /dev/null

# sudo docker build -f Dockerfile --shm-size='120g' -t flagai:dev-ubuntu20-cuda11.7-py39 .
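# Example of running the built image (flags below are assumptions, adjust to your setup):
# sudo docker run --gpus all --shm-size=120g --network=host -it flagai:dev-ubuntu20-cuda11.7-py39 bash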
Empty file modified GOVERNANCE.md
100644 → 100755
Empty file.
Empty file modified LICENSE
100644 → 100755
Empty file.
31 changes: 25 additions & 6 deletions README.md
100644 → 100755
@@ -8,7 +8,8 @@
--------------------------------------------------------------------------------


FlagAI (Fast LArge-scale General AI models) is a fast, easy-to-use and extensible toolkit for large-scale model. Our goal is to support training, fine-tuning, and deployment of large-scale models on various downstream tasks with multi-modality.
FlagAI (Fast LArge-scale General AI models) is a fast, easy-to-use and extensible toolkit for large-scale model. Our goal is to support training, fine-tuning, and deployment of large-scale models on various downstream tasks with multi-modality.



## Why should I use FlagAI?
@@ -18,7 +19,7 @@ FlagAI (Fast LArge-scale General AI models) is a fast, easy-to-use and extensibl

FlagAI provides an API that allows you to quickly download pre-trained models and fine-tune them on a wide range of datasets collected from [SuperGLUE](https://super.gluebenchmark.com/) and [CLUE](https://github.com/CLUEbenchmark/CLUE) benchmarks for both Chinese and English text.
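As a concrete illustration of that API, the following is a minimal sketch of downloading a checkpoint and generating text with it. It assumes the `AutoLoader` and `Predictor` interfaces used in the FlagAI examples; the task name, model name, and prompt are illustrative choices, not part of this commit.

```
# Minimal sketch (assumption: AutoLoader / Predictor interfaces as in the FlagAI examples).
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor

# Download a pre-trained Chinese RoBERTa checkpoint for title generation (illustrative choice).
loader = AutoLoader("title-generation", model_name="RoBERTa-base-ch")
model = loader.get_model()
tokenizer = loader.get_tokenizer()

# Generate a title for a short passage with beam search.
predictor = Predictor(model, tokenizer)
print(predictor.predict_generate_beamsearch("四川省成都市举办了一场大规模的人工智能开源论坛。", out_max_length=32))
```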

FlagAI now supports over 30 mainstream models, including multilingual text and image representation model [**AltCLIP**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/AltCLIP), text-to-image generation model [**AltDiffusion**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/AltDiffusion) [![Huggingface space](https://img.shields.io/badge/🤗-Huggingface%20Space-cyan.svg)](https://huggingface.co/spaces/BAAI/bilingual_stable_diffusion), [**WuDao GLM**](/docs/GLM.md) (with a maximum of 10 billion parameters), [**EVA-CLIP**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/EVA_CLIP), **OPT**, **BERT**, **RoBERTa**, **GPT2**, **T5**, **ALM**, and models from **Huggingface Transformers**, etc.
FlagAI now supports over 30 mainstream models, including Language Model [**Aquila**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/Aquila), multilingual text and image representation model [**AltCLIP**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/AltCLIP), text-to-image generation model [**AltDiffusion**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/AltDiffusion) [![Huggingface space](https://img.shields.io/badge/🤗-Huggingface%20Space-cyan.svg)](https://huggingface.co/spaces/BAAI/bilingual_stable_diffusion), [**WuDao GLM**](/docs/GLM.md) (with a maximum of 10 billion parameters), [**EVA-CLIP**](https://github.com/FlagAI-Open/FlagAI/tree/master/examples/EVA_CLIP), **OPT**, **BERT**, **RoBERTa**, **GPT2**, **T5**, **ALM**, and models from **Huggingface Transformers**, etc.


2. **Parallel train with fewer than 10 lines of code**
@@ -55,6 +56,7 @@ FlagAI (Fast LArge-scale General AI models) is a fast, easy-to-use and extensibl

| Model | Task | Train | Finetune | Inference/Generate | Examples |
| :---------------- | :------- | :-- |:-- | :-- | :--------------------------------------------- |
| Aquila | Natural Language Processing | ✅ | ✅ | ✅ | [README.md](examples/Aquila/README.md)
| ALM | Arabic Text Generation |||| [README.md](/examples/ALM/README.md) |
| AltCLIP | Image-Text Matching |||| [README.md](/examples/AltCLIP/README.md) |
| AltCLIP-m18 | Image-Text Matching |||| [README.md](examples/AltCLIP-m18/README.md) |
@@ -140,20 +142,30 @@ git clone https://github.com/NVIDIA/apex
cd apex
pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
```
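Note: newer pip releases reject the `--global-option` flags used above; the Dockerfile updated in this commit instead builds apex with `--no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext"`, which is the form required by recent pip.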
- [Optional] For ZeRO optimizers, install [DEEPSPEED](https://github.com/microsoft/DeepSpeed)
- [Optional] For ZeRO optimizers, install [DEEPSPEED](https://github.com/microsoft/DeepSpeed) (>= 0.7.7)
```
git clone https://github.com/microsoft/DeepSpeed
cd DeepSpeed
DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 pip install -e .
ds_report # check the deespeed status
```
- [Optional] For BMTrain training, install [BMTrain](https://github.com/OpenBMB/BMTrain)
- [Optional] For BMTrain training, install [BMTrain](https://github.com/OpenBMB/BMTrain) (>= 0.2.2)
```
git clone https://github.com/OpenBMB/BMTrain
cd BMTrain
python setup.py install
```
- [Tips] For single-node docker environments, we need to set up ports for your ssh. e.g., [email protected] with port 7110
- [Optional] For BMInf low-resource inference, install [BMInf](https://github.com/OpenBMB/BMInf)
```
pip install bminf
```
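For orientation, a rough sketch of how BMInf is typically applied, assuming the `bminf.wrapper` entry point from the BMInf README; the toy module, the `quantization` flag, and the memory budget are placeholders and may need adjusting across BMInf versions.

```
# Sketch under assumptions: bminf.wrapper is expected to replace large layers of an
# existing torch module with quantized / CPU-offloaded versions so that inference
# fits a small GPU memory budget. Requires a CUDA device.
import torch
import bminf

toy = torch.nn.Sequential(                 # stand-in for a large language model
    torch.nn.Linear(2048, 8192),
    torch.nn.ReLU(),
    torch.nn.Linear(8192, 2048),
).half()

toy = bminf.wrapper(toy, quantization=False, memory_limit=2 << 30)  # ~2 GB budget (illustrative)
print(type(toy))
```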
- [Optional] For Flash Attention, install [Flash-attention](https://github.com/HazyResearch/flash-attention) (>=1.0.2)
```
pip install flash-attn
```
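For orientation, a minimal sketch of calling the fused attention kernel directly; it assumes the `flash_attn_func` interface from flash-attn 2.x (the 1.x releases covered by the version pin above expose a different module path), and the shapes and dtypes follow that interface's requirements.

```
# Sketch under assumptions: flash_attn_func(q, k, v, ...) from flash-attn 2.x.
# Inputs must be fp16/bf16 CUDA tensors of shape (batch, seqlen, nheads, headdim).
import torch
from flash_attn import flash_attn_func

batch, seqlen, nheads, headdim = 2, 128, 8, 64
q = torch.randn(batch, seqlen, nheads, headdim, device="cuda", dtype=torch.float16)
k = torch.randn_like(q)
v = torch.randn_like(q)

out = flash_attn_func(q, k, v, dropout_p=0.0, causal=True)  # same shape as q
print(out.shape)
```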

- [Tips] For single-node docker environments, we need to set up ports for your ssh. e.g., [email protected] with port 711
```
>>> vim ~/.ssh/config
Host 127.0.0.1
@@ -272,7 +284,7 @@ for text_pair in test_data:

## LICENSE

The majority of FlagAI is licensed under the [Apache 2.0 license](LICENSE), however portions of the project are available under separate license terms:
The majority of FlagAI is licensed under the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0), however portions of the project are available under separate license terms:

* Megatron-LM is licensed under the [Megatron-LM license](https://github.com/NVIDIA/Megatron-LM/blob/main/LICENSE)
* GLM is licensed under the [MIT license](https://github.com/THUDM/GLM/blob/main/LICENSE)
@@ -281,6 +293,7 @@ The majority of FlagAI is licensed under the [Apache 2.0 license](LICENSE), howe


## News
- [9 June 2023] release v1.7.0, Support Aquila [#324](https://github.com/FlagAI-Open/FlagAI/pull/324);
- [31 Mar 2023] release v1.6.3, Support AltCLIP-m18 [#303](https://github.com/FlagAI-Open/FlagAI/pull/303) and AltDiffusion-m18 [#302](https://github.com/FlagAI-Open/FlagAI/pull/302);
- [17 Mar 2023] release v1.6.2, Support application of new optimizers [#266](https://github.com/FlagAI-Open/FlagAI/pull/266), and added a new gpt model name 'GPT2-base-en' for English;
- [2 Mar 2023] release v1.6.1, Support Galactica model [#234](https://github.com/FlagAI-Open/FlagAI/pull/234); BMInf, a low-resource inference package [#238](https://github.com/FlagAI-Open/FlagAI/pull/238), and examples for p-tuning [#227](https://github.com/FlagAI-Open/FlagAI/pull/238)
@@ -292,6 +305,12 @@ The majority of FlagAI is licensed under the [Apache 2.0 license](LICENSE), howe
- [29 Jun 2022] release v1.1.0, support OPTs downloading and inference/fine-tuning [#63](https://github.com/FlagAI-Open/FlagAI/pull/63)
- [17 May 2022] made our first contribution in [#1](https://github.com/FlagAI-Open/FlagAI/pull/1)

## Platforms supported

<div align="center">
<img src="./examples/Aquila/img/merged_platform.jpg" height = "100" align=center />
</div>



## Misc
