Skip to content

Commit

Permalink
Merge pull request #3097 from lissyx/update-0.8
Browse files Browse the repository at this point in the history
Update 0.8
  • Loading branch information
lissyx authored Jun 23, 2020
2 parents 1b10a25 + 18c37b9 commit 86c88c2
Show file tree
Hide file tree
Showing 148 changed files with 1,763 additions and 1,730 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,5 @@
/doc/.build/
/doc/xml-c/
/doc/xml-java/
Dockerfile.build
Dockerfile.train
2 changes: 1 addition & 1 deletion .taskcluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ tasks:
- "--login"
- "-cxe"
- >
echo "deb http://archive.ubuntu.com/ubuntu/ trusty-updates main" > /etc/apt/sources.list.d/trusty-updates.list &&
echo "deb http://archive.ubuntu.com/ubuntu/ xenial-updates main" > /etc/apt/sources.list.d/xenial-updates.list &&
apt-get -qq update && apt-get -qq -y install git python3-pip curl sudo &&
adduser --system --home /home/build-user build-user &&
cd /home/build-user/ &&
Expand Down
183 changes: 75 additions & 108 deletions Dockerfile → Dockerfile.build.tmpl
Original file line number Diff line number Diff line change
@@ -1,83 +1,73 @@
# Please refer to the USING documentation, "Dockerfile for building from source"

# Need devel version cause we need /usr/include/cudnn.h
# for compiling libctc_decoder_with_kenlm.so
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04

ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#

# >> START Install base software

# Get basic packages
RUN apt-get update && apt-get install -y --no-install-recommends \
apt-utils \
bash-completion \
build-essential \
ca-certificates \
cmake \
curl \
wget \
g++ \
gcc \
git \
git-lfs \
libbz2-dev \
libboost-all-dev \
libgsm1-dev \
libltdl-dev \
liblzma-dev \
libmagic-dev \
libpng-dev \
libsox-fmt-mp3 \
libsox-dev \
locales \
openjdk-8-jdk \
pkg-config \
python3 \
python3-dev \
python3-pip \
python3-wheel \
python3-numpy \
libcurl3-dev \
ca-certificates \
gcc \
sox \
libsox-fmt-mp3 \
htop \
nano \
cmake \
libboost-all-dev \
zlib1g-dev \
libbz2-dev \
liblzma-dev \
locales \
pkg-config \
libpng-dev \
libsox-dev \
libmagic-dev \
libgsm1-dev \
libltdl-dev \
openjdk-8-jdk \
bash-completion \
g++ \
unzip

RUN ln -s -f /usr/bin/python3 /usr/bin/python
unzip \
wget \
zlib1g-dev

# Install NCCL 2.2
RUN apt-get --no-install-recommends install -qq -y --allow-downgrades --allow-change-held-packages libnccl2=2.3.7-1+cuda10.0 libnccl-dev=2.3.7-1+cuda10.0
RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1

# Install Bazel
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel_0.24.1-linux-x86_64.deb"
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/2.0.0/bazel_2.0.0-linux-x86_64.deb"
RUN dpkg -i bazel_*.deb

# Install CUDA CLI Tools
RUN apt-get --no-install-recommends install -qq -y cuda-command-line-tools-10-0

# Install pip
RUN wget https://bootstrap.pypa.io/get-pip.py && \
python3 get-pip.py && \
rm get-pip.py

# << END Install base software




# >> START Configure Tensorflow Build

# Clone TensorFlow from Mozilla repo
RUN git clone https://github.com/mozilla/tensorflow/
WORKDIR /tensorflow
RUN git checkout r1.15

RUN git checkout r2.2

# GPU Environment Setup
ENV TF_NEED_ROCM 0
ENV TF_NEED_OPENCL_SYCL 0
ENV TF_NEED_OPENCL 0
ENV TF_NEED_CUDA 1
ENV TF_CUDA_PATHS "/usr/local/cuda,/usr/lib/x86_64-linux-gnu/"
ENV TF_CUDA_VERSION 10.0
ENV TF_CUDNN_VERSION 7
ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/"
ENV TF_CUDA_VERSION 10.1
ENV TF_CUDNN_VERSION 7.6
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
ENV TF_NCCL_VERSION 2.3
ENV TF_NCCL_VERSION 2.4

# Common Environment Setup
ENV TF_BUILD_CONTAINER_TYPE GPU
Expand Down Expand Up @@ -105,14 +95,12 @@ ENV TF_NEED_TENSORRT 0
ENV TF_NEED_GDR 0
ENV TF_NEED_VERBS 0
ENV TF_NEED_OPENCL_SYCL 0

ENV PYTHON_BIN_PATH /usr/bin/python3.6
ENV PYTHON_LIB_PATH /usr/lib/python3.6/dist-packages
ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages

# << END Configure Tensorflow Build




# >> START Configure Bazel

# Running bazel inside a `docker build` command causes trouble, cf:
Expand All @@ -124,39 +112,17 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
>>/etc/bazel.bazelrc

# Put cuda libraries to where they are expected to be
RUN mkdir /usr/local/cuda/lib && \
ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h && \
ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h


# Set library paths
ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/stubs/

# << END Configure Bazel

WORKDIR /

# Copy DeepSpeech repo contents to container's /DeepSpeech
COPY . /DeepSpeech/

# Alternative clone from GitHub
# RUN apt-get update && apt-get install -y git-lfs
# WORKDIR /
# RUN git lfs install
# RUN git clone https://github.com/mozilla/DeepSpeech.git

RUN git clone $DEEPSPEECH_REPO
WORKDIR /DeepSpeech

RUN DS_NODECODER=1 pip3 --no-cache-dir install .
RUN git checkout $DEEPSPEECH_SHA

# Link DeepSpeech native_client libs to tf folder
RUN ln -s /DeepSpeech/native_client /tensorflow




# >> START Build and bind

WORKDIR /tensorflow
Expand All @@ -170,59 +136,60 @@ RUN ./configure

# passing LD_LIBRARY_PATH is required cause Bazel doesn't pickup it from environment


# Build DeepSpeech
RUN bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-fvisibility=hidden //native_client:libdeepspeech.so --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}

###
### Using TensorFlow upstream should work
###
# # Build TF pip package
# RUN bazel build --config=opt --config=cuda --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx //tensorflow/tools/pip_package:build_pip_package --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
#
# # Build wheel
# RUN bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
#
# # Install tensorflow from our custom wheel
# RUN pip3 install /tmp/tensorflow_pkg/*.whl
RUN bazel build \
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
--config=monolithic \
--config=cuda \
-c opt \
--copt=-O3 \
--copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
--copt=-mtune=generic \
--copt=-march=x86-64 \
--copt=-msse \
--copt=-msse2 \
--copt=-msse3 \
--copt=-msse4.1 \
--copt=-msse4.2 \
--copt=-mavx \
--copt=-fvisibility=hidden \
//native_client:libdeepspeech.so \
--verbose_failures \
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}

# Copy built libs to /DeepSpeech/native_client
RUN cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/

# Install TensorFlow
WORKDIR /DeepSpeech/
RUN pip3 install tensorflow-gpu==1.15.0


# Build client.cc and install Python client and decoder bindings
ENV TFDIR /tensorflow

RUN nproc

WORKDIR /DeepSpeech/native_client
RUN make deepspeech
RUN make NUM_PROCESSES=$(nproc) deepspeech

WORKDIR /DeepSpeech
RUN cd native_client/python && make bindings
RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/python/dist/*.whl

RUN cd native_client/ctcdecode && make bindings
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl


# << END Build and bind




# Allow Python printing utf-8
ENV PYTHONIOENCODING UTF-8

# Build KenLM in /DeepSpeech/native_client/kenlm folder
WORKDIR /DeepSpeech/native_client
RUN rm -rf kenlm \
&& git clone --depth 1 https://github.com/kpu/kenlm && cd kenlm \
&& mkdir -p build \
&& cd build \
&& cmake .. \
&& make -j 4
RUN rm -rf kenlm && \
git clone https://github.com/kpu/kenlm && \
cd kenlm && \
git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \
mkdir -p build && \
cd build && \
cmake .. && \
make -j $(nproc)

# Done
WORKDIR /DeepSpeech
53 changes: 53 additions & 0 deletions Dockerfile.train.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"

FROM tensorflow/tensorflow:1.15.2-gpu-py3
ENV DEBIAN_FRONTEND=noninteractive

ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#

RUN apt-get update && apt-get install -y --no-install-recommends \
apt-utils \
bash-completion \
build-essential \
curl \
git \
git-lfs \
libbz2-dev \
locales \
python3-venv \
unzip \
wget

# We need to remove it because it's breaking deepspeech install later with
# weird errors about setuptools
RUN apt-get purge -y python3-xdg

# Install dependencies for audio augmentation
RUN apt-get install -y --no-install-recommends libopus0 libsndfile1

WORKDIR /
RUN git lfs install
RUN git clone $DEEPSPEECH_REPO

WORKDIR /DeepSpeech
RUN git checkout $DEEPSPEECH_SHA

# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl

# Prepare deps
RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3

# Install DeepSpeech
# - No need for the decoder since we did it earlier
# - There is already correct TensorFlow GPU installed on the base image,
# we don't want to break that
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .

# Tool to convert output graph for inference
RUN python3 util/taskcluster.py --source tensorflow --branch r1.15 \
--artifact convert_graphdef_memmapped_format --target .

RUN ./bin/run-ldc93s1.sh
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
DEEPSPEECH_SHA ?= origin/master

Dockerfile%: Dockerfile%.tmpl
sed \
-e "s|#DEEPSPEECH_REPO#|$(DEEPSPEECH_REPO)|g" \
-e "s|#DEEPSPEECH_SHA#|$(DEEPSPEECH_SHA)|g" \
< $< > $@
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Project DeepSpeech

DeepSpeech is an open source Speech-To-Text engine, using a model trained by machine learning techniques based on `Baidu's Deep Speech research paper <https://arxiv.org/abs/1412.5567>`_. Project DeepSpeech uses Google's `TensorFlow <https://www.tensorflow.org/>`_ to make the implementation easier.

Documentation for installation, usage, and training models is available on `deepspeech.readthedocs.io <http://deepspeech.readthedocs.io/?badge=latest>`_.
Documentation for installation, usage, and training models are available on `deepspeech.readthedocs.io <http://deepspeech.readthedocs.io/?badge=latest>`_.

For the latest release, including pre-trained models and checkpoints, `see the latest release on GitHub <https://github.com/mozilla/DeepSpeech/releases/latest>`_.

Expand Down
1 change: 1 addition & 0 deletions bin/import_cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def one_sample(sample):
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, label))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames
return (counter, rows)
Expand Down
1 change: 1 addition & 0 deletions bin/import_cv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def one_sample(args):
else:
# This one is good - keep it for the target CSV
rows.append((os.path.split(wav_filename)[-1], file_size, label, sample[2]))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames

Expand Down
1 change: 1 addition & 0 deletions bin/import_lingua_libre.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def one_sample(sample):
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, label))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames

Expand Down
1 change: 1 addition & 0 deletions bin/import_m-ailabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def one_sample(sample):
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, label))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames
return (counter, rows)
Expand Down
Loading

0 comments on commit 86c88c2

Please sign in to comment.