Merge pull request #3097 from lissyx/update-0.8

Update 0.8
mozilla · Jun 23, 2020 · 86c88c2 · 86c88c2
2 parents 1b10a25 + 18c37b9
commit 86c88c2
Show file tree

Hide file tree

Showing 148 changed files with 1,763 additions and 1,730 deletions.
diff --git a/.gitignore b/.gitignore
@@ -32,3 +32,5 @@
 /doc/.build/
 /doc/xml-c/
 /doc/xml-java/
+Dockerfile.build
+Dockerfile.train
diff --git a/.taskcluster.yml b/.taskcluster.yml
@@ -45,7 +45,7 @@ tasks:
         - "--login"
         - "-cxe"
         - >
-          echo "deb http://archive.ubuntu.com/ubuntu/ trusty-updates main" > /etc/apt/sources.list.d/trusty-updates.list &&
+          echo "deb http://archive.ubuntu.com/ubuntu/ xenial-updates main" > /etc/apt/sources.list.d/xenial-updates.list &&
           apt-get -qq update && apt-get -qq -y install git python3-pip curl sudo &&
           adduser --system --home /home/build-user build-user &&
           cd /home/build-user/ &&

diff --git a/Dockerfile → Dockerfile.build.tmpl b/Dockerfile → Dockerfile.build.tmpl
@@ -1,83 +1,73 @@
+# Please refer to the USING documentation, "Dockerfile for building from source"
+
 # Need the devel version because we need /usr/include/cudnn.h
-# for compiling libctc_decoder_with_kenlm.so
-FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
+FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
 
+ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
+ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
 
 # >> START Install base software
 
 # Get basic packages
 RUN apt-get update && apt-get install -y --no-install-recommends \
         apt-utils \
+        bash-completion \
         build-essential \
+        ca-certificates \
+        cmake \
         curl \
-        wget \
+        g++ \
+        gcc \
         git \
+        git-lfs \
+        libbz2-dev \
+        libboost-all-dev \
+        libgsm1-dev \
+        libltdl-dev \
+        liblzma-dev \
+        libmagic-dev \
+        libpng-dev \
+        libsox-fmt-mp3 \
+        libsox-dev \
+        locales \
+        openjdk-8-jdk \
+        pkg-config \
         python3 \
         python3-dev \
         python3-pip \
         python3-wheel \
         python3-numpy \
-        libcurl3-dev  \
-        ca-certificates \
-        gcc \
         sox \
-        libsox-fmt-mp3 \
-        htop \
-        nano \
-        cmake \
-        libboost-all-dev \
-        zlib1g-dev \
-        libbz2-dev \
-        liblzma-dev \
-        locales \
-        pkg-config \
-        libpng-dev \
-        libsox-dev \
-        libmagic-dev \
-        libgsm1-dev \
-        libltdl-dev \
-        openjdk-8-jdk \
-        bash-completion \
-        g++ \
-        unzip
-
-RUN ln -s -f /usr/bin/python3 /usr/bin/python
+        unzip \
+        wget \
+        zlib1g-dev
 
-# Install NCCL 2.2
-RUN apt-get --no-install-recommends install -qq -y --allow-downgrades --allow-change-held-packages libnccl2=2.3.7-1+cuda10.0 libnccl-dev=2.3.7-1+cuda10.0
+RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
 
 # Install Bazel
-RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel_0.24.1-linux-x86_64.deb"
+RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/2.0.0/bazel_2.0.0-linux-x86_64.deb"
 RUN dpkg -i bazel_*.deb
 
-# Install CUDA CLI Tools
-RUN apt-get --no-install-recommends install -qq -y cuda-command-line-tools-10-0
-
-# Install pip
-RUN wget https://bootstrap.pypa.io/get-pip.py && \
-    python3 get-pip.py && \
-    rm get-pip.py
-
 # << END Install base software
 
-
-
-
 # >> START Configure Tensorflow Build
 
 # Clone TensorFlow from Mozilla repo
 RUN git clone https://github.com/mozilla/tensorflow/
 WORKDIR /tensorflow
-RUN git checkout r1.15
-
+RUN git checkout r2.2
 
 # GPU Environment Setup
+ENV TF_NEED_ROCM 0
+ENV TF_NEED_OPENCL_SYCL 0
+ENV TF_NEED_OPENCL 0
 ENV TF_NEED_CUDA 1
-ENV TF_CUDA_PATHS "/usr/local/cuda,/usr/lib/x86_64-linux-gnu/"
-ENV TF_CUDA_VERSION 10.0
-ENV TF_CUDNN_VERSION 7
+ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/"
+ENV TF_CUDA_VERSION 10.1
+ENV TF_CUDNN_VERSION 7.6
 ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
-ENV TF_NCCL_VERSION 2.3
+ENV TF_NCCL_VERSION 2.4
 
 # Common Environment Setup
 ENV TF_BUILD_CONTAINER_TYPE GPU
@@ -105,14 +95,12 @@ ENV TF_NEED_TENSORRT 0
 ENV TF_NEED_GDR 0
 ENV TF_NEED_VERBS 0
 ENV TF_NEED_OPENCL_SYCL 0
+
 ENV PYTHON_BIN_PATH /usr/bin/python3.6
-ENV PYTHON_LIB_PATH /usr/lib/python3.6/dist-packages
+ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
 
 # << END Configure Tensorflow Build
 
-
-
-
 # >> START Configure Bazel
 
 # Running bazel inside a `docker build` command causes trouble, cf:
@@ -124,39 +112,17 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 
-# Put cuda libraries to where they are expected to be
-RUN mkdir /usr/local/cuda/lib &&  \
-    ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
-    ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h && \
-    ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
-    ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h
-
-
-# Set library paths
-ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/stubs/
-
 # << END Configure Bazel
 
+WORKDIR /
 
-# Copy DeepSpeech repo contents to container's /DeepSpeech
-COPY . /DeepSpeech/
-
-# Alternative clone from GitHub 
-# RUN apt-get update && apt-get install -y git-lfs 
-# WORKDIR /
-# RUN git lfs install
-# RUN git clone https://github.com/mozilla/DeepSpeech.git
-
+RUN git clone $DEEPSPEECH_REPO
 WORKDIR /DeepSpeech
-
-RUN DS_NODECODER=1 pip3 --no-cache-dir install .
+RUN git checkout $DEEPSPEECH_SHA
 
 # Link DeepSpeech native_client libs to tf folder
 RUN ln -s /DeepSpeech/native_client /tensorflow
 
-
-
-
 # >> START Build and bind
 
 WORKDIR /tensorflow
@@ -170,59 +136,60 @@ RUN ./configure
 
 # Passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment
 
-
 # Build DeepSpeech
-RUN bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-fvisibility=hidden //native_client:libdeepspeech.so --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
-
-###
-### Using TensorFlow upstream should work
-###
-# # Build TF pip package
-# RUN bazel build --config=opt --config=cuda --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx //tensorflow/tools/pip_package:build_pip_package --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
-#
-# # Build wheel
-# RUN bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
-#
-# # Install tensorflow from our custom wheel
-# RUN pip3 install /tmp/tensorflow_pkg/*.whl
+RUN bazel build \
+	--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
+	--config=monolithic \
+	--config=cuda \
+	-c opt \
+	--copt=-O3 \
+	--copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
+	--copt=-mtune=generic \
+	--copt=-march=x86-64 \
+	--copt=-msse \
+	--copt=-msse2 \
+	--copt=-msse3 \
+	--copt=-msse4.1 \
+	--copt=-msse4.2 \
+	--copt=-mavx \
+	--copt=-fvisibility=hidden \
+	//native_client:libdeepspeech.so \
+	--verbose_failures \
+	--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
 
 # Copy built libs to /DeepSpeech/native_client
 RUN cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
 
-# Install TensorFlow
-WORKDIR /DeepSpeech/
-RUN pip3 install tensorflow-gpu==1.15.0
-
-
 # Build client.cc and install Python client and decoder bindings
 ENV TFDIR /tensorflow
+
+RUN nproc
+
 WORKDIR /DeepSpeech/native_client
-RUN make deepspeech
+RUN make NUM_PROCESSES=$(nproc) deepspeech
 
 WORKDIR /DeepSpeech
-RUN cd native_client/python && make bindings
+RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
 RUN pip3 install --upgrade native_client/python/dist/*.whl
 
-RUN cd native_client/ctcdecode && make bindings
+RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
 RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
 
-
 # << END Build and bind
 
-
-
-
 # Allow Python printing utf-8
 ENV PYTHONIOENCODING UTF-8
 
 # Build KenLM in /DeepSpeech/native_client/kenlm folder
 WORKDIR /DeepSpeech/native_client
-RUN rm -rf kenlm \
-    && git clone --depth 1 https://github.com/kpu/kenlm && cd kenlm \
-    && mkdir -p build \
-    && cd build \
-    && cmake .. \
-    && make -j 4
+RUN rm -rf kenlm && \
+	git clone https://github.com/kpu/kenlm && \
+	cd kenlm && \
+	git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \
+	mkdir -p build && \
+	cd build && \
+	cmake .. && \
+	make -j $(nproc)
 
 # Done
 WORKDIR /DeepSpeech
diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl
@@ -0,0 +1,53 @@
+# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
+
+FROM tensorflow/tensorflow:1.15.2-gpu-py3
+ENV DEBIAN_FRONTEND=noninteractive
+
+ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
+ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        apt-utils \
+        bash-completion \
+        build-essential \
+        curl \
+        git \
+        git-lfs \
+        libbz2-dev \
+        locales \
+        python3-venv \
+        unzip \
+        wget
+
+# We need to remove python3-xdg because it breaks the deepspeech install later
+# with weird errors about setuptools
+RUN apt-get purge -y python3-xdg
+
+# Install dependencies for audio augmentation
+RUN apt-get install -y --no-install-recommends libopus0 libsndfile1
+
+WORKDIR /
+RUN git lfs install
+RUN git clone $DEEPSPEECH_REPO
+
+WORKDIR /DeepSpeech
+RUN git checkout $DEEPSPEECH_SHA
+
+# Build the CTC decoder first, to avoid clashes from incompatible version upgrades
+RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
+RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
+
+# Prepare deps
+RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3
+
+# Install DeepSpeech
+#  - No need for the decoder since we did it earlier
+#  - The correct TensorFlow GPU build is already installed on the base image,
+#    and we don't want to break that
+RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
+
+# Tool to convert output graph for inference
+RUN python3 util/taskcluster.py --source tensorflow --branch r1.15 \
+        --artifact convert_graphdef_memmapped_format  --target .
+
+RUN ./bin/run-ldc93s1.sh
diff --git a/Makefile b/Makefile
@@ -0,0 +1,8 @@
+DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
+DEEPSPEECH_SHA  ?= origin/master
+
+Dockerfile%: Dockerfile%.tmpl
+	sed \
+		-e "s|#DEEPSPEECH_REPO#|$(DEEPSPEECH_REPO)|g" \
+		-e "s|#DEEPSPEECH_SHA#|$(DEEPSPEECH_SHA)|g" \
+		< $< > $@
diff --git a/README.rst b/README.rst
@@ -14,7 +14,7 @@ Project DeepSpeech
 
 DeepSpeech is an open source Speech-To-Text engine, using a model trained by machine learning techniques based on `Baidu's Deep Speech research paper <https://arxiv.org/abs/1412.5567>`_. Project DeepSpeech uses Google's `TensorFlow <https://www.tensorflow.org/>`_ to make the implementation easier.
 
-Documentation for installation, usage, and training models is available on `deepspeech.readthedocs.io <http://deepspeech.readthedocs.io/?badge=latest>`_.
+Documentation for installation, usage, and training models are available on `deepspeech.readthedocs.io <http://deepspeech.readthedocs.io/?badge=latest>`_.
 
 For the latest release, including pre-trained models and checkpoints, `see the latest release on GitHub <https://github.com/mozilla/DeepSpeech/releases/latest>`_.
 

diff --git a/bin/import_cv.py b/bin/import_cv.py
@@ -93,6 +93,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
     return (counter, rows)

diff --git a/bin/import_cv2.py b/bin/import_cv2.py
@@ -78,6 +78,7 @@ def one_sample(args):
     else:
         # This one is good - keep it for the target CSV
         rows.append((os.path.split(wav_filename)[-1], file_size, label, sample[2]))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
 

diff --git a/bin/import_lingua_libre.py b/bin/import_lingua_libre.py
@@ -91,6 +91,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
 

diff --git a/bin/import_m-ailabs.py b/bin/import_m-ailabs.py
@@ -91,6 +91,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
     return (counter, rows)