Skip to content

Commit bf5ae9c

Browse files
author
Alexandre Lissy
committed
Fix #3299: Build KenLM on CI
1 parent 34a62bd commit bf5ae9c

File tree

51 files changed

+1246
-42
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+1246
-42
lines changed

.gitmodules

+3
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,6 @@
55
[submodule "tensorflow"]
66
path = tensorflow
77
url = https://github.com/mozilla/tensorflow.git
8+
[submodule "kenlm"]
9+
path = kenlm
10+
url = https://github.com/kpu/kenlm

data/smoke_test/vocab.txt

+501
Large diffs are not rendered by default.

kenlm

Submodule kenlm added at 689a25a

native_client/BUILD

+6-1
Original file line numberDiff line numberDiff line change
@@ -225,11 +225,16 @@ cc_binary(
225225
"@com_google_absl//absl/types:optional",
226226
"@boost//:program_options",
227227
],
228+
linkstatic = 1,
228229
linkopts = [
229230
"-lm",
230231
"-ldl",
231232
"-pthread",
232-
],
233+
] + select({
234+
# ARMv7: error: Android 5.0 and later only support position-independent executables (-fPIE).
235+
"//tensorflow:android": ["-fPIE -pie"],
236+
"//conditions:default": [],
237+
}),
233238
)
234239

235240
cc_binary(

taskcluster/.shared.yml

+32
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@ training:
1111
deepspeech:
1212
packages_xenial:
1313
apt: 'make build-essential gfortran git libblas-dev liblapack-dev libsox-dev libmagic-dev libgsm1-dev libltdl-dev libpng-dev python python-dev zlib1g-dev'
14+
kenlm:
15+
packages_xenial:
16+
apt: 'apt-get -qq update && apt-get -qq -y install cmake realpath build-essential libboost-dev wget software-properties-common zlib1g-dev libbz2-dev liblzma-dev libboost-program-options-dev libboost-system-dev libboost-thread-dev libboost-test-dev'
17+
packages_android:
18+
apt: 'apt-get -qq update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install cmake coreutils build-essential wget software-properties-common unzip'
19+
packages_win:
20+
pacman: 'pacman --noconfirm -S cmake mingw-w64-x86_64-boost mingw-w64-x86_64-toolchain zlib bzip2 xz'
1421
tensorflow:
1522
packages_xenial:
1623
apt: 'apt-get -qq update && apt-get -qq -y install realpath build-essential python-virtualenv python-dev python-pip libblas-dev liblapack-dev gfortran wget software-properties-common pixz zip zlib1g-dev unzip'
@@ -125,6 +132,31 @@ system:
125132
win:
126133
url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.win.8/artifacts/public/pyenv.tar.gz'
127134
namespace: 'project.deepspeech.pyenv.win.8'
135+
kenlm:
136+
android_arm64_cpu:
137+
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.android-arm64/artifacts/public/kenlm.tar.gz"
138+
namespace: "project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.android-arm64"
139+
android_armv7_cpu:
140+
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.android-armv7/artifacts/public/kenlm.tar.gz"
141+
namespace: "project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.android-armv7"
142+
android_x86_64_cpu:
143+
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.android-x86_64/artifacts/public/kenlm.tar.gz"
144+
namespace: "project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.android-x86_64"
145+
linux_amd64_cpu:
146+
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.cpu/artifacts/public/kenlm.tar.gz"
147+
namespace: "project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.cpu"
148+
linux_arm64_cpu:
149+
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.arm64/artifacts/public/kenlm.tar.gz"
150+
namespace: "project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.arm64"
151+
linux_rpi3_cpu:
152+
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.rpi3/artifacts/public/kenlm.tar.gz"
153+
namespace: "project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.rpi3"
154+
darwin_amd64_cpu:
155+
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.osx/artifacts/public/kenlm.tar.gz"
156+
namespace: "project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.osx"
157+
win_amd64_cpu:
158+
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.win/artifacts/public/kenlm.tar.gz"
159+
namespace: "project.deepspeech.kenlm.689a25aae9171b3ea46bd80d4189f540f35f1a02.4.win"
128160
swig:
129161
repo: "https://github.com/lissyx/swig"
130162
sha1: "1a4c14945012f1282c2eddc174fb7674d5295de8"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
build:
2+
template_file: generic_tc_caching-linux-opt-base.tyml
3+
cache:
4+
artifact_url: ${system.kenlm.android_arm64_cpu.url}
5+
artifact_namespace: ${system.kenlm.android_arm64_cpu.namespace}
6+
system_config:
7+
>
8+
${kenlm.packages_android.apt}
9+
docker_image: "ubuntu:20.04"
10+
scripts:
11+
setup: "taskcluster/kenlm_tc-setup.sh --android-arm64"
12+
build: "taskcluster/kenlm_tc-build.sh --android-arm64"
13+
package: "taskcluster/kenlm_tc-package.sh"
14+
workerType: "${docker.dsBuild}"
15+
metadata:
16+
name: "KenLM Android ARM64 CPU"
17+
description: "Building KenLM for Android/ARM64, CPU only, optimized version"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
build:
2+
template_file: generic_tc_caching-linux-opt-base.tyml
3+
cache:
4+
artifact_url: ${system.kenlm.android_armv7_cpu.url}
5+
artifact_namespace: ${system.kenlm.android_armv7_cpu.namespace}
6+
system_config:
7+
>
8+
${kenlm.packages_android.apt}
9+
docker_image: "ubuntu:20.04"
10+
scripts:
11+
setup: "taskcluster/kenlm_tc-setup.sh --android-armv7"
12+
build: "taskcluster/kenlm_tc-build.sh --android-armv7"
13+
package: "taskcluster/kenlm_tc-package.sh"
14+
workerType: "${docker.dsBuild}"
15+
metadata:
16+
name: "KenLM Android ARMv7 CPU"
17+
description: "Building KenLM for Android/ARMv7, CPU only, optimized version"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
build:
2+
template_file: generic_tc_caching-linux-opt-base.tyml
3+
cache:
4+
artifact_url: ${system.kenlm.android_x86_64_cpu.url}
5+
artifact_namespace: ${system.kenlm.android_x86_64_cpu.namespace}
6+
system_config:
7+
>
8+
${kenlm.packages_android.apt}
9+
docker_image: "ubuntu:20.04"
10+
scripts:
11+
setup: "taskcluster/kenlm_tc-setup.sh --android-x86_64"
12+
build: "taskcluster/kenlm_tc-build.sh --android-x86_64"
13+
package: "taskcluster/kenlm_tc-package.sh"
14+
workerType: "${docker.dsBuild}"
15+
metadata:
16+
name: "KenLM Android x86_64 CPU"
17+
description: "Building KenLM for Android/x86_64, CPU only, optimized version"
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
build:
2+
template_file: generic_tc_caching-darwin-opt-base.tyml
3+
cache:
4+
artifact_url: ${system.kenlm.darwin_amd64_cpu.url}
5+
artifact_namespace: ${system.kenlm.darwin_amd64_cpu.namespace}
6+
scripts:
7+
setup: "taskcluster/kenlm_tc-setup.sh --macos-amd64"
8+
build: "taskcluster/kenlm_tc-build.sh --macos-amd64"
9+
package: "taskcluster/kenlm_tc-package.sh"
10+
workerType: ${macOS.dsBuild}
11+
metadata:
12+
name: "KenLM macOS AMD64 CPU"
13+
description: "Building KenLM for macOS/AMD64, CPU only, optimized version"
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
build:
2+
template_file: generic_tc_caching-linux-opt-base.tyml
3+
cache:
4+
artifact_url: ${system.kenlm.linux_amd64_cpu.url}
5+
artifact_namespace: ${system.kenlm.linux_amd64_cpu.namespace}
6+
system_config:
7+
>
8+
${kenlm.packages_xenial.apt}
9+
scripts:
10+
setup: "taskcluster/kenlm_tc-setup.sh --linux-amd64"
11+
build: "taskcluster/kenlm_tc-build.sh --linux-amd64"
12+
package: "taskcluster/kenlm_tc-package.sh"
13+
workerType: "${docker.dsBuild}"
14+
metadata:
15+
name: "KenLM Linux AMD64 CPU"
16+
description: "Building KenLM for Linux/AMD64, CPU only, optimized version"
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
build:
2+
template_file: generic_tc_caching-linux-opt-base.tyml
3+
cache:
4+
artifact_url: ${system.kenlm.linux_arm64_cpu.url}
5+
artifact_namespace: ${system.kenlm.linux_arm64_cpu.namespace}
6+
system_setup:
7+
>
8+
apt-get -qq update && apt-get -qq -y install cmake wget pixz bzip2 multistrap
9+
scripts:
10+
setup: "taskcluster/kenlm_tc-setup.sh --linux-arm64"
11+
build: "taskcluster/kenlm_tc-build.sh --linux-arm64"
12+
package: "taskcluster/kenlm_tc-package.sh"
13+
workerType: "${docker.dsBuild}"
14+
metadata:
15+
name: "KenLM Linux ARM64 CPU"
16+
description: "Building KenLM for Linux/ARM64, CPU only, optimized version"
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
build:
2+
template_file: generic_tc_caching-linux-opt-base.tyml
3+
cache:
4+
artifact_url: ${system.kenlm.linux_rpi3_cpu.url}
5+
artifact_namespace: ${system.kenlm.linux_rpi3_cpu.namespace}
6+
system_setup:
7+
>
8+
apt-get -qq update && apt-get -qq -y install cmake wget pixz bzip2 multistrap
9+
scripts:
10+
setup: "taskcluster/kenlm_tc-setup.sh --linux-rpi3"
11+
build: "taskcluster/kenlm_tc-build.sh --linux-rpi3"
12+
package: "taskcluster/kenlm_tc-package.sh"
13+
workerType: "${docker.dsBuild}"
14+
metadata:
15+
name: "KenLM Linux RPi3 CPU"
16+
description: "Building KenLM for Linux/RPi3, CPU only, optimized version"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[General]
2+
arch=arm64
3+
noauth=true
4+
unpack=true
5+
debootstrap=Debian
6+
aptsources=Debian
7+
cleanup=true
8+
9+
[Debian]
10+
packages=libc6 libc6-dev libstdc++-7-dev linux-libc-dev libboost-dev zlib1g-dev libbz2-dev liblzma-dev libboost-program-options-dev libboost-system-dev libboost-thread-dev libboost-test-dev
11+
source=http://deb.debian.org/debian
12+
keyring=debian-archive-keyring
13+
components=main
14+
suite=buster
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[General]
2+
arch=armhf
3+
noauth=true
4+
unpack=true
5+
debootstrap=Raspbian
6+
aptsources=Raspbian
7+
cleanup=true
8+
9+
[Raspbian]
10+
packages=libc6 libc6-dev libffi-dev libstdc++-6-dev linux-libc-dev libboost-dev zlib1g-dev libbz2-dev liblzma-dev libboost-program-options-dev libboost-system-dev libboost-thread-dev libboost-test-dev
11+
source=http://raspbian.raspberrypi.org/raspbian/
12+
keyring=raspbian-archive-keyring
13+
components=main
14+
suite=buster

taskcluster/kenlm_tc-build.sh

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/bin/bash

# Configure and build the KenLM command-line tools (lmplz, filter and
# build_binary) for a single CI platform.
#
# Usage: kenlm_tc-build.sh <target>
#   <target> is one of:
#     --android-arm64 | --android-armv7 | --android-x86_64
#     --macos-amd64
#     --linux-arm64 | --linux-rpi3 | --linux-amd64
#     --windows-amd64
#
# Environment read by this script: DS_ROOT_TASK and DS_CPU_COUNT, plus,
# depending on the target, ANDROID_NDK_HOME, TASKCLUSTER_ORIG_TASKDIR or
# TASKCLUSTER_TASK_DIR.
# NOTE(review): these are presumably provided by tc-all-vars.sh and/or the
# CI worker environment -- confirm against that file.

set -xe

target=$1

source "$(dirname "$0")/tc-all-vars.sh"

# Command lines are kept in arrays so that every argument survives intact,
# even when a path contains whitespace.
make_targets=(lmplz filter build_binary)
# DS_CPU_COUNT is intentionally left unquoted: when it is empty, "-j" is
# passed without a value, exactly as the string-based version did.
cmake_build=(cmake --build . -j ${DS_CPU_COUNT} --target "${make_targets[@]}")
cmake_defines=(-DFORCE_STATIC=ON)

# Add the CMake settings shared by all Android cross-builds.
#   $1: Android ABI name; it is also the name of the prebuilt Boost
#       sub-directory under ndk_21_boost_1.72.0/libs/.
configure_android() {
    local abi=$1

    export Boost_DIR="${DS_ROOT_TASK}/DeepSpeech/ds/ndk_21_boost_1.72.0/libs/${abi}/cmake/Boost-1.72.0/"
    cmake_defines+=(
        -DCMAKE_SYSTEM_NAME=Android
        -DCMAKE_SYSTEM_VERSION=21
        "-DCMAKE_ANDROID_ARCH_ABI=${abi}"
        "-DCMAKE_ANDROID_NDK=${ANDROID_NDK_HOME}"
        -DCMAKE_ANDROID_STL_TYPE=c++_static
        -DTHREADS_PTHREAD_ARG=2
    )
}

# Add the CMake settings shared by the Linux cross-builds and switch the
# build step to a direct "make" invocation.
#   $1: toolchain prefix (path up to, but excluding, "-gcc" / "-g++")
#   $2: value for CMAKE_SYSTEM_PROCESSOR
#   $3: sysroot directory
configure_linux_cross() {
    local toolchain=$1
    local processor=$2
    local sysroot=$3

    cmake_defines+=(
        -DCMAKE_SYSTEM_NAME=Linux
        "-DCMAKE_SYSTEM_PROCESSOR=${processor}"
        "-DCMAKE_C_COMPILER=${toolchain}-gcc"
        "-DCMAKE_CXX_COMPILER=${toolchain}-g++"
        "-DCMAKE_SYSROOT=${sysroot}"
        -DTHREADS_PTHREAD_ARG=2
    )
    cmake_build=(make -j ${DS_CPU_COUNT} "${make_targets[@]}")
}

case "${target}" in
    --android-arm64)
        configure_android "arm64-v8a"
        ;;

    --android-armv7)
        configure_android "armeabi-v7a"
        ;;

    --android-x86_64)
        configure_android "x86_64"
        ;;

    --macos-amd64)
        export KENLM_BREW="${TASKCLUSTER_ORIG_TASKDIR}/homebrew-kenlm"
        export PATH=${KENLM_BREW}/bin:$PATH
        cmake_defines+=("-DZLIB_LIBRARY=${KENLM_BREW}/opt/zlib/lib/libz.a")
        # NOTE(review): the static bzip2 override was already disabled in
        # the original script; the reason is not recorded here.
        #cmake_defines+=("-DBZIP2_LIBRARIES=${KENLM_BREW}/opt/bzip2/lib/libbz2.a")
        cmake_defines+=(-DTHREADS_PTHREAD_ARG=2)
        ;;

    --linux-arm64)
        configure_linux_cross \
            "${DS_ROOT_TASK}/DeepSpeech/ds/gcc-linaro-7.2.1-2017.11-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu" \
            "aarch64" \
            "${DS_ROOT_TASK}/DeepSpeech/ds/multistrap-armbian64-buster/"
        ;;

    --linux-rpi3)
        configure_linux_cross \
            "${DS_ROOT_TASK}/DeepSpeech/ds/gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf/bin/arm-linux-gnueabihf" \
            "arm" \
            "${DS_ROOT_TASK}/DeepSpeech/ds/multistrap-raspbian-buster/"
        ;;

    --linux-amd64)
        cmake_build=(make -j ${DS_CPU_COUNT} "${make_targets[@]}")
        ;;

    --windows-amd64)
        export Boost_DIR="$TASKCLUSTER_TASK_DIR/boost_1_72_0/lib64-msvc-14.2/cmake/Boost-1.72.0/"
        export CMAKE_GENERATOR='Visual Studio 16 2019'
        export PATH=$TASKCLUSTER_TASK_DIR/cmake-3.18.2-win64-x64/bin/:$PATH
        cmake_defines+=(-A x64)
        cmake_defines+=(
            "-DLIBLZMA_LIBRARY=$TASKCLUSTER_TASK_DIR/xz-5.2.5/bin_x86-64/liblzma.a"
            "-DLIBLZMA_INCLUDE_DIR=$TASKCLUSTER_TASK_DIR/xz-5.2.5/include/"
        )
        # NOTE(review): the static bzip2 override was already disabled in
        # the original script; the reason is not recorded here.
        #cmake_defines+=("-DBZIP2_LIBRARIES=$TASKCLUSTER_TASK_DIR/bzip2-dev-1.0.8.0-win-x64/libbz2-static.lib" "-DBZIP2_INCLUDE_DIR=$TASKCLUSTER_TASK_DIR/bzip2-dev-1.0.8.0-win-x64/")
        ;;

    *)
        # Unknown or missing target: keep the historical behaviour (plain
        # host build with the default settings) but make it visible in the
        # log so a mistyped flag does not go unnoticed.
        echo "kenlm_tc-build.sh: unrecognized target '${target}', using default host configuration" >&2
        ;;
esac

build_dir="${DS_ROOT_TASK}/DeepSpeech/ds/kenlm/build/"

# -p keeps a re-run from aborting under "set -e" when the directory is
# already there.
mkdir -p "${build_dir}"

pushd "${build_dir}"
    export EIGEN3_ROOT=${DS_ROOT_TASK}/DeepSpeech/ds/eigen-3.3.7
    cmake "${cmake_defines[@]}" ../
    "${cmake_build[@]}"
popd

taskcluster/kenlm_tc-package.sh

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash

# Package the KenLM binaries produced by kenlm_tc-build.sh into
# ${TASKCLUSTER_ARTIFACTS}/kenlm.tar.gz.
#
# Environment read by this script: DS_ROOT_TASK, TASKCLUSTER_ARTIFACTS and
# PLATFORM_EXE_SUFFIX.
# NOTE(review): these are presumably provided by tc-all-vars.sh and/or the
# CI worker environment -- confirm against that file.

set -xe

source "$(dirname "$0")/tc-all-vars.sh"

# Best effort: a failure here is tolerated, as in the original script; tar
# will fail below if the directory really is unusable.
mkdir -p "${TASKCLUSTER_ARTIFACTS}" || true

# Archive from inside bin/ so the tarball holds bare file names. All
# expansions are quoted so paths containing whitespace are not split.
cd "${DS_ROOT_TASK}/DeepSpeech/ds/kenlm/build/bin/" && \
    tar \
        -czf "${TASKCLUSTER_ARTIFACTS}/kenlm.tar.gz" \
        "build_binary${PLATFORM_EXE_SUFFIX}" \
        "filter${PLATFORM_EXE_SUFFIX}" \
        "lmplz${PLATFORM_EXE_SUFFIX}"

0 commit comments

Comments
 (0)