
Commit bc88eb1

Committed Nov 25, 2022
examples : add "command" tool (ggerganov#171)
1 parent b8ce25d

File tree: 9 files changed, +735 -51 lines

.gitignore

+1 line

@@ -13,6 +13,7 @@ build-sanitize-thread/
 
 main
 stream
+command
 bench
 sync.sh
 compile_commands.json

Makefile

+4 -1 lines

@@ -134,7 +134,7 @@ libwhisper.so: ggml.o whisper.o
         $(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
 
 clean:
-        rm -f *.o main stream bench libwhisper.a libwhisper.so
+        rm -f *.o main stream command bench libwhisper.a libwhisper.so
 
 #
 # Examples

@@ -149,6 +149,9 @@ main: examples/main/main.cpp ggml.o whisper.o
 stream: examples/stream/stream.cpp ggml.o whisper.o
         $(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
 
+command: examples/command/command.cpp ggml.o whisper.o
+        $(CXX) $(CXXFLAGS) examples/command/command.cpp ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
+
 bench: examples/bench/bench.cpp ggml.o whisper.o
         $(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)

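For reference, the new `command` rule mirrors the existing `stream` rule: the example is compiled together with ggml.o and whisper.o and linked against SDL via `$(CC_SDL)`. Assuming `CC_SDL` resolves to the usual `sdl2-config` output (an assumption, not shown in this diff), the rule is roughly equivalent to this manual compile:

```bash
# Approximate manual equivalent of `make command`
# (assumes CC_SDL comes from `sdl2-config`; adjust for your SDL2 installation)
c++ -I. -I./examples -O3 -std=c++11 -pthread \
    examples/command/command.cpp ggml.o whisper.o \
    -o command $(sdl2-config --cflags --libs)
```
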
README.md

+26 -25 lines

@@ -98,26 +98,27 @@ c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o gg
 usage: ./main [options] file0.wav file1.wav ...
 
 options:
-  -h,        --help           show this help message and exit
-  -t N,      --threads N      number of threads to use during computation (default: 4)
-  -p N,      --processors N   number of processors to use during computation (default: 1)
-  -ot N,     --offset-t N     time offset in milliseconds (default: 0)
-  -on N,     --offset-n N     segment index offset (default: 0)
-  -mc N,     --max-context N  maximum number of text context tokens to store (default: max)
-  -ml N,     --max-len N      maximum segment length in characters (default: 0)
-  -wt N,     --word-thold N   word timestamp probability threshold (default: 0.010000)
-  -v,        --verbose        verbose output
-             --translate      translate from source language to english
-  -otxt,     --output-txt     output result in a text file
-  -ovtt,     --output-vtt     output result in a vtt file
-  -osrt,     --output-srt     output result in a srt file
-  -owts,     --output-words   output script for generating karaoke video
-  -ps,       --print_special  print special tokens
-  -pc,       --print_colors   print colors
-  -nt,       --no_timestamps  do not print timestamps
-  -l LANG,   --language LANG  spoken language (default: en)
-  -m FNAME,  --model FNAME    model path (default: models/ggml-base.en.bin)
-  -f FNAME,  --file FNAME     input WAV file path
+  -h,       --help          [default] show this help message and exit
+  -t N,     --threads N     [4      ] number of threads to use during computation
+  -p N,     --processors N  [1      ] number of processors to use during computation
+  -ot N,    --offset-t N    [0      ] time offset in milliseconds
+  -on N,    --offset-n N    [0      ] segment index offset
+  -d N,     --duration N    [0      ] duration of audio to process in milliseconds
+  -mc N,    --max-context N [-1     ] maximum number of text context tokens to store
+  -ml N,    --max-len N     [0      ] maximum segment length in characters
+  -wt N,    --word-thold N  [0.01   ] word timestamp probability threshold
+  -su,      --speed-up      [false  ] speed up audio by x2 (reduced accuracy)
+  -tr,      --translate     [false  ] translate from source language to english
+  -otxt,    --output-txt    [false  ] output result in a text file
+  -ovtt,    --output-vtt    [false  ] output result in a vtt file
+  -osrt,    --output-srt    [false  ] output result in a srt file
+  -owts,    --output-words  [false  ] output script for generating karaoke video
+  -ps,      --print-special [false  ] print special tokens
+  -pc,      --print-colors  [false  ] print colors
+  -nt,      --no-timestamps [true   ] do not print timestamps
+  -l LANG,  --language LANG [en     ] spoken language
+  -m FNAME, --model FNAME   [models/ggml-base.en.bin] model path
+  -f FNAME, --file FNAME    [       ] input WAV file path
 
 bash ./models/download-ggml-model.sh base.en
 Downloading ggml model base.en ...

@@ -149,13 +150,13 @@ whisper_model_load: n_text_layer = 6
 whisper_model_load: n_mels = 80
 whisper_model_load: f16 = 1
 whisper_model_load: type = 2
-whisper_model_load: mem_required = 670.00 MB
 whisper_model_load: adding 1607 extra tokens
-whisper_model_load: ggml ctx size = 140.60 MB
-whisper_model_load: memory size = 22.83 MB
-whisper_model_load: model size = 140.54 MB
+whisper_model_load: mem_required = 506.00 MB
+whisper_model_load: ggml ctx size = 140.60 MB
+whisper_model_load: memory size = 22.83 MB
+whisper_model_load: model size = 140.54 MB
 
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
+system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
 
 main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...

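The updated help text above lists the current flag set of `./main`. As an illustration only (not part of this commit), a typical invocation combining several of the documented options could look like:

```bash
# Transcribe a WAV file with 4 threads, emit an .srt file, and keep the default
# word-timestamp threshold of 0.01 (paths are illustrative)
./main -m models/ggml-base.en.bin -f samples/jfk.wav -t 4 -osrt -wt 0.01
```
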
examples/CMakeLists.txt

+1 line

@@ -24,5 +24,6 @@ if (EMSCRIPTEN)
 else()
     add_subdirectory(main)
     add_subdirectory(stream)
+    add_subdirectory(command)
     add_subdirectory(bench)
 endif()

examples/command/CMakeLists.txt

+7 lines (new file)

@@ -0,0 +1,7 @@
+if (WHISPER_SUPPORT_SDL2)
+    # command
+    set(TARGET command)
+    add_executable(${TARGET} command.cpp)
+    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
+    target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+endif ()

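The new target is only generated when the build is configured with SDL2 support, as the `WHISPER_SUPPORT_SDL2` guard above shows. A CMake build that includes it might therefore look like this sketch (directory layout is illustrative):

```bash
# Configure with SDL2 support enabled so the `command` target exists, then build it
mkdir -p build && cd build
cmake -DWHISPER_SUPPORT_SDL2=ON ..
make command
```
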
examples/command/README.md

+26 lines (new file)

@@ -0,0 +1,26 @@
+# command
+
+This is a basic Voice Assistant example that accepts voice commands from the microphone.
+More info is available in [issue #171](https://github.com/ggerganov/whisper.cpp/issues/171).
+
+```java
+# Run with default arguments and small model
+./command -m ./models/ggml-small.en.bin -t 8
+
+# On Raspberry Pi, use tiny or base models + "-ac 768" for better performance
+./bin/command -m ../models/ggml-tiny.en.bin -ac 768
+```
+
+## Building
+
+The `command` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
+
+```bash
+# Install SDL2 on Linux
+sudo apt-get install libsdl2-dev
+
+# Install SDL2 on Mac OS
+brew install sdl2
+
+make command
+```

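Putting the pieces of this commit together, a complete first run might look like the following sketch (model choice and thread count are illustrative; the download script is the one referenced in the main README):

```bash
# Download a model, build the SDL2-based example, and start listening for voice commands
bash ./models/download-ggml-model.sh small.en
make command
./command -m ./models/ggml-small.en.bin -t 8
```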