Skip to content

Commit

Permalink
whisper : add integer quantization support
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Apr 30, 2023
1 parent 0ccd674 commit 454d97d
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 96 deletions.
13 changes: 10 additions & 3 deletions examples/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,15 @@ function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) {
var db = event.target.result;
var tx = db.transaction(['models'], 'readwrite');
var os = tx.objectStore('models');
var rq = os.put(data, url);

var rq = null;
try {
var rq = os.put(data, url);
} catch (e) {
cbPrint('loadRemote: failed to store "' + url + '" in the IndexedDB: \n' + e);
cbCancel();
return;
}

rq.onsuccess = function (event) {
cbPrint('loadRemote: "' + url + '" stored in the IndexedDB');
Expand Down Expand Up @@ -180,7 +188,6 @@ function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) {

rq.onabort = function (event) {
cbPrint('loadRemote: failed to open IndexedDB: abort');

cbCancel();
};
}

6 changes: 3 additions & 3 deletions examples/whisper.wasm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ endif()
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
--bind \
-s USE_PTHREADS=1 \
-s PTHREAD_POOL_SIZE=8 \
-s INITIAL_MEMORY=1500MB \
-s TOTAL_MEMORY=1500MB \
-s PTHREAD_POOL_SIZE_STRICT=0 \
-s INITIAL_MEMORY=2000MB \
-s TOTAL_MEMORY=2000MB \
-s FORCE_FILESYSTEM=1 \
-s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
${EXTRA_FLAGS} \
Expand Down
10 changes: 8 additions & 2 deletions examples/whisper.wasm/emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ std::thread g_worker;

std::vector<struct whisper_context *> g_contexts(4, nullptr);

// Returns the largest power of two that is less than or equal to n.
//
// For n < 1 there is no such power of two and the function returns 0, so a
// caller that uses the result as a thread count must clamp it itself.
//
// The loop compares against n/2 rather than n so that p never has to grow
// past n. Doubling until p > n overflows a signed int (undefined behaviour)
// for any n >= 2^30.
static inline int mpow2(int n) {
    if (n < 1) {
        return 0;
    }

    int p = 1;
    while (p <= n/2) {
        p *= 2;
    }
    return p;
}

EMSCRIPTEN_BINDINGS(whisper) {
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
if (g_worker.joinable()) {
Expand Down Expand Up @@ -43,7 +49,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
}
}));

emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, int nthreads, bool translate) {
if (g_worker.joinable()) {
g_worker.join();
}
Expand All @@ -66,7 +72,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
params.print_special = false;
params.translate = translate;
params.language = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
params.n_threads = std::min(8, (int) std::thread::hardware_concurrency());
params.n_threads = std::min(nthreads, std::min(16, mpow2(std::thread::hardware_concurrency())));
params.offset_ms = 0;

std::vector<float> pcmf32;
Expand Down
105 changes: 89 additions & 16 deletions examples/whisper.wasm/index-tmpl.html
Original file line number Diff line number Diff line change
Expand Up @@ -40,21 +40,34 @@

Note that the computation is quite heavy and may take a few seconds to complete.<br>
The transcription results will be displayed in the text area below.<br><br>
<b>Important: your browser must support WASM SIMD instructions for this to work.</b>
<b>Important:</b>
<ul>
<li>your browser must support WASM SIMD instructions for this to work</li>
<li>quantized models are still in experimental stage (<a href="https://github.com/ggerganov/ggml/pull/27">more info</a>)</li>
<li>Firefox cannot load files larger than 256 MB - use Chrome instead</li>
</ul>

<br><br><hr>
<hr>

<div id="model">
Whisper model: <span id="model-whisper-status"></span>
Whisper models: <span id="model-whisper-status"></span><br><br>
<button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
<button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">tiny (75 MB)</button>
<button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
<button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
<button id="fetch-whisper-small-en" onclick="loadWhisper('small.en')">small.en (466 MB)</button>
<button id="fetch-whisper-small" onclick="loadWhisper('small')">small (466 MB)</button>
<span id="fetch-whisper-progress"></span>

<input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
<br><br>
Quantized models:<br><br>
<button id="fetch-whisper-base-en-q4_0" onclick="loadWhisper('base-en-q4_0')">base.en (4bit, 49 MB)</button>
<button id="fetch-whisper-base-q4_0" onclick="loadWhisper('base-q4_0')">base (4bit, 49 MB)</button>
<button id="fetch-whisper-small-en-q4_0" onclick="loadWhisper('small-en-q4_0')">small.en (4bit, 152 MB)</button>
<button id="fetch-whisper-small-q4_0" onclick="loadWhisper('small-q4_0')">small (4bit, 152 MB)</button><br>
<button id="fetch-whisper-medium-en-q4_0" onclick="loadWhisper('medium-en-q4_0')">medium.en (4bit, 469 MB)</button>
<button id="fetch-whisper-medium-q4_0" onclick="loadWhisper('medium-q4_0')">medium (4bit, 469 MB)</button>
<button id="fetch-whisper-large-q4_0" onclick="loadWhisper('large-q4_0')">large (4bit, 985 MB)</button>
<span id="fetch-whisper-progress"></span>
</div>

<br>
Expand Down Expand Up @@ -161,6 +174,12 @@
<option value="yi">Yiddish</option>
</select>
</td>
<!-- Slider to select number of threads between 1 and 16 -->
<td>
Threads:
<input type="range" id="threads" name="threads" min="1" max="16" value="8" onchange="changeThreads(this.value)" />
<span id="threads-value">8</span>
</td>
<td>
<button onclick="onProcess(false);">Transcribe</button>
</td>
Expand Down Expand Up @@ -263,11 +282,13 @@

Module.FS_createDataFile("/", fname, buf, true, true);

model_whisper = fname;
//model_whisper = fname;

document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';

printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);

document.getElementById('model').innerHTML = 'Model fetched: ' + model_whisper;
}

function loadFile(event, fname) {
Expand All @@ -292,6 +313,15 @@
document.getElementById('fetch-whisper-tiny' ).style.display = 'none';
document.getElementById('fetch-whisper-base' ).style.display = 'none';
document.getElementById('fetch-whisper-small' ).style.display = 'none';

document.getElementById('fetch-whisper-base-en-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-base-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-small-en-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-small-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-medium-en-q4_0').style.display = 'none';
document.getElementById('fetch-whisper-medium-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-large-q4_0' ).style.display = 'none';

document.getElementById('whisper-file' ).style.display = 'none';
document.getElementById('model-whisper-status' ).innerHTML = 'loaded model: ' + file.name;
}
Expand All @@ -304,6 +334,14 @@
'base': 'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
'small.en': 'https://whisper.ggerganov.com/ggml-model-whisper-small.en.bin',
'small': 'https://whisper.ggerganov.com/ggml-model-whisper-small.bin',

'base-en-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en-q4_0.bin',
'base-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-base-q4_0.bin',
'small-en-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-small.en-q4_0.bin',
'small-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-small-q4_0.bin',
'medium-en-q4_0':'https://whisper.ggerganov.com/ggml-model-whisper-medium.en-q4_0.bin',
'medium-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-medium-q4_0.bin',
'large-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-large-q4_0.bin',
};

let sizes = {
Expand All @@ -313,6 +351,14 @@
'base': 142,
'small.en': 466,
'small': 466,

'base-en-q4_0': 49,
'base-q4_0': 49,
'small-en-q4_0': 152,
'small-q4_0': 152,
'medium-en-q4_0': 469,
'medium-q4_0': 469,
'large-q4_0': 985,
};

let url = urls[model];
Expand All @@ -327,6 +373,15 @@
document.getElementById('fetch-whisper-tiny' ).style.display = 'none';
document.getElementById('fetch-whisper-base' ).style.display = 'none';
document.getElementById('fetch-whisper-small' ).style.display = 'none';

document.getElementById('fetch-whisper-base-en-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-base-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-small-en-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-small-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-medium-en-q4_0').style.display = 'none';
document.getElementById('fetch-whisper-medium-q4_0' ).style.display = 'none';
document.getElementById('fetch-whisper-large-q4_0' ).style.display = 'none';

document.getElementById('whisper-file' ).style.display = 'none';
document.getElementById('model-whisper-status' ).innerHTML = 'loading model: ' + model;

Expand All @@ -337,12 +392,22 @@

// Make every model download button and the local file picker visible again
// and clear the model status text. Elements that are not present in the page
// are skipped.
cbCancel = function() {
    var visibleAgain = [
        'fetch-whisper-tiny-en',
        'fetch-whisper-base-en',
        'fetch-whisper-small-en',
        'fetch-whisper-tiny',
        'fetch-whisper-base',
        'fetch-whisper-small',

        'fetch-whisper-base-en-q4_0',
        'fetch-whisper-base-q4_0',
        'fetch-whisper-small-en-q4_0',
        'fetch-whisper-small-q4_0',
        'fetch-whisper-medium-en-q4_0',
        'fetch-whisper-medium-q4_0',
        'fetch-whisper-large-q4_0',

        'whisper-file'
    ];

    for (var i = 0; i < visibleAgain.length; i++) {
        var node = document.getElementById(visibleAgain[i]);
        if (node) {
            node.style.display = 'inline-block';
        }
    }

    var statusNode = document.getElementById('model-whisper-status');
    if (statusNode) {
        statusNode.innerHTML = '';
    }
};
Expand All @@ -354,7 +419,8 @@
// audio file
//

const kMaxAudio_s = 120;
const kMaxAudio_s = 30*60;
const kMaxRecording_s = 2*60;
const kSampleRate = 16000;

window.AudioContext = window.AudioContext || window.webkitAudioContext;
Expand Down Expand Up @@ -423,7 +489,7 @@
doRecording = false;
}

// record up to kMaxAudio_s seconds of audio from the microphone
// record up to kMaxRecording_s seconds of audio from the microphone
// check if doRecording is false every 1000 ms and stop recording if so
// update progress information
function startRecording() {
Expand Down Expand Up @@ -479,9 +545,9 @@
printTextarea('js: audio recorded, size: ' + audio.length);

// truncate to the first kMaxRecording_s seconds
if (audio.length > kMaxAudio_s*kSampleRate) {
audio = audio.slice(0, kMaxAudio_s*kSampleRate);
printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
if (audio.length > kMaxRecording_s*kSampleRate) {
audio = audio.slice(0, kMaxRecording_s*kSampleRate);
printTextarea('js: truncated audio to first ' + kMaxRecording_s + ' seconds');
}
setAudio(audio);
});
Expand Down Expand Up @@ -509,24 +575,31 @@
});
}

document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxAudio_s) + '%';
document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxAudio_s).toFixed(0) + '%';
document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxRecording_s) + '%';
document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxRecording_s).toFixed(0) + '%';
}, 1000);

printTextarea('js: recording ...');

setTimeout(function() {
if (doRecording) {
printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
printTextarea('js: recording stopped after ' + kMaxRecording_s + ' seconds');
stopRecording();
}
}, kMaxAudio_s*1000);
}, kMaxRecording_s*1000);
}

//
// transcribe
//

var nthreads = 8;

// Handler for the "threads" range slider: stores the selected thread count
// and shows it in the label next to the slider.
//
// value: the slider's current value. The slider passes `this.value`, which
//        is a string, while `nthreads` is later handed to
//        Module.full_default, whose binding declares the parameter as an
//        int. It is therefore converted to a number before being stored.
//        Input that does not parse as an integer is ignored and the
//        previous thread count is kept.
function changeThreads(value) {
    var parsed = parseInt(value, 10);
    if (isNaN(parsed)) {
        return;
    }

    nthreads = parsed;
    document.getElementById('threads-value').innerHTML = nthreads;
}

function onProcess(translate) {
if (!instance) {
instance = Module.init('whisper.bin');
Expand All @@ -553,7 +626,7 @@
printTextarea('');

setTimeout(function() {
var ret = Module.full_default(instance, audio, document.getElementById('language').value, translate);
var ret = Module.full_default(instance, audio, document.getElementById('language').value, nthreads, translate);
console.log('js: full_default returned: ' + ret);
if (ret) {
printTextarea("js: whisper returned: " + ret);
Expand Down
Loading

0 comments on commit 454d97d

Please sign in to comment.