diff --git a/speech/README.md b/speech/README.md
index 4390f45b43..b986e85d50 100644
--- a/speech/README.md
+++ b/speech/README.md
@@ -39,18 +39,28 @@ __Usage:__ `node recognize.js --help`

 ```
 Commands:
-  sync <filename>           Detects speech in a local audio file.
-  sync-gcs <gcsUri>         Detects speech in an audio file located in a Google Cloud Storage bucket.
-  sync-words <filename>     Detects speech in a local audio file with word time offset.
-  async <filename>          Creates a job to detect speech in a local audio file, and waits for the job to complete.
-  async-gcs <gcsUri>        Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket,
-                            and waits for the job to complete.
-  async-gcs-words <gcsUri>  Creates a job to detect speech with word time offset in an audio file located in a Google
-                            Cloud Storage bucket, and waits for the job to complete.
-  stream <filename>         Detects speech in a local audio file by streaming it to the Speech API.
-  listen                    Detects speech in a microphone input stream. This command requires that you have SoX
-                            installed and available in your $PATH. See
-                            https://www.npmjs.com/package/node-record-lpcm16#dependencies
+  sync <filename>                 Detects speech in a local audio file.
+  sync-gcs <gcsUri>               Detects speech in an audio file located in a Google Cloud Storage bucket.
+  sync-words <filename>           Detects speech in a local audio file with word time offset.
+  sync-punctuation <filename>     Detects speech in a local audio file with automatic punctuation enabled.
+  sync-video <filename>           Detects speech in a local audio file with original media type set to VIDEO.
+  sync-metadata <filename>        Detects speech in a local audio file with audio metadata parameters.
+  async <filename>                Creates a job to detect speech in a local audio file, and waits for the job to
+                                  complete.
+  async-gcs <gcsUri>              Creates a job to detect speech in an audio file located in a Google Cloud Storage
+                                  bucket, and waits for the job to complete.
+  async-gcs-words <gcsUri>        Creates a job to detect speech with word time offset in an audio file located in a
+                                  Google Cloud Storage bucket, and waits for the job to complete.
+  async-gcs-punctuation <gcsUri>  Creates a job to detect speech with automatic punctuation enabled in an audio file
+                                  located in a Google Cloud Storage bucket, and waits for the job to complete.
+  async-gcs-video <gcsUri>        Creates a job to detect speech with original media type set to VIDEO in an audio
+                                  file located in a Google Cloud Storage bucket, and waits for the job to complete.
+  async-gcs-metadata <gcsUri>     Creates a job to detect speech with audio metadata parameters in an audio file
+                                  located in a Google Cloud Storage bucket, and waits for the job to complete.
+  stream <filename>               Detects speech in a local audio file by streaming it to the Speech API.
+  listen                          Detects speech in a microphone input stream. This command requires that you have
+                                  SoX installed and available in your $PATH. See
+                                  https://www.npmjs.com/package/node-record-lpcm16#dependencies

 Options:
   --help  Show help  [boolean]
diff --git a/speech/recognize.js b/speech/recognize.js
index 6b5338f856..b4422a38c0 100644
--- a/speech/recognize.js
+++ b/speech/recognize.js
@@ -29,7 +29,7 @@ function syncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   const fs = require('fs');
   const Speech = require('@google-cloud/speech');
 
-  // Instantiates a client
+  // Creates a client
   const speech = Speech();
 
   // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
@@ -60,11 +60,15 @@ function syncRecognize (filename, encoding, sampleRateHertz, languageCode) {
 
   // Detects speech in the audio file
   speech.recognize(request)
-    .then((data) => {
-      const response = data[0];
-      const transcription = response.results.map(result =>
-        result.alternatives[0].transcript).join('\n');
-      console.log(`Transcription: `, transcription);
+    .then((results) => {
+      const response = results[0];
+      response.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
     })
     .catch((err) => {
       console.error('ERROR:', err);
@@ -77,7 +81,7 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
   // Imports the Google Cloud client library
   const Speech = require('@google-cloud/speech');
 
-  // Instantiates a client
+  // Creates a client
   const speech = Speech();
 
   // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
@@ -108,11 +112,15 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
 
   // Detects speech in the audio file
   speech.recognize(request)
-    .then((data) => {
-      const response = data[0];
-      const transcription = response.results.map(result =>
-        result.alternatives[0].transcript).join('\n');
-      console.log(`Transcription: `, transcription);
+    .then((results) => {
+      const response = results[0];
+      response.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
     })
     .catch((err) => {
       console.error('ERROR:', err);
@@ -126,7 +134,7 @@ function syncRecognizeWords (filename, encoding, sampleRateHertz, languageCode)
   const fs = require('fs');
   const Speech = require('@google-cloud/speech');
 
-  // Instantiates a client
+  // Creates a client
   const speech = Speech();
 
   // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
@@ -158,19 +166,23 @@ function syncRecognizeWords (filename, encoding, sampleRateHertz, languageCode)
 
   // Detects speech in the audio file
   speech.recognize(request)
-    .then((data) => {
-      const response = data[0];
+    .then((results) => {
+      const response = results[0];
       response.results.forEach((result) => {
-        console.log(`Transcription: `, result.alternatives[0].transcript);
-        result.alternatives[0].words.forEach((wordInfo) => {
-          // NOTE: If you have a time offset exceeding 2^32 seconds, use the
-          // wordInfo.{x}Time.seconds.high to calculate seconds.
-          const startSecs = `${wordInfo.startTime.seconds}` + `.` +
-            (wordInfo.startTime.nanos / 100000000);
-          const endSecs = `${wordInfo.endTime.seconds}` + `.` +
-            (wordInfo.endTime.nanos / 100000000);
-          console.log(`Word: ${wordInfo.word}`);
-          console.log(`\t ${startSecs} secs - ${endSecs} secs`);
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+          alternative.words.forEach((wordInfo) => {
+            // NOTE: If you have a time offset exceeding 2^32 seconds, use the
+            // wordInfo.{x}Time.seconds.high to calculate seconds.
+            const startSecs = `${wordInfo.startTime.seconds}` + `.` +
+              (wordInfo.startTime.nanos / 100000000);
+            const endSecs = `${wordInfo.endTime.seconds}` + `.` +
+              (wordInfo.endTime.nanos / 100000000);
+            console.log(`  Word: ${wordInfo.word}`);
+            console.log(`  \t ${startSecs} secs - ${endSecs} secs`);
+          });
         });
       });
     })
     .catch((err) => {
       console.error('ERROR:', err);
@@ -180,13 +192,185 @@ function syncRecognizeWords (filename, encoding, sampleRateHertz, languageCode)
   // [END speech_sync_recognize_words]
 }
 
+function syncRecognizePunctuation (filename, encoding, sampleRateHertz, languageCode) {
+  // [START speech_sync_recognize_punctuation]
+  // Imports the Google Cloud client library
+  const fs = require('fs');
+  const Speech = require('@google-cloud/speech');
+
+  // Creates a client
+  const speech = Speech.v1_1beta1();
+
+  // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
+  // const filename = '/path/to/audio.raw';
+
+  // The encoding of the audio file, e.g. 'LINEAR16'
+  // const encoding = 'LINEAR16';
+
+  // The sample rate of the audio file in hertz, e.g. 16000
+  // const sampleRateHertz = 16000;
+
+  // The BCP-47 language code to use, e.g. 'en-US'
+  // const languageCode = 'en-US';
+
+  const config = {
+    encoding: encoding,
+    sampleRateHertz: sampleRateHertz,
+    languageCode: languageCode,
+    enableAutomaticPunctuation: true
+  };
+  const audio = {
+    content: fs.readFileSync(filename).toString('base64')
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
+
+  // Detects speech in the audio file
+  speech.recognize(request)
+    .then((results) => {
+      const response = results[0];
+      response.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
+    })
+    .catch((err) => {
+      console.error('ERROR:', err);
+    });
+  // [END speech_sync_recognize_punctuation]
+}
+
+function syncRecognizeVideo (filename, encoding, sampleRateHertz, languageCode) {
+  // [START speech_sync_recognize_video]
+  // Imports the Google Cloud client library
+  const fs = require('fs');
+  const Speech = require('@google-cloud/speech');
+
+  // Creates a client
+  const speech = Speech.v1_1beta1();
+
+  // The path to the local file on which to perform speech recognition, e.g. /path/to/Google_Gnome.raw
+  // const filename = '/path/to/Google_Gnome.raw';
+
+  // The encoding of the audio file, e.g. 'LINEAR16'
+  // const encoding = 'LINEAR16';
+
+  // The sample rate of the audio file in hertz, e.g. 16000
+  // const sampleRateHertz = 16000;
+
+  // The BCP-47 language code to use, e.g. 'en-US'
+  // const languageCode = 'en-US';
+
+  const config = {
+    encoding: encoding,
+    sampleRateHertz: sampleRateHertz,
+    languageCode: languageCode,
+    metadata: {
+      originalMediaType: Speech.v1_1beta1.types.RecognitionMetadata.OriginalMediaType.VIDEO
+    }
+  };
+  const audio = {
+    content: fs.readFileSync(filename).toString('base64')
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
+
+  // Detects speech in the audio file
+  speech.recognize(request)
+    .then((results) => {
+      const response = results[0];
+      response.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
+    })
+    .catch((err) => {
+      console.error('ERROR:', err);
+    });
+  // [END speech_sync_recognize_video]
+}
+
+function syncRecognizeMetadata (filename, encoding, sampleRateHertz, languageCode) {
+  // [START speech_sync_recognize_metadata]
+  // Imports the Google Cloud client library
+  const fs = require('fs');
+  const Speech = require('@google-cloud/speech');
+
+  // Creates a client
+  const speech = Speech.v1_1beta1();
+
+  // The path to the local file on which to perform speech recognition, e.g. /path/to/Google_Gnome.raw
+  // const filename = '/path/to/Google_Gnome.raw';
+
+  // The encoding of the audio file, e.g. 'LINEAR16'
+  // const encoding = 'LINEAR16';
+
+  // The sample rate of the audio file in hertz, e.g. 16000
+  // const sampleRateHertz = 16000;
+
+  // The BCP-47 language code to use, e.g. 'en-US'
+  // const languageCode = 'en-US';
+
+  const config = {
+    encoding: encoding,
+    sampleRateHertz: sampleRateHertz,
+    languageCode: languageCode,
+    metadata: {
+      audioTopic: 'electronics',
+      interactionType: Speech.v1_1beta1.types.RecognitionMetadata.InteractionType.DISCUSSION,
+      microphoneDistance: Speech.v1_1beta1.types.RecognitionMetadata.MicrophoneDistance.MIDFIELD,
+      numberOfSpeakers: Speech.v1_1beta1.types.RecognitionMetadata.NumberOfSpeakers.MULTIPLE_SPEAKERS,
+      originalMediaType: Speech.v1_1beta1.types.RecognitionMetadata.OriginalMediaType.VIDEO,
+      recordingDeviceType: Speech.v1_1beta1.types.RecognitionMetadata.RecordingDeviceType.OTHER_OUTDOOR_DEVICE
+    }
+  };
+  const audio = {
+    content: fs.readFileSync(filename).toString('base64')
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
+
+  // Detects speech in the audio file
+  speech.recognize(request)
+    .then((results) => {
+      const response = results[0];
+      response.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
+    })
+    .catch((err) => {
+      console.error('ERROR:', err);
+    });
+  // [END speech_sync_recognize_metadata]
+}
+
 function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // [START speech_async_recognize]
   // Imports the Google Cloud client library
   const Speech = require('@google-cloud/speech');
   const fs = require('fs');
 
-  // Instantiates a client
+  // Creates a client
   const speech = Speech();
 
   // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
@@ -218,17 +402,21 @@ function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // Detects speech in the audio file. This creates a recognition job that you
   // can wait for now, or get its result later.
   speech.longRunningRecognize(request)
-    .then((data) => {
-      const response = data[0];
+    .then((results) => {
+      const response = results[0];
       const operation = response;
       // Get a Promise representation of the final result of the job
       return operation.promise();
     })
-    .then((data) => {
-      const response = data[0];
-      const transcription = response.results.map(result =>
-        result.alternatives[0].transcript).join('\n');
-      console.log(`Transcription: ${transcription}`);
+    .then((results) => {
+      const response = results[0];
+      response.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
     })
     .catch((err) => {
       console.error('ERROR:', err);
@@ -241,7 +429,7 @@ function asyncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
   // Imports the Google Cloud client library
   const Speech = require('@google-cloud/speech');
 
-  // Instantiates a client
+  // Creates a client
   const speech = Speech();
 
   // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
@@ -274,16 +462,20 @@ function asyncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
   // Detects speech in the audio file. This creates a recognition job that you
   // can wait for now, or get its result later.
   speech.longRunningRecognize(request)
-    .then((data) => {
-      const operation = data[0];
+    .then((results) => {
+      const operation = results[0];
       // Get a Promise representation of the final result of the job
       return operation.promise();
     })
-    .then((data) => {
-      const response = data[0];
-      const transcription = response.results.map(result =>
-        result.alternatives[0].transcript).join('\n');
-      console.log(`Transcription: ${transcription}`);
+    .then((results) => {
+      const response = results[0];
+      response.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
     })
     .catch((err) => {
       console.error('ERROR:', err);
@@ -296,7 +488,7 @@ function asyncRecognizeGCSWords (gcsUri, encoding, sampleRateHertz, languageCode
   // Imports the Google Cloud client library
   const Speech = require('@google-cloud/speech');
 
-  // Instantiates a client
+  // Creates a client
   const speech = Speech();
 
   // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
@@ -330,24 +522,28 @@ function asyncRecognizeGCSWords (gcsUri, encoding, sampleRateHertz, languageCode
 
   // Detects speech in the audio file. This creates a recognition job that you
   // can wait for now, or get its result later.
   speech.longRunningRecognize(request)
-    .then((data) => {
-      const operation = data[0];
+    .then((results) => {
+      const operation = results[0];
       // Get a Promise representation of the final result of the job
       return operation.promise();
     })
-    .then((data) => {
-      const response = data[0];
+    .then((results) => {
+      const response = results[0];
       response.results.forEach((result) => {
-        console.log(`Transcription: ${result.alternatives[0].transcript}`);
-        result.alternatives[0].words.forEach((wordInfo) => {
-          // NOTE: If you have a time offset exceeding 2^32 seconds, use the
-          // wordInfo.{x}Time.seconds.high to calculate seconds.
-          const startSecs = `${wordInfo.startTime.seconds}` + `.` +
-            (wordInfo.startTime.nanos / 100000000);
-          const endSecs = `${wordInfo.endTime.seconds}` + `.` +
-            (wordInfo.endTime.nanos / 100000000);
-          console.log(`Word: ${wordInfo.word}`);
-          console.log(`\t ${startSecs} secs - ${endSecs} secs`);
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+          alternative.words.forEach((wordInfo) => {
+            // NOTE: If you have a time offset exceeding 2^32 seconds, use the
+            // wordInfo.{x}Time.seconds.high to calculate seconds.
+            const startSecs = `${wordInfo.startTime.seconds}` + `.` +
+              (wordInfo.startTime.nanos / 100000000);
+            const endSecs = `${wordInfo.endTime.seconds}` + `.` +
+              (wordInfo.endTime.nanos / 100000000);
+            console.log(`  Word: ${wordInfo.word}`);
+            console.log(`  \t ${startSecs} secs - ${endSecs} secs`);
+          });
         });
       });
     })
     .catch((err) => {
       console.error('ERROR:', err);
@@ -357,6 +553,195 @@ function asyncRecognizeGCSWords (gcsUri, encoding, sampleRateHertz, languageCode
   // [END speech_async_recognize_gcs_words]
 }
 
+function asyncRecognizeGCSPunctuation (gcsUri, encoding, sampleRateHertz, languageCode) {
+  // [START speech_async_recognize_gcs_punctuation]
+  // Imports the Google Cloud client library
+  const Speech = require('@google-cloud/speech');
+
+  // Creates a client
+  const speech = Speech.v1_1beta1();
+
+  // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
+  // const gcsUri = 'gs://my-bucket/audio.raw';
+
+  // The encoding of the audio file, e.g. 'LINEAR16'
+  // const encoding = 'LINEAR16';
+
+  // The sample rate of the audio file in hertz, e.g. 16000
+  // const sampleRateHertz = 16000;
+
+  // The BCP-47 language code to use, e.g. 'en-US'
+  // const languageCode = 'en-US';
+
+  const config = {
+    encoding: encoding,
+    sampleRateHertz: sampleRateHertz,
+    languageCode: languageCode,
+    enableAutomaticPunctuation: true
+  };
+
+  const audio = {
+    uri: gcsUri
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
+
+  // Detects speech in the audio file. This creates a recognition job that you
+  // can wait for now, or get its result later.
+  speech.longRunningRecognize(request)
+    .then((results) => {
+      const operation = results[0];
+      // Get a Promise representation of the final result of the job
+      return operation.promise();
+    })
+    .then((results) => {
+      const response = results[0];
+      response.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
+    })
+    .catch((err) => {
+      console.error('ERROR:', err);
+    });
+  // [END speech_async_recognize_gcs_punctuation]
+}
+
+function asyncRecognizeGCSVideo (gcsUri, encoding, sampleRateHertz, languageCode) {
+  // [START speech_async_recognize_gcs_video]
+  // Imports the Google Cloud client library
+  const Speech = require('@google-cloud/speech');
+
+  // Creates a client
+  const speech = Speech.v1_1beta1();
+
+  // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
+  // const gcsUri = 'gs://my-bucket/audio.raw';
+
+  // The encoding of the audio file, e.g. 'LINEAR16'
+  // const encoding = 'LINEAR16';
+
+  // The sample rate of the audio file in hertz, e.g. 16000
+  // const sampleRateHertz = 16000;
+
+  // The BCP-47 language code to use, e.g. 'en-US'
+  // const languageCode = 'en-US';
+
+  const config = {
+    encoding: encoding,
+    sampleRateHertz: sampleRateHertz,
+    languageCode: languageCode,
+    metadata: {
+      originalMediaType: Speech.v1_1beta1.types.RecognitionMetadata.OriginalMediaType.VIDEO
+    }
+  };
+
+  const audio = {
+    uri: gcsUri
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
+
+  // Detects speech in the audio file. This creates a recognition job that you
+  // can wait for now, or get its result later.
+  speech.longRunningRecognize(request)
+    .then((results) => {
+      const operation = results[0];
+      // Get a Promise representation of the final result of the job
+      return operation.promise();
+    })
+    .then((results) => {
+      const response = results[0];
+      response.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
+    })
+    .catch((err) => {
+      console.error('ERROR:', err);
+    });
+  // [END speech_async_recognize_gcs_video]
+}
+
+function asyncRecognizeGCSMetadata (gcsUri, encoding, sampleRateHertz, languageCode) {
+  // [START speech_async_recognize_gcs_metadata]
+  // Imports the Google Cloud client library
+  const Speech = require('@google-cloud/speech');
+
+  // Creates a client
+  const speech = Speech.v1_1beta1();
+
+  // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
+  // const gcsUri = 'gs://my-bucket/audio.raw';
+
+  // The encoding of the audio file, e.g. 'LINEAR16'
+  // const encoding = 'LINEAR16';
+
+  // The sample rate of the audio file in hertz, e.g. 16000
+  // const sampleRateHertz = 16000;
+
+  // The BCP-47 language code to use, e.g. 'en-US'
+  // const languageCode = 'en-US';
+
+  const config = {
+    encoding: encoding,
+    sampleRateHertz: sampleRateHertz,
+    languageCode: languageCode,
+    metadata: {
+      audioTopic: 'electronics',
+      interactionType: Speech.v1_1beta1.types.RecognitionMetadata.InteractionType.DISCUSSION,
+      microphoneDistance: Speech.v1_1beta1.types.RecognitionMetadata.MicrophoneDistance.MIDFIELD,
+      numberOfSpeakers: Speech.v1_1beta1.types.RecognitionMetadata.NumberOfSpeakers.MULTIPLE_SPEAKERS,
+      originalMediaType: Speech.v1_1beta1.types.RecognitionMetadata.OriginalMediaType.VIDEO,
+      recordingDeviceType: Speech.v1_1beta1.types.RecognitionMetadata.RecordingDeviceType.OTHER_OUTDOOR_DEVICE
+    }
+  };
+
+  const audio = {
+    uri: gcsUri
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
+
+  // Detects speech in the audio file. This creates a recognition job that you
+  // can wait for now, or get its result later.
+  speech.longRunningRecognize(request)
+    .then((results) => {
+      const operation = results[0];
+      // Get a Promise representation of the final result of the job
+      return operation.promise();
+    })
+    .then((results) => {
+      const response = results[0];
+      response.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
+    })
+    .catch((err) => {
+      console.error('ERROR:', err);
+    });
+  // [END speech_async_recognize_gcs_metadata]
+}
+
 function streamingRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // [START speech_streaming_recognize]
   const fs = require('fs');
@@ -364,7 +749,7 @@ function streamingRecognize (filename, encoding, sampleRateHertz, languageCode)
   // Imports the Google Cloud client library
   const Speech = require('@google-cloud/speech');
 
-  // Instantiates a client
+  // Creates a client
   const speech = Speech();
 
   // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
@@ -392,8 +777,13 @@ function streamingRecognize (filename, encoding, sampleRateHertz, languageCode)
   const recognizeStream = speech.streamingRecognize(request)
     .on('error', console.error)
     .on('data', (data) => {
-      console.log(
-        `Transcription: ${data.results[0].alternatives[0].transcript}`);
+      data.results.forEach((result) => {
+        console.log('Result:');
+        result.alternatives.forEach((alternative) => {
+          const confidence = (alternative.confidence * 100).toPrecision(3);
+          console.log(`  Transcript (${confidence}% confidence): ${alternative.transcript}`);
+        });
+      });
     });
 
   // Stream an audio file from disk to the Speech API, e.g. "./resources/audio.raw"
@@ -408,7 +798,7 @@ function streamingMicRecognize (encoding, sampleRateHertz, languageCode) {
   // Imports the Google Cloud client library
   const Speech = require('@google-cloud/speech');
 
-  // Instantiates a client
+  // Creates a client
   const speech = Speech();
 
   // The encoding of the audio file, e.g. 'LINEAR16'
@@ -475,6 +865,24 @@ const cli = require(`yargs`)
     {},
     (opts) => syncRecognizeWords(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode)
   )
+  .command(
+    `sync-punctuation <filename>`,
+    `Detects speech in a local audio file with automatic punctuation enabled.`,
+    {},
+    (opts) => syncRecognizePunctuation(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode)
+  )
+  .command(
+    `sync-video <filename>`,
+    `Detects speech in a local audio file with original media type set to VIDEO.`,
+    {},
+    (opts) => syncRecognizeVideo(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode)
+  )
+  .command(
+    `sync-metadata <filename>`,
+    `Detects speech in a local audio file with audio metadata parameters.`,
+    {},
+    (opts) => syncRecognizeMetadata(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode)
+  )
   .command(
     `async <filename>`,
     `Creates a job to detect speech in a local audio file, and waits for the job to complete.`,
@@ -489,10 +897,28 @@ const cli = require(`yargs`)
   )
   .command(
     `async-gcs-words <gcsUri>`,
-    `Creates a job to detect speech with word time offset in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`,
+    `Creates a job to detect speech with word time offset in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`,
    {},
    (opts) => asyncRecognizeGCSWords(opts.gcsUri, opts.encoding, opts.sampleRateHertz, opts.languageCode)
  )
+  .command(
+    `async-gcs-punctuation <gcsUri>`,
+    `Creates a job to detect speech with automatic punctuation enabled in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`,
+    {},
+    (opts) => asyncRecognizeGCSPunctuation(opts.gcsUri, opts.encoding, opts.sampleRateHertz, opts.languageCode)
+  )
+  .command(
+    `async-gcs-video <gcsUri>`,
+    `Creates a job to detect speech with original media type set to VIDEO in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`,
+    {},
+    (opts) => asyncRecognizeGCSVideo(opts.gcsUri, opts.encoding, opts.sampleRateHertz, opts.languageCode)
+  )
+  .command(
+    `async-gcs-metadata <gcsUri>`,
+    `Creates a job to detect speech with audio metadata parameters in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`,
+    {},
+    (opts) => asyncRecognizeGCSMetadata(opts.gcsUri, opts.encoding, opts.sampleRateHertz, opts.languageCode)
+  )
   .command(
     `stream <filename>`,
     `Detects speech in a local audio file by streaming it to the Speech API.`,
diff --git a/speech/resources/Google_Gnome.wav b/speech/resources/Google_Gnome.wav
new file mode 100644
index 0000000000..2f497b7fbe
Binary files /dev/null and b/speech/resources/Google_Gnome.wav differ
diff --git a/speech/system-test/recognize.test.js b/speech/system-test/recognize.test.js
index 7c729d5b53..770e853312 100644
--- a/speech/system-test/recognize.test.js
+++ b/speech/system-test/recognize.test.js
@@ -29,11 +29,14 @@ const cmd = `node recognize.js`;
 const cwd = path.join(__dirname, `..`);
 const filename = `audio.raw`;
 const filepath = path.join(__dirname, `../resources/${filename}`);
-const text = `how old is the Brooklyn Bridge`;
+const videofilename = `Google_Gnome.wav`;
+const videofilepath = path.join(__dirname, `../resources/${videofilename}`);
+const text = `ow old is the Brooklyn Bridge`;
 
 test.before(async () => {
   const [bucket] = await storage.createBucket(bucketName);
   await bucket.upload(filepath);
+  await bucket.upload(videofilepath);
 });
 
 test.after.always(async () => {
@@ -45,38 +48,85 @@
 test(`should run sync recognize`, async (t) => {
   const output = await runAsync(`${cmd} sync ${filepath}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(text));
 });
 
 test(`should run sync recognize on a GCS file`, async (t) => {
   const output = await runAsync(`${cmd} sync-gcs gs://${bucketName}/${filename}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(text));
 });
 
 test(`should run sync recognize with word time offset`, async (t) => {
   const output = await runAsync(`${cmd} sync-words ${filepath}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(text));
   t.true(new RegExp(`\\d+\\.\\d+ secs - \\d+\\.\\d+ secs`).test(output));
 });
 
+test(`should run sync recognize with punctuation`, async (t) => {
+  const output = await runAsync(`${cmd} sync-punctuation ${filepath}`, cwd);
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(text));
+});
+
+test(`should run sync recognize with video`, async (t) => {
+  const output = await runAsync(`${cmd} sync-video ${videofilepath}`, cwd);
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(`OK Google`));
+  t.regex(output, new RegExp(`eat this lemon`));
+  t.regex(output, new RegExp(`everything is made up`));
+});
+
+test(`should run sync recognize with metadata`, async (t) => {
+  const output = await runAsync(`${cmd} sync-metadata ${filepath}`, cwd);
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(`how old is the Brooklyn Bridge`));
+});
+
 test(`should run async recognize on a local file`, async (t) => {
   const output = await runAsync(`${cmd} async ${filepath}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(text));
 });
 
 test(`should run async recognize on a GCS file`, async (t) => {
   const output = await runAsync(`${cmd} async-gcs gs://${bucketName}/${filename}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(text));
 });
 
 test(`should run async recognize on a GCS file with word time offset`, async (t) => {
   const output = await runAsync(`${cmd} async-gcs-words gs://${bucketName}/${filename}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(text));
   // Check for word time offsets
   t.true(new RegExp(`\\d+\\.\\d+ secs - \\d+\\.\\d+ secs`).test(output));
 });
 
+test(`should run async recognize on a GCS file with punctuation`, async (t) => {
+  const output = await runAsync(`${cmd} async-gcs-punctuation gs://${bucketName}/${filename}`, cwd);
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(text));
+});
+
+test(`should run async recognize on a GCS file with video`, async (t) => {
+  const output = await runAsync(`${cmd} async-gcs-video gs://${bucketName}/${videofilename}`, cwd);
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(`OK Google`));
+  t.regex(output, new RegExp(`eat this lemon`));
+  t.regex(output, new RegExp(`everything is made up`));
+});
+
+test(`should run async recognize on a GCS file with metadata`, async (t) => {
+  const output = await runAsync(`${cmd} async-gcs-metadata gs://${bucketName}/${filename}`, cwd);
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(`how old is the Brooklyn Bridge`));
+});
+
 test(`should run streaming recognize`, async (t) => {
   const output = await runAsync(`${cmd} stream ${filepath}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.regex(output, new RegExp(`Transcript`));
+  t.regex(output, new RegExp(text));
 });
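
For reference, the new subcommands are invoked the same way as the existing ones. A minimal usage sketch — the local paths mirror the system tests above, and `gs://my-bucket/audio.raw` is the placeholder URI from the sample comments, not a real bucket:

```
# Run from the speech/ directory
node recognize.js sync-punctuation ./resources/audio.raw
node recognize.js sync-video ./resources/Google_Gnome.wav
node recognize.js async-gcs-metadata gs://my-bucket/audio.raw
```

Each command prints one `Result:` block per recognized segment in the new logging format, along these lines (transcript and confidence value are illustrative):

```
Result:
  Transcript (98.3% confidence): How old is the Brooklyn Bridge?
```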