From 98530e6ef21d7cb8e2d719ef8d84cc52c4cb490d Mon Sep 17 00:00:00 2001 From: nirupa-kumar Date: Mon, 11 Jun 2018 09:43:38 -0700 Subject: [PATCH 1/4] Add streaming microphone sample for Speech --- speech/cloud-client/pom.xml | 45 ++++ .../example/speech/MicStreamRecognize.java | 193 ++++++++++++++++++ .../example/speech/MicStreamRecognizeIT.java | 62 ++++++ 3 files changed, 300 insertions(+) create mode 100644 speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java create mode 100644 speech/cloud-client/src/test/java/com/example/speech/MicStreamRecognizeIT.java diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml index 1eabb83f7fd..bf3bdcd1893 100644 --- a/speech/cloud-client/pom.xml +++ b/speech/cloud-client/pom.xml @@ -57,6 +57,22 @@ 0.40 test + + + com.google.protobuf + protobuf-java + 3.2.0 + + + com.google.protobuf + protobuf-java-util + 3.2.0 + + + com.google.guava + guava + 21.0 + @@ -135,5 +151,34 @@ + + + MicStreamRecognize + + + MicStreamRecognize + + + + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + + java + + + + + com.example.speech.MicStreamRecognize + false + + + + + diff --git a/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java b/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java new file mode 100644 index 00000000000..df3b45dcb5e --- /dev/null +++ b/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java @@ -0,0 +1,193 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.speech; + +//Imports the Google Cloud client library + +import com.google.api.gax.rpc.ApiStreamObserver; +import com.google.api.gax.rpc.BidiStreamingCallable; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1p1beta1.SpeechClient; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig; +import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult; +import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest; +import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse; +import com.google.common.util.concurrent.SettableFuture; +import com.google.protobuf.ByteString; + +import javax.sound.sampled.AudioFormat; +import javax.sound.sampled.AudioSystem; +import javax.sound.sampled.DataLine; +import javax.sound.sampled.TargetDataLine; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import java.util.List; + +/** + * Google Cloud SpeechToText API sample application. Example usage: mvn package exec:java + * -Dexec.mainClass='com.example.speech.MicStreamRecognize' -Dexec.args="micstreamrecognize " + */ +public class MicStreamRecognize { + + // [START speech_mic_streaming] + /** + * Demonstrates using the Speech to Text client to convert Microphone streaming speech to text. + * + * @throws Exception on SpeechToTextClient Errors. + */ + + private static final int BYTES_PER_BUFFER = 6400; //buffer size in bytes + + + public static void main(String... args) throws Exception { + if (args.length < 1) { + System.out.println("Usage:"); + System.out.printf( + "\tjava %s \"\" \"\"\n" + + "Command:\n" + + "\tmicstreamrecognize\n" + + "Duration(optional):\n\tIn seconds.(Maximum of 60 seconds)\n", + MicStreamRecognize.class.getCanonicalName()); + return; + } + String command = args[0]; + Integer duration = args.length > 1 ? Integer.parseInt(args[1]) : 10; + + // Use command to invoke transcription + if (command.equals("micstreamrecognize")) { + micRecognize(duration); + } + } + + /** + * Performs streaming speech recognition on microphone audio data. + * + * @param duration the time duration for the microphone streaming + */ + public static void micRecognize(Integer duration) throws InterruptedException, IOException { + //Microphone audio format specification + AudioFormat format = new AudioFormat(16000, 16, 1, true, false); + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); + // checks if system supports the data line + if (!AudioSystem.isLineSupported(info)) { + System.out.println("Line not supported"); + System.exit(0); + } + byte[] data = new byte[BYTES_PER_BUFFER]; + + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS + try (SpeechClient speech = SpeechClient.create()) { + + //Configure request with microphone audio + RecognitionConfig recConfig = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setModel("default") + .build(); + StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder() + .setConfig(recConfig) + .setInterimResults(true) + .build(); + + class ResponseApiStreamingObserver implements ApiStreamObserver { + private final SettableFuture> future = SettableFuture.create(); + private final List messages = new java.util.ArrayList(); + + @Override + public void onNext(T message) { messages.add(message); } + + @Override + public void onError(Throwable t) { + future.setException(t); + } + + @Override + public void onCompleted() { future.set(messages); } + + // Returns the SettableFuture object to get received messages / exceptions. + public SettableFuture> future() { + return future; + } + } + + ResponseApiStreamingObserver responseObserver = + new ResponseApiStreamingObserver<>(); + + BidiStreamingCallable callable = + speech.streamingRecognizeCallable(); + + ApiStreamObserver requestObserver = + callable.bidiStreamingCall(responseObserver); + + TargetDataLine targetLine = (TargetDataLine) AudioSystem.getLine(info); + targetLine.open(format); + + long startTime = System.currentTimeMillis(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + + // The first request must **only** contain the audio configuration: + requestObserver.onNext(StreamingRecognizeRequest.newBuilder() + .setStreamingConfig(config) + .build()); + System.out.println("Start speaking..."); + + while (true) { + targetLine.start(); + int numBytesRead = targetLine.read(data, 0, data.length); + + if (numBytesRead <= 0) continue; + + out.write(data,0,data.length); + // Subsequent requests must **only** contain the audio data. + requestObserver.onNext(StreamingRecognizeRequest.newBuilder() + .setAudioContent(ByteString.copyFrom(out.toByteArray())) + .build()); + Thread.sleep(1000); + long estimatedTime = System.currentTimeMillis() - startTime; + out.flush(); + if (estimatedTime > (duration*1000)) { + targetLine.stop(); + targetLine.close(); + out.close(); + System.out.println("Stop speaking"); + break; + } + } + // Mark transmission as completed after sending the data. + requestObserver.onCompleted(); + List responses = responseObserver.future().get(); + for (StreamingRecognizeResponse response : responses) { + // For streaming recognize, the results list has one is_final result (if available) followed + // by a number of in-progress results (if interim_results is true) for subsequent utterances. + // Just print the first result here. + StreamingRecognitionResult result = response.getResultsList().get(0); + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + + } catch (Exception e) { + System.err.println(e); + } + } + +} diff --git a/speech/cloud-client/src/test/java/com/example/speech/MicStreamRecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/MicStreamRecognizeIT.java new file mode 100644 index 00000000000..ed7c71f7e4f --- /dev/null +++ b/speech/cloud-client/src/test/java/com/example/speech/MicStreamRecognizeIT.java @@ -0,0 +1,62 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.speech; + +import static com.google.common.truth.Truth.assertThat; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for microphone streaming recognize sample. + */ +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class MicStreamRecognizeIT { + private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); + + private ByteArrayOutputStream bout; + private PrintStream out; + + // The path to the audio file to transcribe + private String audioFileName = "./resources/audio.raw"; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + } + + @After + public void tearDown() { + System.setOut(null); + } + + @Test + public void testMicStreamRecognize() throws Exception { + Recognize.streamingRecognizeFile(audioFileName); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } + +} From 6d0641e6a5b1e701750dca48fb4a192fe067b906 Mon Sep 17 00:00:00 2001 From: nirupa-kumar Date: Thu, 14 Jun 2018 10:48:38 -0700 Subject: [PATCH 2/4] Add streaming microphone sample for Speech - multi-threaded --- .../example/speech/MicStreamRecognize.java | 228 ++++++++++-------- 1 file changed, 124 insertions(+), 104 deletions(-) diff --git a/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java b/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java index df3b45dcb5e..9f034fa6501 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java +++ b/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java @@ -16,8 +16,8 @@ package com.example.speech; -//Imports the Google Cloud client library - +// [START speech_mic_streaming] +// Imports the Google Cloud client library import com.google.api.gax.rpc.ApiStreamObserver; import com.google.api.gax.rpc.BidiStreamingCallable; import com.google.cloud.speech.v1p1beta1.RecognitionConfig; @@ -28,6 +28,7 @@ import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult; import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest; import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse; + import com.google.common.util.concurrent.SettableFuture; import com.google.protobuf.ByteString; @@ -35,26 +36,31 @@ import javax.sound.sampled.AudioSystem; import javax.sound.sampled.DataLine; import javax.sound.sampled.TargetDataLine; -import java.io.ByteArrayOutputStream; -import java.io.IOException; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; import java.util.List; /** * Google Cloud SpeechToText API sample application. Example usage: mvn package exec:java - * -Dexec.mainClass='com.example.speech.MicStreamRecognize' -Dexec.args="micstreamrecognize " + * -Dexec.mainClass='com.example.speech.MicStreamRecognize' -Dexec.args="micstreamrecognize + * " */ public class MicStreamRecognize { - // [START speech_mic_streaming] /** * Demonstrates using the Speech to Text client to convert Microphone streaming speech to text. * * @throws Exception on SpeechToTextClient Errors. */ + // Microphone audio format specification + private static AudioFormat format = new AudioFormat(16000, 16, 1, true, false); - private static final int BYTES_PER_BUFFER = 6400; //buffer size in bytes - + private static DataLine.Info targetInfo = new DataLine.Info(TargetDataLine.class, format); + private static TargetDataLine targetLine; + private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes + // Creating shared object + private static volatile BlockingQueue sharedQueue = new LinkedBlockingQueue(); public static void main(String... args) throws Exception { if (args.length < 1) { @@ -81,113 +87,127 @@ public static void main(String... args) throws Exception { * * @param duration the time duration for the microphone streaming */ - public static void micRecognize(Integer duration) throws InterruptedException, IOException { - //Microphone audio format specification - AudioFormat format = new AudioFormat(16000, 16, 1, true, false); - DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); - // checks if system supports the data line - if (!AudioSystem.isLineSupported(info)) { - System.out.println("Line not supported"); - System.exit(0); - } - byte[] data = new byte[BYTES_PER_BUFFER]; - - // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS - try (SpeechClient speech = SpeechClient.create()) { + public static void micRecognize(Integer duration) throws Exception { + // Creating microphone input buffer thread + micBuffer micrunnable = new micBuffer(); + Thread micThread = new Thread(micrunnable); + int durationMillSec = duration * 1000; + if (!AudioSystem.isLineSupported(targetInfo)) { + System.out.println("Microphone not supported"); + System.exit(0); + } + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS + try (SpeechClient speech = SpeechClient.create()) { - //Configure request with microphone audio - RecognitionConfig recConfig = RecognitionConfig.newBuilder() + // Configure request with local raw PCM audio + RecognitionConfig recConfig = + RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.LINEAR16) .setLanguageCode("en-US") .setSampleRateHertz(16000) .setModel("default") .build(); - StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder() - .setConfig(recConfig) - .setInterimResults(true) - .build(); - - class ResponseApiStreamingObserver implements ApiStreamObserver { - private final SettableFuture> future = SettableFuture.create(); - private final List messages = new java.util.ArrayList(); - - @Override - public void onNext(T message) { messages.add(message); } - - @Override - public void onError(Throwable t) { - future.setException(t); - } - - @Override - public void onCompleted() { future.set(messages); } + StreamingRecognitionConfig config = + StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build(); + + class ResponseApiStreamingObserver implements ApiStreamObserver { + private final SettableFuture> future = SettableFuture.create(); + private final List messages = new java.util.ArrayList(); + + @Override + public void onNext(T message) { + messages.add(message); + } + + @Override + public void onError(Throwable t) { + future.setException(t); + } + + @Override + public void onCompleted() { + future.set(messages); + } + + // Returns the SettableFuture object to get received messages / exceptions. + public SettableFuture> future() { + return future; + } + } - // Returns the SettableFuture object to get received messages / exceptions. - public SettableFuture> future() { - return future; - } + ResponseApiStreamingObserver responseObserver = + new ResponseApiStreamingObserver<>(); + + BidiStreamingCallable callable = + speech.streamingRecognizeCallable(); + + ApiStreamObserver requestObserver = + callable.bidiStreamingCall(responseObserver); + targetLine = (TargetDataLine) AudioSystem.getLine(targetInfo); + targetLine.open(format); + + // The first request must **only** contain the audio configuration: + requestObserver.onNext( + StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build()); + micThread.start(); + try { + long startTime = System.currentTimeMillis(); + while (true) { + Thread.sleep(100); + long estimatedTime = System.currentTimeMillis() - startTime; + if (estimatedTime > durationMillSec) { + System.out.println("Stop speaking."); + targetLine.stop(); + targetLine.close(); + break; } - - ResponseApiStreamingObserver responseObserver = - new ResponseApiStreamingObserver<>(); - - BidiStreamingCallable callable = - speech.streamingRecognizeCallable(); - - ApiStreamObserver requestObserver = - callable.bidiStreamingCall(responseObserver); - - TargetDataLine targetLine = (TargetDataLine) AudioSystem.getLine(info); - targetLine.open(format); - - long startTime = System.currentTimeMillis(); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - - // The first request must **only** contain the audio configuration: - requestObserver.onNext(StreamingRecognizeRequest.newBuilder() - .setStreamingConfig(config) - .build()); - System.out.println("Start speaking..."); - - while (true) { - targetLine.start(); - int numBytesRead = targetLine.read(data, 0, data.length); - - if (numBytesRead <= 0) continue; - - out.write(data,0,data.length); - // Subsequent requests must **only** contain the audio data. - requestObserver.onNext(StreamingRecognizeRequest.newBuilder() - .setAudioContent(ByteString.copyFrom(out.toByteArray())) + // Subsequent requests must **only** contain the audio data. + requestObserver.onNext( + StreamingRecognizeRequest.newBuilder() + .setAudioContent(ByteString.copyFrom(sharedQueue.take())) .build()); - Thread.sleep(1000); - long estimatedTime = System.currentTimeMillis() - startTime; - out.flush(); - if (estimatedTime > (duration*1000)) { - targetLine.stop(); - targetLine.close(); - out.close(); - System.out.println("Stop speaking"); - break; - } - } - // Mark transmission as completed after sending the data. - requestObserver.onCompleted(); - List responses = responseObserver.future().get(); - for (StreamingRecognizeResponse response : responses) { - // For streaming recognize, the results list has one is_final result (if available) followed - // by a number of in-progress results (if interim_results is true) for subsequent utterances. - // Just print the first result here. - StreamingRecognitionResult result = response.getResultsList().get(0); - // There can be several alternative transcripts for a given chunk of speech. Just use the - // first (most likely) one here. - SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); - System.out.printf("Transcript : %s\n", alternative.getTranscript()); - } - + } } catch (Exception e) { - System.err.println(e); + System.out.println("Error in MicrophoneStreamRecognize : " + e.getMessage()); + } + // Mark transmission as completed after sending the data. + requestObserver.onCompleted(); + + List responses = responseObserver.future().get(); + + for (StreamingRecognizeResponse response : responses) { + // For streaming recognize, the results list has one is_final result (if available) followed + // by a number of in-progress results (if interim_results is true) for subsequent + // utterances. + // Just print the first result here. + StreamingRecognitionResult result = response.getResultsList().get(0); + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); } + } } + // Microphone Input buffering + static class micBuffer implements Runnable { + + @Override + public void run() { + System.out.println("Start speaking..."); + targetLine.start(); + byte[] data = new byte[BYTES_PER_BUFFER]; + while (targetLine.isOpen()) { + try { + int numBytesRead = targetLine.read(data, 0, data.length); + if (numBytesRead <= 0) continue; + sharedQueue.put(data.clone()); + + } catch (InterruptedException e) { + System.out.println("Microphone input buffering interrupted : " + e.getMessage()); + } + } + } + } + // [END speech_mic_streaming] } From 2988d29461b48f880f51a4dc1c0dc3ac0366817c Mon Sep 17 00:00:00 2001 From: nirupa-kumar Date: Mon, 18 Jun 2018 15:32:45 -0700 Subject: [PATCH 3/4] Fixing as per pull request review comments --- .../src/test/java/com/example/speech/MicStreamRecognizeIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speech/cloud-client/src/test/java/com/example/speech/MicStreamRecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/MicStreamRecognizeIT.java index ed7c71f7e4f..1566ba82e9e 100644 --- a/speech/cloud-client/src/test/java/com/example/speech/MicStreamRecognizeIT.java +++ b/speech/cloud-client/src/test/java/com/example/speech/MicStreamRecognizeIT.java @@ -1,5 +1,5 @@ /* - * Copyright 2018 Google Inc. + * Copyright 2018 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 1f3c4c1ba63c25fad27f615d40b29f1ef11e679c Mon Sep 17 00:00:00 2001 From: nirupa-kumar Date: Mon, 18 Jun 2018 15:35:10 -0700 Subject: [PATCH 4/4] Fixing as per pull request review comments --- .../example/speech/MicStreamRecognize.java | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java b/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java index 9f034fa6501..618625053fa 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java +++ b/speech/cloud-client/src/main/java/com/example/speech/MicStreamRecognize.java @@ -1,5 +1,5 @@ /* - * Copyright 2018 Google Inc. + * Copyright 2018 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -110,31 +110,6 @@ public static void micRecognize(Integer duration) throws Exception { StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build(); - class ResponseApiStreamingObserver implements ApiStreamObserver { - private final SettableFuture> future = SettableFuture.create(); - private final List messages = new java.util.ArrayList(); - - @Override - public void onNext(T message) { - messages.add(message); - } - - @Override - public void onError(Throwable t) { - future.setException(t); - } - - @Override - public void onCompleted() { - future.set(messages); - } - - // Returns the SettableFuture object to get received messages / exceptions. - public SettableFuture> future() { - return future; - } - } - ResponseApiStreamingObserver responseObserver = new ResponseApiStreamingObserver<>(); @@ -189,6 +164,31 @@ public SettableFuture> future() { } } + static class ResponseApiStreamingObserver implements ApiStreamObserver { + private final SettableFuture> future = SettableFuture.create(); + private final List messages = new java.util.ArrayList(); + + @Override + public void onNext(T message) { + messages.add(message); + } + + @Override + public void onError(Throwable t) { + future.setException(t); + } + + @Override + public void onCompleted() { + future.set(messages); + } + + // Returns the SettableFuture object to get received messages / exceptions. + public SettableFuture> future() { + return future; + } + } + // Microphone Input buffering static class micBuffer implements Runnable {