-
Notifications
You must be signed in to change notification settings - Fork 2.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DO NOT MERGE] Microphone streaming - multi-threaded #1130
Changes from 6 commits
98530e6
a63462e
5cebc15
c06ea50
6d0641e
ab9b92a
8cec60d
2988d29
1f3c4c1
0651db4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
/* | ||
* Copyright 2018 Google Inc. | ||
[Review comment] s/Inc./LLC/ — this applies to all files. |
||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.example.speech; | ||
|
||
// [START speech_mic_streaming] | ||
// Imports the Google Cloud client library | ||
import com.google.api.gax.rpc.ApiStreamObserver; | ||
import com.google.api.gax.rpc.BidiStreamingCallable; | ||
import com.google.cloud.speech.v1p1beta1.RecognitionConfig; | ||
import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding; | ||
import com.google.cloud.speech.v1p1beta1.SpeechClient; | ||
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; | ||
import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig; | ||
import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult; | ||
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest; | ||
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse; | ||
|
||
import com.google.common.util.concurrent.SettableFuture; | ||
import com.google.protobuf.ByteString; | ||
|
||
import javax.sound.sampled.AudioFormat; | ||
import javax.sound.sampled.AudioSystem; | ||
import javax.sound.sampled.DataLine; | ||
import javax.sound.sampled.TargetDataLine; | ||
|
||
import java.util.concurrent.BlockingQueue; | ||
import java.util.concurrent.LinkedBlockingQueue; | ||
import java.util.List; | ||
|
||
/** | ||
* Google Cloud SpeechToText API sample application. Example usage: mvn package exec:java | ||
* -Dexec.mainClass='com.example.speech.MicStreamRecognize' -Dexec.args="micstreamrecognize | ||
* <duration>" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can use the short version: |
||
*/ | ||
public class MicStreamRecognize { | ||
|
||
/** | ||
* Demonstrates using the Speech to Text client to convert Microphone streaming speech to text. | ||
* | ||
* @throws Exception on SpeechToTextClient Errors. | ||
*/ | ||
// Microphone audio format specification | ||
private static AudioFormat format = new AudioFormat(16000, 16, 1, true, false); | ||
|
||
private static DataLine.Info targetInfo = new DataLine.Info(TargetDataLine.class, format); | ||
private static TargetDataLine targetLine; | ||
private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes | ||
// Creating shared object | ||
private static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue(); | ||
|
||
public static void main(String... args) throws Exception { | ||
if (args.length < 1) { | ||
System.out.println("Usage:"); | ||
System.out.printf( | ||
"\tjava %s \"<command>\" \"<duration>\"\n" | ||
+ "Command:\n" | ||
+ "\tmicstreamrecognize\n" | ||
+ "Duration(optional):\n\tIn seconds.(Maximum of 60 seconds)\n", | ||
MicStreamRecognize.class.getCanonicalName()); | ||
return; | ||
} | ||
String command = args[0]; | ||
Integer duration = args.length > 1 ? Integer.parseInt(args[1]) : 10; | ||
|
||
// Use command to invoke transcription | ||
if (command.equals("micstreamrecognize")) { | ||
micRecognize(duration); | ||
} | ||
} | ||
|
||
/** | ||
* Performs streaming speech recognition on microphone audio data. | ||
* | ||
* @param duration the time duration for the microphone streaming | ||
*/ | ||
public static void micRecognize(Integer duration) throws Exception { | ||
// Creating microphone input buffer thread | ||
micBuffer micrunnable = new micBuffer(); | ||
Thread micThread = new Thread(micrunnable); | ||
int durationMillSec = duration * 1000; | ||
if (!AudioSystem.isLineSupported(targetInfo)) { | ||
System.out.println("Microphone not supported"); | ||
System.exit(0); | ||
} | ||
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS | ||
try (SpeechClient speech = SpeechClient.create()) { | ||
|
||
// Configure request with local raw PCM audio | ||
RecognitionConfig recConfig = | ||
RecognitionConfig.newBuilder() | ||
.setEncoding(AudioEncoding.LINEAR16) | ||
.setLanguageCode("en-US") | ||
.setSampleRateHertz(16000) | ||
.setModel("default") | ||
.build(); | ||
StreamingRecognitionConfig config = | ||
StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build(); | ||
|
||
class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unless I'm missing something, I don't see any need for this class to be defined during the execution of this method. Please move the definition outside of the method body (or provide comments explaining why it is defined here, if necessary). |
||
private final SettableFuture<List<T>> future = SettableFuture.create(); | ||
private final List<T> messages = new java.util.ArrayList<T>(); | ||
|
||
@Override | ||
public void onNext(T message) { | ||
messages.add(message); | ||
} | ||
|
||
@Override | ||
public void onError(Throwable t) { | ||
future.setException(t); | ||
} | ||
|
||
@Override | ||
public void onCompleted() { | ||
future.set(messages); | ||
} | ||
|
||
// Returns the SettableFuture object to get received messages / exceptions. | ||
public SettableFuture<List<T>> future() { | ||
return future; | ||
} | ||
} | ||
|
||
ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver = | ||
new ResponseApiStreamingObserver<>(); | ||
|
||
BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable = | ||
speech.streamingRecognizeCallable(); | ||
|
||
ApiStreamObserver<StreamingRecognizeRequest> requestObserver = | ||
callable.bidiStreamingCall(responseObserver); | ||
targetLine = (TargetDataLine) AudioSystem.getLine(targetInfo); | ||
targetLine.open(format); | ||
|
||
// The first request must **only** contain the audio configuration: | ||
requestObserver.onNext( | ||
StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build()); | ||
micThread.start(); | ||
try { | ||
long startTime = System.currentTimeMillis(); | ||
while (true) { | ||
Thread.sleep(100); | ||
long estimatedTime = System.currentTimeMillis() - startTime; | ||
if (estimatedTime > durationMillSec) { | ||
System.out.println("Stop speaking."); | ||
targetLine.stop(); | ||
targetLine.close(); | ||
break; | ||
} | ||
// Subsequent requests must **only** contain the audio data. | ||
requestObserver.onNext( | ||
StreamingRecognizeRequest.newBuilder() | ||
.setAudioContent(ByteString.copyFrom(sharedQueue.take())) | ||
.build()); | ||
} | ||
} catch (Exception e) { | ||
System.out.println("Error in MicrophoneStreamRecognize : " + e.getMessage()); | ||
} | ||
// Mark transmission as completed after sending the data. | ||
requestObserver.onCompleted(); | ||
|
||
List<StreamingRecognizeResponse> responses = responseObserver.future().get(); | ||
|
||
for (StreamingRecognizeResponse response : responses) { | ||
// For streaming recognize, the results list has one is_final result (if available) followed | ||
// by a number of in-progress results (if interim_results is true) for subsequent | ||
// utterances. | ||
// Just print the first result here. | ||
StreamingRecognitionResult result = response.getResultsList().get(0); | ||
// There can be several alternative transcripts for a given chunk of speech. Just use the | ||
// first (most likely) one here. | ||
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); | ||
System.out.printf("Transcript : %s\n", alternative.getTranscript()); | ||
} | ||
} | ||
} | ||
|
||
// Microphone Input buffering | ||
static class micBuffer implements Runnable { | ||
|
||
@Override | ||
public void run() { | ||
System.out.println("Start speaking..."); | ||
targetLine.start(); | ||
byte[] data = new byte[BYTES_PER_BUFFER]; | ||
while (targetLine.isOpen()) { | ||
try { | ||
int numBytesRead = targetLine.read(data, 0, data.length); | ||
if (numBytesRead <= 0) continue; | ||
sharedQueue.put(data.clone()); | ||
|
||
} catch (InterruptedException e) { | ||
System.out.println("Microphone input buffering interrupted : " + e.getMessage()); | ||
} | ||
} | ||
} | ||
} | ||
// [END speech_mic_streaming] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/* | ||
* Copyright 2018 Google Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.example.speech; | ||
|
||
import static com.google.common.truth.Truth.assertThat; | ||
|
||
import java.io.ByteArrayOutputStream; | ||
import java.io.PrintStream; | ||
import org.junit.After; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
import org.junit.runner.RunWith; | ||
import org.junit.runners.JUnit4; | ||
|
||
/** | ||
* Tests for microphone streaming recognize sample. | ||
*/ | ||
@RunWith(JUnit4.class) | ||
@SuppressWarnings("checkstyle:abbreviationaswordinname") | ||
public class MicStreamRecognizeIT { | ||
private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); | ||
|
||
private ByteArrayOutputStream bout; | ||
private PrintStream out; | ||
|
||
// The path to the audio file to transcribe | ||
private String audioFileName = "./resources/audio.raw"; | ||
|
||
@Before | ||
public void setUp() { | ||
bout = new ByteArrayOutputStream(); | ||
out = new PrintStream(bout); | ||
System.setOut(out); | ||
} | ||
|
||
@After | ||
public void tearDown() { | ||
System.setOut(null); | ||
} | ||
|
||
@Test | ||
public void testMicStreamRecognize() throws Exception { | ||
Recognize.streamingRecognizeFile(audioFileName); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this testing the above file or just the Recognize.java streaming? |
||
String got = bout.toString(); | ||
assertThat(got).contains("how old is the Brooklyn Bridge"); | ||
} | ||
|
||
} |
[Review comment] Remove lines 60–75: they appear to be breaking the samples and are not used.