Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DO NOT MERGE] Microphone streaming - multi-threaded #1130

45 changes: 45 additions & 0 deletions speech/cloud-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,22 @@
<version>0.40</version>
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/com.google.protobuf/protobuf-java -->
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>3.2.0</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java-util</artifactId>
<version>3.2.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>21.0</version>
</dependency>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove these: 60-75. Seem to be breaking the samples and are not used.

</dependencies>

<build>
Expand Down Expand Up @@ -135,5 +151,34 @@
</plugins>
</build>
</profile>

<profile>
<id>MicStreamRecognize</id>
<activation>
<property>
<name>MicStreamRecognize</name>
</property>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.6.0</version>
<executions>
<execution>
<goals>
<goal>java</goal>
</goals>
</execution>
</executions>
<configuration>
<mainClass>com.example.speech.MicStreamRecognize</mainClass>
<cleanupDaemonThreads>false</cleanupDaemonThreads>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
/*
* Copyright 2018 Google Inc.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/Inc./LLC/

This applies to all files.

*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.speech;

// [START speech_mic_streaming]
// Imports the Google Cloud client library
import com.google.api.gax.rpc.ApiStreamObserver;
import com.google.api.gax.rpc.BidiStreamingCallable;
import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1p1beta1.SpeechClient;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult;
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;

import com.google.common.util.concurrent.SettableFuture;
import com.google.protobuf.ByteString;

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.TargetDataLine;

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.List;

/**
* Google Cloud SpeechToText API sample application. Example usage: mvn package exec:java
* -Dexec.mainClass='com.example.speech.MicStreamRecognize' -Dexec.args="micstreamrecognize
* <duration>"
Copy link
Contributor

@nnegrey nnegrey Jun 22, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can use the short version: mvn exec:java -DMicStreamRecognize -Dexec.args="micstreamrecognize <duration>"

*/
public class MicStreamRecognize {

/**
* Demonstrates using the Speech to Text client to convert Microphone streaming speech to text.
*
* @throws Exception on SpeechToTextClient Errors.
*/
// Microphone audio format specification
private static AudioFormat format = new AudioFormat(16000, 16, 1, true, false);

private static DataLine.Info targetInfo = new DataLine.Info(TargetDataLine.class, format);
private static TargetDataLine targetLine;
private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes
// Creating shared object
private static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue();

public static void main(String... args) throws Exception {
if (args.length < 1) {
System.out.println("Usage:");
System.out.printf(
"\tjava %s \"<command>\" \"<duration>\"\n"
+ "Command:\n"
+ "\tmicstreamrecognize\n"
+ "Duration(optional):\n\tIn seconds.(Maximum of 60 seconds)\n",
MicStreamRecognize.class.getCanonicalName());
return;
}
String command = args[0];
Integer duration = args.length > 1 ? Integer.parseInt(args[1]) : 10;

// Use command to invoke transcription
if (command.equals("micstreamrecognize")) {
micRecognize(duration);
}
}

/**
* Performs streaming speech recognition on microphone audio data.
*
* @param duration the time duration for the microphone streaming
*/
public static void micRecognize(Integer duration) throws Exception {
// Creating microphone input buffer thread
micBuffer micrunnable = new micBuffer();
Thread micThread = new Thread(micrunnable);
int durationMillSec = duration * 1000;
if (!AudioSystem.isLineSupported(targetInfo)) {
System.out.println("Microphone not supported");
System.exit(0);
}
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
try (SpeechClient speech = SpeechClient.create()) {

// Configure request with local raw PCM audio
RecognitionConfig recConfig =
RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.setModel("default")
.build();
StreamingRecognitionConfig config =
StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();

class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless I'm missing something, I don't see any need for this class to be defined during the execution of this method. Please move the definition outside of the method body (or provide comments explaining why it is defined here, if necessary).

private final SettableFuture<List<T>> future = SettableFuture.create();
private final List<T> messages = new java.util.ArrayList<T>();

@Override
public void onNext(T message) {
messages.add(message);
}

@Override
public void onError(Throwable t) {
future.setException(t);
}

@Override
public void onCompleted() {
future.set(messages);
}

// Returns the SettableFuture object to get received messages / exceptions.
public SettableFuture<List<T>> future() {
return future;
}
}

ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
new ResponseApiStreamingObserver<>();

BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
speech.streamingRecognizeCallable();

ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
callable.bidiStreamingCall(responseObserver);
targetLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
targetLine.open(format);

// The first request must **only** contain the audio configuration:
requestObserver.onNext(
StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build());
micThread.start();
try {
long startTime = System.currentTimeMillis();
while (true) {
Thread.sleep(100);
long estimatedTime = System.currentTimeMillis() - startTime;
if (estimatedTime > durationMillSec) {
System.out.println("Stop speaking.");
targetLine.stop();
targetLine.close();
break;
}
// Subsequent requests must **only** contain the audio data.
requestObserver.onNext(
StreamingRecognizeRequest.newBuilder()
.setAudioContent(ByteString.copyFrom(sharedQueue.take()))
.build());
}
} catch (Exception e) {
System.out.println("Error in MicrophoneStreamRecognize : " + e.getMessage());
}
// Mark transmission as completed after sending the data.
requestObserver.onCompleted();

List<StreamingRecognizeResponse> responses = responseObserver.future().get();

for (StreamingRecognizeResponse response : responses) {
// For streaming recognize, the results list has one is_final result (if available) followed
// by a number of in-progress results (if interim_results is true) for subsequent
// utterances.
// Just print the first result here.
StreamingRecognitionResult result = response.getResultsList().get(0);
// There can be several alternative transcripts for a given chunk of speech. Just use the
// first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
System.out.printf("Transcript : %s\n", alternative.getTranscript());
}
}
}

// Microphone Input buffering
static class micBuffer implements Runnable {

@Override
public void run() {
System.out.println("Start speaking...");
targetLine.start();
byte[] data = new byte[BYTES_PER_BUFFER];
while (targetLine.isOpen()) {
try {
int numBytesRead = targetLine.read(data, 0, data.length);
if (numBytesRead <= 0) continue;
sharedQueue.put(data.clone());

} catch (InterruptedException e) {
System.out.println("Microphone input buffering interrupted : " + e.getMessage());
}
}
}
}
// [END speech_mic_streaming]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright 2018 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.speech;

import static com.google.common.truth.Truth.assertThat;

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
* Tests for microphone streaming recognize sample.
*/
@RunWith(JUnit4.class)
@SuppressWarnings("checkstyle:abbreviationaswordinname")
public class MicStreamRecognizeIT {
private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");

private ByteArrayOutputStream bout;
private PrintStream out;

// The path to the audio file to transcribe
private String audioFileName = "./resources/audio.raw";

@Before
public void setUp() {
bout = new ByteArrayOutputStream();
out = new PrintStream(bout);
System.setOut(out);
}

@After
public void tearDown() {
System.setOut(null);
}

@Test
public void testMicStreamRecognize() throws Exception {
Recognize.streamingRecognizeFile(audioFileName);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this testing the above file or just the Recognize.java streaming?

String got = bout.toString();
assertThat(got).contains("how old is the Brooklyn Bridge");
}

}