diff --git a/speech/grpc/pom.xml b/speech/grpc/pom.xml index c1853902515..6b4376039cb 100644 --- a/speech/grpc/pom.xml +++ b/speech/grpc/pom.xml @@ -111,6 +111,12 @@ limitations under the License. + + junit + junit + 4.12 + test + commons-cli commons-cli
diff --git a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/AudioRequestFactory.java b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/AudioRequestFactory.java
new file mode 100644
index 00000000000..fce35b3099c
--- /dev/null
+++ b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/AudioRequestFactory.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package com.google.cloud.speech.grpc.demos;
+
+import com.google.cloud.speech.v1.AudioRequest;
+import com.google.protobuf.ByteString;
+
+import java.io.IOException;
+import java.net.URI;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * Builds {@link AudioRequest} messages from a URI that names the audio to recognize.
+ *
+ * <p>A {@code file} URI, or a URI without a scheme, is read from the local file system and sent
+ * inline as bytes. A {@code gs} URI is passed through by reference so that the audio is read from
+ * Google Cloud Storage.
+ */
+public class AudioRequestFactory {
+
+  private static final String FILE_SCHEME = "file";
+  private static final String GS_SCHEME = "gs";
+
+  /**
+   * Converts an input URI into an audio request.
+   *
+   * @param uri location of the audio: a {@code file} URI, a scheme-less local path, or a
+   *     {@code gs} URI
+   * @return AudioRequest carrying the file's bytes for local input, or only the URI for
+   *     {@code gs} input
+   * @throws IOException if the local file cannot be read
+   * @throws IllegalArgumentException if the URI scheme is neither {@code file} nor {@code gs}
+   */
+  public static AudioRequest createRequest(URI uri)
+      throws IOException {
+    String scheme = uri.getScheme();
+    if (scheme == null || scheme.equals(FILE_SCHEME)) {
+      // Paths.get(URI) throws IllegalArgumentException for a scheme-less URI, so a bare path
+      // such as /foo/baz.raw has to be resolved from the URI's path component instead.
+      Path path = scheme == null ? Paths.get(uri.getPath()) : Paths.get(uri);
+      return audioFromBytes(Files.readAllBytes(path));
+    } else if (scheme.equals(GS_SCHEME)) {
+      return AudioRequest.newBuilder().setUri(uri.toString()).build();
+    }
+    throw new IllegalArgumentException("scheme not supported " + scheme);
+  }
+
+  /**
+   * Convert bytes to AudioRequest.
+   *
+   * @param bytes input bytes
+   * @return AudioRequest audio request
+   */
+  private static AudioRequest audioFromBytes(byte[] bytes) {
+    return AudioRequest.newBuilder()
+        .setContent(ByteString.copyFrom(bytes))
+        .build();
+  }
+}
diff --git a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/NonStreamingRecognizeClient.java b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/NonStreamingRecognizeClient.java index d7b7d70af08..3952c7f0e5a 100644 --- a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/NonStreamingRecognizeClient.java +++ b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/NonStreamingRecognizeClient.java @@ -32,7 +32,6 @@ import com.google.cloud.speech.v1.NonStreamingRecognizeResponse; import com.google.cloud.speech.v1.RecognizeRequest; import com.google.cloud.speech.v1.SpeechGrpc; -import com.google.protobuf.ByteString; import com.google.protobuf.TextFormat; import io.grpc.ManagedChannel; @@ -49,9 +48,7 @@ import org.apache.commons.cli.ParseException; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; +import java.net.URI; import java.util.Arrays; import java.util.List; import java.util.concurrent.Executors; @@ -72,7 +69,7 @@ public class NonStreamingRecognizeClient { private final String host; private final int port; - private final String file; + private final URI input; private final int samplingRate;
private final ManagedChannel channel; @@ -81,11 +78,11 @@ public class NonStreamingRecognizeClient { /** * Construct client connecting to Cloud Speech server at {@code host:port}. */ - public NonStreamingRecognizeClient(String host, int port, String file, int samplingRate) + public NonStreamingRecognizeClient(String host, int port, URI input, int samplingRate) throws IOException { this.host = host; this.port = port; - this.file = file; + this.input = input; this.samplingRate = samplingRate; GoogleCredentials creds = GoogleCredentials.getApplicationDefault(); @@ -99,10 +96,7 @@ public NonStreamingRecognizeClient(String host, int port, String file, int sampl } private AudioRequest createAudioRequest() throws IOException { - Path path = Paths.get(file); - return AudioRequest.newBuilder() - .setContent(ByteString.copyFrom(Files.readAllBytes(path))) - .build(); + return AudioRequestFactory.createRequest(this.input); } public void shutdown() throws InterruptedException { @@ -115,10 +109,10 @@ public void recognize() { try { audio = createAudioRequest(); } catch (IOException e) { - logger.log(Level.WARNING, "Failed to read audio file: " + file); + logger.log(Level.WARNING, "Failed to read audio uri input: " + input); return; } - logger.info("Sending " + audio.getContent().size() + " bytes from audio file: " + file); + logger.info("Sending " + audio.getContent().size() + " bytes from audio uri input: " + input); InitialRecognizeRequest initial = InitialRecognizeRequest.newBuilder() .setEncoding(AudioEncoding.LINEAR16) .setSampleRate(samplingRate) @@ -147,8 +141,8 @@ public static void main(String[] args) throws Exception { CommandLineParser parser = new DefaultParser(); Options options = new Options(); - options.addOption(OptionBuilder.withLongOpt("file") - .withDescription("path to audio file") + options.addOption(OptionBuilder.withLongOpt("uri") + .withDescription("path to audio uri") .hasArg() .withArgName("FILE_PATH") .create()); @@ -170,10 +164,10 @@ public static 
void main(String[] args) throws Exception { try { CommandLine line = parser.parse(options, args); - if (line.hasOption("file")) { - audioFile = line.getOptionValue("file"); + if (line.hasOption("uri")) { + audioFile = line.getOptionValue("uri"); } else { - System.err.println("An Audio file path must be specified (e.g. /foo/baz.raw)."); + System.err.println("An Audio uri must be specified (e.g. file:///foo/baz.raw)."); System.exit(1); } @@ -203,7 +197,7 @@ public static void main(String[] args) throws Exception { } NonStreamingRecognizeClient client = - new NonStreamingRecognizeClient(host, port, audioFile, sampling); + new NonStreamingRecognizeClient(host, port, URI.create(audioFile), sampling); try { client.recognize(); } finally { diff --git a/speech/grpc/src/main/proto/google/speech/v1/cloud-speech.proto b/speech/grpc/src/main/proto/google/speech/v1/cloud-speech.proto index 6ee3b031552..97dd8629649 100644 --- a/speech/grpc/src/main/proto/google/speech/v1/cloud-speech.proto +++ b/speech/grpc/src/main/proto/google/speech/v1/cloud-speech.proto @@ -23,6 +23,7 @@ option java_multiple_files = true; option java_outer_classname = "SpeechProto"; option java_package = "com.google.cloud.speech.v1"; + // Service that implements Google Cloud Speech API. service Speech { // Perform bidirectional streaming speech recognition on audio using gRPC. @@ -30,7 +31,7 @@ service Speech { // Perform non-streaming speech recognition on audio using HTTPS. rpc NonStreamingRecognize(RecognizeRequest) returns (NonStreamingRecognizeResponse) { - option (.google.api.http) = { post: "/v1/speech:recognize" body: "*" }; + option (google.api.http) = { post: "/v1/speech:recognize" body: "*" }; } } @@ -54,7 +55,7 @@ message RecognizeRequest { // The audio data to be recognized. For `NonStreamingRecognize`, all the // audio data must be contained in the first (and only) `RecognizeRequest` - // message. For streaming `Recognize`, sequential chunks of audio data are + // message. 
For streaming `Recognize`, sequential chunks of audio data are // sent in sequential `RecognizeRequest` messages. AudioRequest audio_request = 2; } @@ -64,7 +65,7 @@ message RecognizeRequest { message InitialRecognizeRequest { // Audio encoding of the data sent in the audio message. enum AudioEncoding { - // Not specified. Will return result `INVALID_ARGUMENT`. + // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. ENCODING_UNSPECIFIED = 0; // Uncompressed 16-bit signed little-endian samples. @@ -118,8 +119,6 @@ message InitialRecognizeRequest { // profanities, replacing all but the initial character in each filtered word // with asterisks, e.g. "f***". If set to `false` or omitted, profanities // won't be filtered out. - // Note that profanity filtering is not implemented for all languages. - // If the language is not supported, this setting has no effect. bool profanity_filter = 5; // [Optional] If `false` or omitted, the recognizer will detect a single @@ -146,13 +145,38 @@ message InitialRecognizeRequest { // as they become available. // If `false` or omitted, no `EndpointerEvents` are returned. bool enable_endpointer_events = 8; + + // [Optional] URI that points to a file where the recognition result should + // be stored in JSON format. If omitted or empty string, the recognition + // result is returned in the response. Should be specified only for + // `NonStreamingRecognize`. If specified in a `Recognize` request, + // `Recognize` returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. + // If specified in a `NonStreamingRecognize` request, + // `NonStreamingRecognize` returns immediately, and the output file + // is created asynchronously once the audio processing completes. 
+ // Currently, only Google Cloud Storage URIs are supported, which must be + // specified in the following format: `gs://bucket_name/object_name` + // (other URI formats return [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For + // more information, see [Request URIs](/storage/docs/reference-uris). + string output_uri = 9; } // Contains audio data in the format specified in the `InitialRecognizeRequest`. +// Either `content` or `uri` must be supplied. Supplying both or neither +// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. message AudioRequest { - // [Required] The audio data bytes encoded as specified in - // `InitialRecognizeRequest`. + // The audio data bytes encoded as specified in + // `InitialRecognizeRequest`. Note: as with all bytes fields, protobuffers + // use a pure binary representation, whereas JSON representations use base64. bytes content = 1; + + // URI that points to a file that contains audio data bytes as specified in + // `InitialRecognizeRequest`. Currently, only Google Cloud Storage URIs are + // supported, which must be specified in the following format: + // `gs://bucket_name/object_name` (other URI formats return + // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see + // [Request URIs](/storage/docs/reference-uris). + string uri = 2; } // `NonStreamingRecognizeResponse` is the only message returned to the client by @@ -191,10 +215,14 @@ message RecognizeResponse { // [Output-only] If set, returns a [google.rpc.Status][] message that // specifies the error for the operation. - .google.rpc.Status error = 1; - - // [Output-only] May contain zero or one `is_final=true` result (the newly - // settled portion). May also contain zero or more `is_final=false` results. 
+ google.rpc.Status error = 1; + + // [Output-only] For `continuous=false`, this repeated list contains zero or + // one result that corresponds to all of the audio processed so far. For + // `continuous=true`, this repeated list contains zero or more results that + // correspond to consecutive portions of the audio being processed. + // In both cases, contains zero or one `is_final=true` result (the newly + // settled portion), followed by zero or more `is_final=false` results. repeated SpeechRecognitionResult results = 2; // [Output-only] Indicates the lowest index in the `results` array that has @@ -206,7 +234,10 @@ message RecognizeResponse { EndpointerEvent endpoint = 4; } +// A speech recognition result corresponding to a portion of the audio. message SpeechRecognitionResult { + // [Output-only] May contain one or more recognition hypotheses (up to the + // maximum specified in `max_alternatives`). repeated SpeechRecognitionAlternative alternatives = 1; // [Output-only] Set `true` if this is the final time the speech service will
diff --git a/speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/AudioRequestFactoryTest.java b/speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/AudioRequestFactoryTest.java
new file mode 100644
index 00000000000..8e5017d53f0
--- /dev/null
+++ b/speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/AudioRequestFactoryTest.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.speech.grpc.demos;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.cloud.speech.v1.AudioRequest;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+
+/**
+ * Checks how {@link AudioRequestFactory} turns local-file and Cloud Storage URIs into requests.
+ */
+@RunWith(JUnit4.class)
+public class AudioRequestFactoryTest {
+
+  @Test
+  public void verifyBytesInSizeFromLocalFile() throws IOException {
+    File localAudio = new File("resources/audio.raw");
+
+    AudioRequest request = AudioRequestFactory.createRequest(localAudio.toURI());
+
+    // A local file is sent inline, so the content holds every byte of the fixture.
+    assertEquals(57958, request.getContent().size());
+  }
+
+  @Test
+  public void verifyBytesInSizeFromGoogleStorageFile() throws IOException {
+    String gcsLocation = "gs://cloud-samples-tests/speech/audio.raw";
+
+    AudioRequest request = AudioRequestFactory.createRequest(URI.create(gcsLocation));
+
+    // A Cloud Storage object is passed by reference: no inline bytes, only the URI.
+    assertEquals(0, request.getContent().size());
+    assertEquals(gcsLocation, request.getUri());
+  }
+}