diff --git a/samples/analyze/beta_snippets.py b/samples/analyze/beta_snippets.py index 12ad4197..ada4f9b4 100644 --- a/samples/analyze/beta_snippets.py +++ b/samples/analyze/beta_snippets.py @@ -14,16 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""This application demonstrates face detection, face emotions -and speech transcription using the Google Cloud API. +"""This application demonstrates speech transcription using the +Google Cloud API. Usage Examples: - python beta_snippets.py boxes \ - gs://python-docs-samples-tests/video/googlework_short.mp4 - - python beta_snippets.py \ - emotions gs://python-docs-samples-tests/video/googlework_short.mp4 - python beta_snippets.py \ transcription gs://python-docs-samples-tests/video/googlework_short.mp4 """ @@ -33,108 +27,6 @@ from google.cloud import videointelligence_v1p1beta1 as videointelligence -# [START video_face_bounding_boxes] -def face_bounding_boxes(gcs_uri): - """ Detects faces' bounding boxes. """ - video_client = videointelligence.VideoIntelligenceServiceClient() - features = [videointelligence.enums.Feature.FACE_DETECTION] - - config = videointelligence.types.FaceConfig( - include_bounding_boxes=True) - context = videointelligence.types.VideoContext( - face_detection_config=config) - - operation = video_client.annotate_video( - gcs_uri, features=features, video_context=context) - print('\nProcessing video for face annotations:') - - result = operation.result(timeout=900) - print('\nFinished processing.') - - # There is only one result because a single video was processed. - faces = result.annotation_results[0].face_detection_annotations - for i, face in enumerate(faces): - print('Face {}'.format(i)) - - # Each face_detection_annotation has only one segment. - segment = face.segments[0] - start_time = (segment.segment.start_time_offset.seconds + - segment.segment.start_time_offset.nanos / 1e9) - end_time = (segment.segment.end_time_offset.seconds + - segment.segment.end_time_offset.nanos / 1e9) - positions = '{}s to {}s'.format(start_time, end_time) - print('\tSegment: {}\n'.format(positions)) - - # Each detected face may appear in many frames of the video. - # Here we process only the first frame. - frame = face.frames[0] - - time_offset = (frame.time_offset.seconds + - frame.time_offset.nanos / 1e9) - box = frame.attributes[0].normalized_bounding_box - - print('First frame time offset: {}s\n'.format(time_offset)) - - print('First frame normalized bounding box:') - print('\tleft : {}'.format(box.left)) - print('\ttop : {}'.format(box.top)) - print('\tright : {}'.format(box.right)) - print('\tbottom: {}'.format(box.bottom)) - print('\n') -# [END video_face_bounding_boxes] - - -# [START video_face_emotions] -def face_emotions(gcs_uri): - """ Analyze faces' emotions over frames. """ - video_client = videointelligence.VideoIntelligenceServiceClient() - features = [videointelligence.enums.Feature.FACE_DETECTION] - - config = videointelligence.types.FaceConfig( - include_emotions=True) - context = videointelligence.types.VideoContext( - face_detection_config=config) - - operation = video_client.annotate_video( - gcs_uri, features=features, video_context=context) - print('\nProcessing video for face annotations:') - - result = operation.result(timeout=600) - print('\nFinished processing.') - - # There is only one result because a single video was processed. - faces = result.annotation_results[0].face_detection_annotations - for i, face in enumerate(faces): - for j, frame in enumerate(face.frames): - time_offset = (frame.time_offset.seconds + - frame.time_offset.nanos / 1e9) - emotions = frame.attributes[0].emotions - - print('Face {}, frame {}, time_offset {}\n'.format( - i, j, time_offset)) - - # from videointelligence.enums - emotion_labels = ( - 'EMOTION_UNSPECIFIED', 'AMUSEMENT', 'ANGER', - 'CONCENTRATION', 'CONTENTMENT', 'DESIRE', - 'DISAPPOINTMENT', 'DISGUST', 'ELATION', - 'EMBARRASSMENT', 'INTEREST', 'PRIDE', 'SADNESS', - 'SURPRISE') - - for emotion in emotions: - emotion_index = emotion.emotion - emotion_label = emotion_labels[emotion_index] - emotion_score = emotion.score - - print('emotion: {} (confidence score: {})'.format( - emotion_label, emotion_score)) - - print('\n') - - print('\n') -# [END video_face_emotions] - - # [START video_speech_transcription] def speech_transcription(input_uri): """Transcribe speech from a video stored on GCS.""" @@ -181,13 +73,6 @@ def speech_transcription(input_uri): description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) subparsers = parser.add_subparsers(dest='command') - analyze_faces_parser = subparsers.add_parser( - 'boxes', help=face_bounding_boxes.__doc__) - analyze_faces_parser.add_argument('gcs_uri') - - analyze_emotions_parser = subparsers.add_parser( - 'emotions', help=face_emotions.__doc__) - analyze_emotions_parser.add_argument('gcs_uri') speech_transcription_parser = subparsers.add_parser( 'transcription', help=speech_transcription.__doc__) @@ -195,9 +80,5 @@ def speech_transcription(input_uri): args = parser.parse_args() - if args.command == 'boxes': - face_bounding_boxes(args.gcs_uri) - elif args.command == 'emotions': - face_emotions(args.gcs_uri) - elif args.command == 'transcription': + if args.command == 'transcription': speech_transcription(args.gcs_uri) diff --git a/samples/analyze/beta_snippets_test.py b/samples/analyze/beta_snippets_test.py index 6d27c2fd..e86f4f8c 100644 --- a/samples/analyze/beta_snippets_test.py +++ b/samples/analyze/beta_snippets_test.py @@ -20,30 +20,13 @@ import beta_snippets - BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] -FACES_SHORT_FILE_PATH = 'video/googlework_short.mp4' - - -@pytest.mark.slow -def test_face_bounding_boxes(capsys): - beta_snippets.face_bounding_boxes( - 'gs://{}/{}'.format(BUCKET, FACES_SHORT_FILE_PATH)) - out, _ = capsys.readouterr() - assert 'top :' in out - - -@pytest.mark.slow -def test_face_emotions(capsys): - beta_snippets.face_emotions( - 'gs://{}/{}'.format(BUCKET, FACES_SHORT_FILE_PATH)) - out, _ = capsys.readouterr() - assert 'CONCENTRATION' in out +FILE_PATH = 'video/googlework_short.mp4' @pytest.mark.slow def test_speech_transcription(capsys): beta_snippets.speech_transcription( - 'gs://{}/{}'.format(BUCKET, FACES_SHORT_FILE_PATH)) + 'gs://{}/{}'.format(BUCKET, FILE_PATH)) out, _ = capsys.readouterr() assert 'cultural' in out