diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 6254dc0c5e1a..21e6bebf31dd 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -71,6 +71,7 @@
 /speech/**/* @GoogleCloudPlatform/cloud-speech-eng @GoogleCloudPlatform/dee-data-ai @GoogleCloudPlatform/python-samples-reviewers
 /texttospeech/**/* @GoogleCloudPlatform/dee-data-ai @GoogleCloudPlatform/python-samples-reviewers
 /translate/**/* @GoogleCloudPlatform/dee-data-ai @GoogleCloudPlatform/python-samples-reviewers
+/videointelligence/**/* @GoogleCloudPlatform/dee-data-ai @GoogleCloudPlatform/python-samples-reviewers
 /video/transcoder/* @GoogleCloudPlatform/dee-data-ai @GoogleCloudPlatform/python-samples-reviewers
 
 # Cloud SDK Databases & Data Analytics teams
diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml
index d18cb8c30e46..e694c33100c7 100644
--- a/.github/blunderbuss.yml
+++ b/.github/blunderbuss.yml
@@ -228,6 +228,7 @@ assign_prs_by:
   - "api: transcoder"
   - "api: translate"
   - "api: vision"
+  - "api: videointelligence"
   to:
   - GoogleCloudPlatform/dee-data-ai
diff --git a/videointelligence/samples/AUTHORING_GUIDE.md b/videointelligence/samples/AUTHORING_GUIDE.md
new file mode 100644
index 000000000000..55c97b32f4c1
--- /dev/null
+++ b/videointelligence/samples/AUTHORING_GUIDE.md
@@ -0,0 +1 @@
+See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/AUTHORING_GUIDE.md
\ No newline at end of file
diff --git a/videointelligence/samples/CONTRIBUTING.md b/videointelligence/samples/CONTRIBUTING.md
new file mode 100644
index 000000000000..34c882b6f1a3
--- /dev/null
+++ b/videointelligence/samples/CONTRIBUTING.md
@@ -0,0 +1 @@
+See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/CONTRIBUTING.md
\ No newline at end of file
diff --git a/videointelligence/samples/analyze/README.rst.in b/videointelligence/samples/analyze/README.rst.in
new file mode 100644
index 000000000000..a01d163f9304
--- /dev/null
+++ b/videointelligence/samples/analyze/README.rst.in
@@ -0,0 +1,25 @@
+# This file is used to generate README.rst
+
+product:
+  name: Google Cloud Video Intelligence API
+  short_name: Cloud Video Intelligence API
+  url: https://cloud.google.com/video-intelligence/docs
+  description: >
+    `Google Cloud Video Intelligence API`_ allows developers to easily
+    integrate feature detection in video.
+
+setup:
+- auth
+- install_deps
+
+samples:
+- name: analyze
+  file: analyze.py
+  show_help: True
+- name: beta samples
+  file: beta_snippets.py
+  show_help: True
+
+cloud_client_library: true
+
+folder: video/cloud-client/analyze
\ No newline at end of file
diff --git a/videointelligence/samples/analyze/analyze.py b/videointelligence/samples/analyze/analyze.py
new file mode 100644
index 000000000000..950ac8c793eb
--- /dev/null
+++ b/videointelligence/samples/analyze/analyze.py
@@ -0,0 +1,592 @@
+#!/usr/bin/env python
+
+# Copyright 2017 Google LLC. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +"""This application demonstrates label detection, +explicit content, and shot change detection using the Google Cloud API. + +Usage Examples: + + python analyze.py labels gs://cloud-samples-data/video/chicago.mp4 + python analyze.py labels_file resources/cat.mp4 + python analyze.py shots gs://cloud-samples-data/video/gbikes_dinosaur.mp4 + python analyze.py explicit_content \ + gs://cloud-samples-data/video/gbikes_dinosaur.mp4 + python analyze.py text_gcs \ + gs://cloud-samples-data/video/googlework_tiny.mp4 + python analyze.py text_file resources/googlework_tiny.mp4 + python analyze.py objects_gcs gs://cloud-samples-data/video/cat.mp4 + python analyze.py objects_file resources/cat.mp4 +""" + +import argparse + +# [START video_detect_text] +import io + +from google.cloud import videointelligence + +# [END video_detect_text] + + +def analyze_explicit_content(path): + # [START video_analyze_explicit_content] + """Detects explicit content from the GCS path to a video.""" + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.EXPLICIT_CONTENT_DETECTION] + + operation = video_client.annotate_video( + request={"features": features, "input_uri": path} + ) + print("\nProcessing video for explicit content annotations:") + + result = operation.result(timeout=90) + print("\nFinished processing.") + + # Retrieve first result because a single video was processed + for frame in result.annotation_results[0].explicit_annotation.frames: + likelihood = videointelligence.Likelihood(frame.pornography_likelihood) + frame_time = frame.time_offset.seconds + frame.time_offset.microseconds / 1e6 + print("Time: {}s".format(frame_time)) + print("\tpornography: {}".format(likelihood.name)) + # [END video_analyze_explicit_content] + + +def analyze_labels(path): + # [START video_analyze_labels_gcs] + """Detects labels given a GCS path.""" + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.LABEL_DETECTION] + + mode = videointelligence.LabelDetectionMode.SHOT_AND_FRAME_MODE + config = videointelligence.LabelDetectionConfig(label_detection_mode=mode) + context = videointelligence.VideoContext(label_detection_config=config) + + operation = video_client.annotate_video( + request={"features": features, "input_uri": path, "video_context": context} + ) + print("\nProcessing video for label annotations:") + + result = operation.result(timeout=180) + print("\nFinished processing.") + + # Process video/segment level label annotations + segment_labels = result.annotation_results[0].segment_label_annotations + for i, segment_label in enumerate(segment_labels): + print("Video label description: {}".format(segment_label.entity.description)) + for category_entity in segment_label.category_entities: + print( + "\tLabel category description: {}".format(category_entity.description) + ) + + for i, segment in enumerate(segment_label.segments): + start_time = ( + segment.segment.start_time_offset.seconds + + segment.segment.start_time_offset.microseconds / 1e6 + ) + end_time = ( + segment.segment.end_time_offset.seconds + + segment.segment.end_time_offset.microseconds / 1e6 + ) + positions = "{}s to {}s".format(start_time, end_time) + confidence = segment.confidence + print("\tSegment {}: {}".format(i, positions)) + print("\tConfidence: {}".format(confidence)) + print("\n") + + # Process shot level label annotations + shot_labels = result.annotation_results[0].shot_label_annotations + for i, shot_label in enumerate(shot_labels): + 
print("Shot label description: {}".format(shot_label.entity.description)) + for category_entity in shot_label.category_entities: + print( + "\tLabel category description: {}".format(category_entity.description) + ) + + for i, shot in enumerate(shot_label.segments): + start_time = ( + shot.segment.start_time_offset.seconds + + shot.segment.start_time_offset.microseconds / 1e6 + ) + end_time = ( + shot.segment.end_time_offset.seconds + + shot.segment.end_time_offset.microseconds / 1e6 + ) + positions = "{}s to {}s".format(start_time, end_time) + confidence = shot.confidence + print("\tSegment {}: {}".format(i, positions)) + print("\tConfidence: {}".format(confidence)) + print("\n") + + # Process frame level label annotations + frame_labels = result.annotation_results[0].frame_label_annotations + for i, frame_label in enumerate(frame_labels): + print("Frame label description: {}".format(frame_label.entity.description)) + for category_entity in frame_label.category_entities: + print( + "\tLabel category description: {}".format(category_entity.description) + ) + + # Each frame_label_annotation has many frames, + # here we print information only about the first frame. + frame = frame_label.frames[0] + time_offset = frame.time_offset.seconds + frame.time_offset.microseconds / 1e6 + print("\tFirst frame time offset: {}s".format(time_offset)) + print("\tFirst frame confidence: {}".format(frame.confidence)) + print("\n") + # [END video_analyze_labels_gcs] + + +def analyze_labels_file(path): + # [START video_analyze_labels] + """Detect labels given a file path.""" + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.LABEL_DETECTION] + + with io.open(path, "rb") as movie: + input_content = movie.read() + + operation = video_client.annotate_video( + request={"features": features, "input_content": input_content} + ) + print("\nProcessing video for label annotations:") + + result = operation.result(timeout=90) + print("\nFinished processing.") + + # Process video/segment level label annotations + segment_labels = result.annotation_results[0].segment_label_annotations + for i, segment_label in enumerate(segment_labels): + print("Video label description: {}".format(segment_label.entity.description)) + for category_entity in segment_label.category_entities: + print( + "\tLabel category description: {}".format(category_entity.description) + ) + + for i, segment in enumerate(segment_label.segments): + start_time = ( + segment.segment.start_time_offset.seconds + + segment.segment.start_time_offset.microseconds / 1e6 + ) + end_time = ( + segment.segment.end_time_offset.seconds + + segment.segment.end_time_offset.microseconds / 1e6 + ) + positions = "{}s to {}s".format(start_time, end_time) + confidence = segment.confidence + print("\tSegment {}: {}".format(i, positions)) + print("\tConfidence: {}".format(confidence)) + print("\n") + + # Process shot level label annotations + shot_labels = result.annotation_results[0].shot_label_annotations + for i, shot_label in enumerate(shot_labels): + print("Shot label description: {}".format(shot_label.entity.description)) + for category_entity in shot_label.category_entities: + print( + "\tLabel category description: {}".format(category_entity.description) + ) + + for i, shot in enumerate(shot_label.segments): + start_time = ( + shot.segment.start_time_offset.seconds + + shot.segment.start_time_offset.microseconds / 1e6 + ) + end_time = ( + shot.segment.end_time_offset.seconds + + 
shot.segment.end_time_offset.microseconds / 1e6 + ) + positions = "{}s to {}s".format(start_time, end_time) + confidence = shot.confidence + print("\tSegment {}: {}".format(i, positions)) + print("\tConfidence: {}".format(confidence)) + print("\n") + + # Process frame level label annotations + frame_labels = result.annotation_results[0].frame_label_annotations + for i, frame_label in enumerate(frame_labels): + print("Frame label description: {}".format(frame_label.entity.description)) + for category_entity in frame_label.category_entities: + print( + "\tLabel category description: {}".format(category_entity.description) + ) + + # Each frame_label_annotation has many frames, + # here we print information only about the first frame. + frame = frame_label.frames[0] + time_offset = frame.time_offset.seconds + frame.time_offset.microseconds / 1e6 + print("\tFirst frame time offset: {}s".format(time_offset)) + print("\tFirst frame confidence: {}".format(frame.confidence)) + print("\n") + # [END video_analyze_labels] + + +def analyze_shots(path): + # [START video_analyze_shots] + """Detects camera shot changes.""" + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.SHOT_CHANGE_DETECTION] + operation = video_client.annotate_video( + request={"features": features, "input_uri": path} + ) + print("\nProcessing video for shot change annotations:") + + result = operation.result(timeout=90) + print("\nFinished processing.") + + # first result is retrieved because a single video was processed + for i, shot in enumerate(result.annotation_results[0].shot_annotations): + start_time = ( + shot.start_time_offset.seconds + shot.start_time_offset.microseconds / 1e6 + ) + end_time = ( + shot.end_time_offset.seconds + shot.end_time_offset.microseconds / 1e6 + ) + print("\tShot {}: {} to {}".format(i, start_time, end_time)) + # [END video_analyze_shots] + + +def speech_transcription(path): + # [START video_speech_transcription_gcs] + """Transcribe speech from a video stored on GCS.""" + from google.cloud import videointelligence + + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.SPEECH_TRANSCRIPTION] + + config = videointelligence.SpeechTranscriptionConfig( + language_code="en-US", enable_automatic_punctuation=True + ) + video_context = videointelligence.VideoContext(speech_transcription_config=config) + + operation = video_client.annotate_video( + request={ + "features": features, + "input_uri": path, + "video_context": video_context, + } + ) + + print("\nProcessing video for speech transcription.") + + result = operation.result(timeout=600) + + # There is only one annotation_result since only + # one video is processed. + annotation_results = result.annotation_results[0] + for speech_transcription in annotation_results.speech_transcriptions: + # The number of alternatives for each transcription is limited by + # SpeechTranscriptionConfig.max_alternatives. + # Each alternative is a different possible transcription + # and has its own confidence score. 
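+        # Each alternative also carries word-level timestamps (word_info.start_time / end_time), printed below.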
+ for alternative in speech_transcription.alternatives: + print("Alternative level information:") + + print("Transcript: {}".format(alternative.transcript)) + print("Confidence: {}\n".format(alternative.confidence)) + + print("Word level information:") + for word_info in alternative.words: + word = word_info.word + start_time = word_info.start_time + end_time = word_info.end_time + print( + "\t{}s - {}s: {}".format( + start_time.seconds + start_time.microseconds * 1e-6, + end_time.seconds + end_time.microseconds * 1e-6, + word, + ) + ) + # [END video_speech_transcription_gcs] + + +def video_detect_text_gcs(input_uri): + # [START video_detect_text_gcs] + """Detect text in a video stored on GCS.""" + from google.cloud import videointelligence + + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.TEXT_DETECTION] + + operation = video_client.annotate_video( + request={"features": features, "input_uri": input_uri} + ) + + print("\nProcessing video for text detection.") + result = operation.result(timeout=600) + + # The first result is retrieved because a single video was processed. + annotation_result = result.annotation_results[0] + + for text_annotation in annotation_result.text_annotations: + print("\nText: {}".format(text_annotation.text)) + + # Get the first text segment + text_segment = text_annotation.segments[0] + start_time = text_segment.segment.start_time_offset + end_time = text_segment.segment.end_time_offset + print( + "start_time: {}, end_time: {}".format( + start_time.seconds + start_time.microseconds * 1e-6, + end_time.seconds + end_time.microseconds * 1e-6, + ) + ) + + print("Confidence: {}".format(text_segment.confidence)) + + # Show the result for the first frame in this segment. + frame = text_segment.frames[0] + time_offset = frame.time_offset + print( + "Time offset for the first frame: {}".format( + time_offset.seconds + time_offset.microseconds * 1e-6 + ) + ) + print("Rotated Bounding Box Vertices:") + for vertex in frame.rotated_bounding_box.vertices: + print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y)) + # [END video_detect_text_gcs] + + +# [START video_detect_text] +def video_detect_text(path): + """Detect text in a local video.""" + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.TEXT_DETECTION] + video_context = videointelligence.VideoContext() + + with io.open(path, "rb") as file: + input_content = file.read() + + operation = video_client.annotate_video( + request={ + "features": features, + "input_content": input_content, + "video_context": video_context, + } + ) + + print("\nProcessing video for text detection.") + result = operation.result(timeout=300) + + # The first result is retrieved because a single video was processed. + annotation_result = result.annotation_results[0] + + for text_annotation in annotation_result.text_annotations: + print("\nText: {}".format(text_annotation.text)) + + # Get the first text segment + text_segment = text_annotation.segments[0] + start_time = text_segment.segment.start_time_offset + end_time = text_segment.segment.end_time_offset + print( + "start_time: {}, end_time: {}".format( + start_time.seconds + start_time.microseconds * 1e-6, + end_time.seconds + end_time.microseconds * 1e-6, + ) + ) + + print("Confidence: {}".format(text_segment.confidence)) + + # Show the result for the first frame in this segment. 
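+        # Each frame records the time offset and the rotated bounding box of the text at that instant.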
+ frame = text_segment.frames[0] + time_offset = frame.time_offset + print( + "Time offset for the first frame: {}".format( + time_offset.seconds + time_offset.microseconds * 1e-6 + ) + ) + print("Rotated Bounding Box Vertices:") + for vertex in frame.rotated_bounding_box.vertices: + print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y)) + + +# [END video_detect_text] + + +def track_objects_gcs(gcs_uri): + # [START video_object_tracking_gcs] + """Object tracking in a video stored on GCS.""" + from google.cloud import videointelligence + + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.OBJECT_TRACKING] + operation = video_client.annotate_video( + request={"features": features, "input_uri": gcs_uri} + ) + print("\nProcessing video for object annotations.") + + result = operation.result(timeout=500) + print("\nFinished processing.\n") + + # The first result is retrieved because a single video was processed. + object_annotations = result.annotation_results[0].object_annotations + + for object_annotation in object_annotations: + print("Entity description: {}".format(object_annotation.entity.description)) + if object_annotation.entity.entity_id: + print("Entity id: {}".format(object_annotation.entity.entity_id)) + + print( + "Segment: {}s to {}s".format( + object_annotation.segment.start_time_offset.seconds + + object_annotation.segment.start_time_offset.microseconds / 1e6, + object_annotation.segment.end_time_offset.seconds + + object_annotation.segment.end_time_offset.microseconds / 1e6, + ) + ) + + print("Confidence: {}".format(object_annotation.confidence)) + + # Here we print only the bounding box of the first frame in the segment + frame = object_annotation.frames[0] + box = frame.normalized_bounding_box + print( + "Time offset of the first frame: {}s".format( + frame.time_offset.seconds + frame.time_offset.microseconds / 1e6 + ) + ) + print("Bounding box position:") + print("\tleft : {}".format(box.left)) + print("\ttop : {}".format(box.top)) + print("\tright : {}".format(box.right)) + print("\tbottom: {}".format(box.bottom)) + print("\n") + # [END video_object_tracking_gcs] + + +def track_objects(path): + # [START video_object_tracking] + """Object tracking in a local video.""" + from google.cloud import videointelligence + + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.OBJECT_TRACKING] + + with io.open(path, "rb") as file: + input_content = file.read() + + operation = video_client.annotate_video( + request={"features": features, "input_content": input_content} + ) + print("\nProcessing video for object annotations.") + + result = operation.result(timeout=500) + print("\nFinished processing.\n") + + # The first result is retrieved because a single video was processed. + object_annotations = result.annotation_results[0].object_annotations + + # Get only the first annotation for demo purposes. 
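+    # To inspect every tracked object instead, iterate over object_annotations as track_objects_gcs does above.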
+    object_annotation = object_annotations[0]
+    print("Entity description: {}".format(object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print("Entity id: {}".format(object_annotation.entity.entity_id))
+
+    print(
+        "Segment: {}s to {}s".format(
+            object_annotation.segment.start_time_offset.seconds
+            + object_annotation.segment.start_time_offset.microseconds / 1e6,
+            object_annotation.segment.end_time_offset.seconds
+            + object_annotation.segment.end_time_offset.microseconds / 1e6,
+        )
+    )
+
+    print("Confidence: {}".format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print(
+        "Time offset of the first frame: {}s".format(
+            frame.time_offset.seconds + frame.time_offset.microseconds / 1e6
+        )
+    )
+    print("Bounding box position:")
+    print("\tleft : {}".format(box.left))
+    print("\ttop : {}".format(box.top))
+    print("\tright : {}".format(box.right))
+    print("\tbottom: {}".format(box.bottom))
+    print("\n")
+    # [END video_object_tracking]
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    subparsers = parser.add_subparsers(dest="command")
+
+    analyze_labels_parser = subparsers.add_parser("labels", help=analyze_labels.__doc__)
+    analyze_labels_parser.add_argument("path")
+
+    analyze_labels_file_parser = subparsers.add_parser(
+        "labels_file", help=analyze_labels_file.__doc__
+    )
+    analyze_labels_file_parser.add_argument("path")
+
+    analyze_explicit_content_parser = subparsers.add_parser(
+        "explicit_content", help=analyze_explicit_content.__doc__
+    )
+    analyze_explicit_content_parser.add_argument("path")
+
+    analyze_shots_parser = subparsers.add_parser("shots", help=analyze_shots.__doc__)
+    analyze_shots_parser.add_argument("path")
+
+    transcribe_speech_parser = subparsers.add_parser(
+        "transcribe", help=speech_transcription.__doc__
+    )
+    transcribe_speech_parser.add_argument("path")
+
+    detect_text_parser = subparsers.add_parser(
+        "text_gcs", help=video_detect_text_gcs.__doc__
+    )
+    detect_text_parser.add_argument("path")
+
+    detect_text_file_parser = subparsers.add_parser(
+        "text_file", help=video_detect_text.__doc__
+    )
+    detect_text_file_parser.add_argument("path")
+
+    track_objects_parser = subparsers.add_parser(
+        "objects_gcs", help=track_objects_gcs.__doc__
+    )
+    track_objects_parser.add_argument("path")
+
+    track_objects_file_parser = subparsers.add_parser(
+        "objects_file", help=track_objects.__doc__
+    )
+    track_objects_file_parser.add_argument("path")
+
+    args = parser.parse_args()
+
+    if args.command == "labels":
+        analyze_labels(args.path)
+    if args.command == "labels_file":
+        analyze_labels_file(args.path)
+    if args.command == "shots":
+        analyze_shots(args.path)
+    if args.command == "explicit_content":
+        analyze_explicit_content(args.path)
+    if args.command == "transcribe":
+        speech_transcription(args.path)
+    if args.command == "text_gcs":
+        video_detect_text_gcs(args.path)
+    if args.command == "text_file":
+        video_detect_text(args.path)
+    if args.command == "objects_gcs":
+        track_objects_gcs(args.path)
+    if args.command == "objects_file":
+        track_objects(args.path)
diff --git a/videointelligence/samples/analyze/analyze_test.py b/videointelligence/samples/analyze/analyze_test.py
new file mode 100644
index 000000000000..2632d65d033b
--- /dev/null
+++ b/videointelligence/samples/analyze/analyze_test.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+
+# Copyright 2017 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+
+from google.api_core.exceptions import ServiceUnavailable
+import pytest
+
+import analyze
+
+POSSIBLE_TEXTS = [
+    "Google",
+    "SUR",
+    "SUR",
+    "ROTO",
+    "Vice President",
+    "58oo9",
+    "LONDRES",
+    "OMAR",
+    "PARIS",
+    "METRO",
+    "RUE",
+    "CARLO",
+]
+
+
+def test_analyze_shots(capsys):
+    analyze.analyze_shots("gs://cloud-samples-data/video/gbikes_dinosaur.mp4")
+    out, _ = capsys.readouterr()
+    assert "Shot 1:" in out
+
+
+def test_analyze_labels(capsys):
+    analyze.analyze_labels("gs://cloud-samples-data/video/cat.mp4")
+    out, _ = capsys.readouterr()
+    assert "label description: cat" in out
+
+
+def test_analyze_labels_file(capsys):
+    analyze.analyze_labels_file("resources/googlework_tiny.mp4")
+    out, _ = capsys.readouterr()
+    assert "label description" in out
+
+
+def test_analyze_explicit_content(capsys):
+    # Retry up to three times if the service is temporarily unavailable.
+    try_count = 0
+    while try_count < 3:
+        try:
+            analyze.analyze_explicit_content("gs://cloud-samples-data/video/cat.mp4")
+            out, _ = capsys.readouterr()
+            assert "pornography" in out
+            break
+        except ServiceUnavailable as e:
+            # The service is throttling or temporarily unavailable; wait 5 seconds and retry.
+            print("Got service unavailable exception: {}".format(str(e)))
+            try_count += 1
+            time.sleep(5)
+
+
+def test_speech_transcription(capsys):
+    analyze.speech_transcription("gs://cloud-samples-data/video/googlework_short.mp4")
+    out, _ = capsys.readouterr()
+    assert "cultural" in out
+
+
+# Flaky timeout
+@pytest.mark.flaky(max_runs=3, min_passes=1)
+def test_detect_text_gcs(capsys):
+    analyze.video_detect_text_gcs("gs://cloud-samples-data/video/googlework_tiny.mp4")
+    out, _ = capsys.readouterr()
+    assert "Text" in out
+
+
+# Flaky timeout
+@pytest.mark.flaky(max_runs=3, min_passes=1)
+def test_detect_text(capsys):
+    analyze.video_detect_text("resources/googlework_tiny.mp4")
+    out, _ = capsys.readouterr()
+    assert "Text" in out
+
+
+# Flaky timeout
+@pytest.mark.flaky(max_runs=3, min_passes=1)
+def test_track_objects_gcs(capsys):
+    analyze.track_objects_gcs("gs://cloud-samples-data/video/cat.mp4")
+    out, _ = capsys.readouterr()
+    assert "cat" in out
+
+
+# Flaky timeout
+@pytest.mark.flaky(max_runs=3, min_passes=1)
+def test_track_objects(capsys):
+    in_file = "./resources/googlework_tiny.mp4"
+    analyze.track_objects(in_file)
+    out, _ = capsys.readouterr()
+    assert "Entity id" in out
diff --git a/videointelligence/samples/analyze/beta_snippets.py b/videointelligence/samples/analyze/beta_snippets.py
new file mode 100644
index 000000000000..83b2a5da06d9
--- /dev/null
+++ b/videointelligence/samples/analyze/beta_snippets.py
@@ -0,0 +1,916 @@
+#!/usr/bin/env python
+
+# Copyright 2019 Google LLC. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This application demonstrates speech transcription using the +Google Cloud API. + +Usage Examples: + python beta_snippets.py transcription \ + gs://python-docs-samples-tests/video/googlework_tiny.mp4 + + python beta_snippets.py video-text-gcs \ + gs://python-docs-samples-tests/video/googlework_tiny.mp4 + + python beta_snippets.py streaming-labels resources/cat.mp4 + + python beta_snippets.py streaming-shot-change resources/cat.mp4 + + python beta_snippets.py streaming-objects resources/cat.mp4 + + python beta_snippets.py streaming-explicit-content resources/cat.mp4 + + python beta_snippets.py streaming-annotation-storage resources/cat.mp4 \ + gs://mybucket/myfolder + + python beta_snippets.py streaming-automl-classification resources/cat.mp4 \ + $PROJECT_ID $MODEL_ID + + python beta_snippets.py streaming-automl-object-tracking resources/cat.mp4 \ + $PROJECT_ID $MODEL_ID + + python beta_snippets.py streaming-automl-action-recognition \ + resources/cat.mp4 $PROJECT_ID $MODEL_ID +""" + +import argparse +import io + + +def speech_transcription(input_uri, timeout=180): + # [START video_speech_transcription_gcs_beta] + """Transcribe speech from a video stored on GCS.""" + from google.cloud import videointelligence_v1p1beta1 as videointelligence + + video_client = videointelligence.VideoIntelligenceServiceClient() + + features = [videointelligence.Feature.SPEECH_TRANSCRIPTION] + + config = videointelligence.SpeechTranscriptionConfig( + language_code="en-US", enable_automatic_punctuation=True + ) + video_context = videointelligence.VideoContext(speech_transcription_config=config) + + operation = video_client.annotate_video( + request={ + "features": features, + "input_uri": input_uri, + "video_context": video_context, + } + ) + + print("\nProcessing video for speech transcription.") + + result = operation.result(timeout) + + # There is only one annotation_result since only + # one video is processed. + annotation_results = result.annotation_results[0] + for speech_transcription in annotation_results.speech_transcriptions: + # The number of alternatives for each transcription is limited by + # SpeechTranscriptionConfig.max_alternatives. + # Each alternative is a different possible transcription + # and has its own confidence score. 
+ for alternative in speech_transcription.alternatives: + print("Alternative level information:") + + print("Transcript: {}".format(alternative.transcript)) + print("Confidence: {}\n".format(alternative.confidence)) + + print("Word level information:") + for word_info in alternative.words: + word = word_info.word + start_time = word_info.start_time + end_time = word_info.end_time + print( + "\t{}s - {}s: {}".format( + start_time.seconds + start_time.microseconds * 1e-6, + end_time.seconds + end_time.microseconds * 1e-6, + word, + ) + ) + # [END video_speech_transcription_gcs_beta] + + +def video_detect_text_gcs(input_uri): + # [START video_detect_text_gcs_beta] + """Detect text in a video stored on GCS.""" + from google.cloud import videointelligence_v1p2beta1 as videointelligence + + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.TEXT_DETECTION] + + operation = video_client.annotate_video( + request={"features": features, "input_uri": input_uri} + ) + + print("\nProcessing video for text detection.") + result = operation.result(timeout=300) + + # The first result is retrieved because a single video was processed. + annotation_result = result.annotation_results[0] + + # Get only the first result + text_annotation = annotation_result.text_annotations[0] + print("\nText: {}".format(text_annotation.text)) + + # Get the first text segment + text_segment = text_annotation.segments[0] + start_time = text_segment.segment.start_time_offset + end_time = text_segment.segment.end_time_offset + print( + "start_time: {}, end_time: {}".format( + start_time.seconds + start_time.microseconds * 1e-6, + end_time.seconds + end_time.microseconds * 1e-6, + ) + ) + + print("Confidence: {}".format(text_segment.confidence)) + + # Show the result for the first frame in this segment. + frame = text_segment.frames[0] + time_offset = frame.time_offset + print( + "Time offset for the first frame: {}".format( + time_offset.seconds + time_offset.microseconds * 1e-6 + ) + ) + print("Rotated Bounding Box Vertices:") + for vertex in frame.rotated_bounding_box.vertices: + print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y)) + # [END video_detect_text_gcs_beta] + return annotation_result.text_annotations + + +def video_detect_text(path): + # [START video_detect_text_beta] + """Detect text in a local video.""" + from google.cloud import videointelligence_v1p2beta1 as videointelligence + + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.TEXT_DETECTION] + video_context = videointelligence.VideoContext() + + with io.open(path, "rb") as file: + input_content = file.read() + + operation = video_client.annotate_video( + request={ + "features": features, + "input_content": input_content, + "video_context": video_context, + } + ) + + print("\nProcessing video for text detection.") + result = operation.result(timeout=300) + + # The first result is retrieved because a single video was processed. 
+ annotation_result = result.annotation_results[0] + + # Get only the first result + text_annotation = annotation_result.text_annotations[0] + print("\nText: {}".format(text_annotation.text)) + + # Get the first text segment + text_segment = text_annotation.segments[0] + start_time = text_segment.segment.start_time_offset + end_time = text_segment.segment.end_time_offset + print( + "start_time: {}, end_time: {}".format( + start_time.seconds + start_time.microseconds * 1e-6, + end_time.seconds + end_time.microseconds * 1e-6, + ) + ) + + print("Confidence: {}".format(text_segment.confidence)) + + # Show the result for the first frame in this segment. + frame = text_segment.frames[0] + time_offset = frame.time_offset + print( + "Time offset for the first frame: {}".format( + time_offset.seconds + time_offset.microseconds * 1e-6 + ) + ) + print("Rotated Bounding Box Vertices:") + for vertex in frame.rotated_bounding_box.vertices: + print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y)) + # [END video_detect_text_beta] + return annotation_result.text_annotations + + +def detect_labels_streaming(path): + # [START video_streaming_label_detection_beta] + from google.cloud import videointelligence_v1p3beta1 as videointelligence + + # path = 'path_to_file' + + client = videointelligence.StreamingVideoIntelligenceServiceClient() + + # Set streaming config. + config = videointelligence.StreamingVideoConfig( + feature=(videointelligence.StreamingFeature.STREAMING_LABEL_DETECTION) + ) + + # config_request should be the first in the stream of requests. + config_request = videointelligence.StreamingAnnotateVideoRequest( + video_config=config + ) + + # Set the chunk size to 5MB (recommended less than 10MB). + chunk_size = 5 * 1024 * 1024 + + # Load file content. + stream = [] + with io.open(path, "rb") as video_file: + while True: + data = video_file.read(chunk_size) + if not data: + break + stream.append(data) + + def stream_generator(): + yield config_request + for chunk in stream: + yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk) + + requests = stream_generator() + + # streaming_annotate_video returns a generator. + # The default timeout is about 300 seconds. + # To process longer videos it should be set to + # larger than the length (in seconds) of the stream. + responses = client.streaming_annotate_video(requests, timeout=600) + + # Each response corresponds to about 1 second of video. + for response in responses: + # Check for errors. + if response.error.message: + print(response.error.message) + break + + label_annotations = response.annotation_results.label_annotations + + # label_annotations could be empty + if not label_annotations: + continue + + for annotation in label_annotations: + # Each annotation has one frame, which has a timeoffset. + frame = annotation.frames[0] + time_offset = ( + frame.time_offset.seconds + frame.time_offset.microseconds / 1e6 + ) + + description = annotation.entity.description + confidence = annotation.frames[0].confidence + # description is in Unicode + print( + "{}s: {} (confidence: {})".format(time_offset, description, confidence) + ) + # [END video_streaming_label_detection_beta] + + +def detect_shot_change_streaming(path): + # [START video_streaming_shot_change_detection_beta] + from google.cloud import videointelligence_v1p3beta1 as videointelligence + + # path = 'path_to_file' + + client = videointelligence.StreamingVideoIntelligenceServiceClient() + + # Set streaming config. 
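+    # STREAMING_SHOT_CHANGE_DETECTION reports shot boundaries as the video bytes are streamed in.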
+ config = videointelligence.StreamingVideoConfig( + feature=(videointelligence.StreamingFeature.STREAMING_SHOT_CHANGE_DETECTION) + ) + + # config_request should be the first in the stream of requests. + config_request = videointelligence.StreamingAnnotateVideoRequest( + video_config=config + ) + + # Set the chunk size to 5MB (recommended less than 10MB). + chunk_size = 5 * 1024 * 1024 + + # Load file content. + stream = [] + with io.open(path, "rb") as video_file: + while True: + data = video_file.read(chunk_size) + if not data: + break + stream.append(data) + + def stream_generator(): + yield config_request + for chunk in stream: + yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk) + + requests = stream_generator() + + # streaming_annotate_video returns a generator. + # The default timeout is about 300 seconds. + # To process longer videos it should be set to + # larger than the length (in seconds) of the stream. + responses = client.streaming_annotate_video(requests, timeout=600) + + # Each response corresponds to about 1 second of video. + for response in responses: + # Check for errors. + if response.error.message: + print(response.error.message) + break + + for annotation in response.annotation_results.shot_annotations: + start = ( + annotation.start_time_offset.seconds + + annotation.start_time_offset.microseconds / 1e6 + ) + end = ( + annotation.end_time_offset.seconds + + annotation.end_time_offset.microseconds / 1e6 + ) + + print("Shot: {}s to {}s".format(start, end)) + # [END video_streaming_shot_change_detection_beta] + + +def track_objects_streaming(path): + # [START video_streaming_object_tracking_beta] + from google.cloud import videointelligence_v1p3beta1 as videointelligence + + # path = 'path_to_file' + + client = videointelligence.StreamingVideoIntelligenceServiceClient() + + # Set streaming config. + config = videointelligence.StreamingVideoConfig( + feature=(videointelligence.StreamingFeature.STREAMING_OBJECT_TRACKING) + ) + + # config_request should be the first in the stream of requests. + config_request = videointelligence.StreamingAnnotateVideoRequest( + video_config=config + ) + + # Set the chunk size to 5MB (recommended less than 10MB). + chunk_size = 5 * 1024 * 1024 + + # Load file content. + stream = [] + with io.open(path, "rb") as video_file: + while True: + data = video_file.read(chunk_size) + if not data: + break + stream.append(data) + + def stream_generator(): + yield config_request + for chunk in stream: + yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk) + + requests = stream_generator() + + # streaming_annotate_video returns a generator. + # The default timeout is about 300 seconds. + # To process longer videos it should be set to + # larger than the length (in seconds) of the stream. + responses = client.streaming_annotate_video(requests, timeout=900) + + # Each response corresponds to about 1 second of video. + for response in responses: + # Check for errors. + if response.error.message: + print(response.error.message) + break + + object_annotations = response.annotation_results.object_annotations + + # object_annotations could be empty + if not object_annotations: + continue + + for annotation in object_annotations: + # Each annotation has one frame, which has a timeoffset. 
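+            # Bounding box coordinates below are normalized to [0, 1] relative to the frame size.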
+ frame = annotation.frames[0] + time_offset = ( + frame.time_offset.seconds + frame.time_offset.microseconds / 1e6 + ) + + description = annotation.entity.description + confidence = annotation.confidence + + # track_id tracks the same object in the video. + track_id = annotation.track_id + + # description is in Unicode + print("{}s".format(time_offset)) + print("\tEntity description: {}".format(description)) + print("\tTrack Id: {}".format(track_id)) + if annotation.entity.entity_id: + print("\tEntity id: {}".format(annotation.entity.entity_id)) + + print("\tConfidence: {}".format(confidence)) + + # Every annotation has only one frame + frame = annotation.frames[0] + box = frame.normalized_bounding_box + print("\tBounding box position:") + print("\tleft : {}".format(box.left)) + print("\ttop : {}".format(box.top)) + print("\tright : {}".format(box.right)) + print("\tbottom: {}\n".format(box.bottom)) + # [END video_streaming_object_tracking_beta] + + +def detect_explicit_content_streaming(path): + # [START video_streaming_explicit_content_detection_beta] + from google.cloud import videointelligence_v1p3beta1 as videointelligence + + # path = 'path_to_file' + + client = videointelligence.StreamingVideoIntelligenceServiceClient() + + # Set streaming config. + config = videointelligence.StreamingVideoConfig( + feature=( + videointelligence.StreamingFeature.STREAMING_EXPLICIT_CONTENT_DETECTION + ) + ) + + # config_request should be the first in the stream of requests. + config_request = videointelligence.StreamingAnnotateVideoRequest( + video_config=config + ) + + # Set the chunk size to 5MB (recommended less than 10MB). + chunk_size = 5 * 1024 * 1024 + + # Load file content. + stream = [] + with io.open(path, "rb") as video_file: + while True: + data = video_file.read(chunk_size) + if not data: + break + stream.append(data) + + def stream_generator(): + yield config_request + for chunk in stream: + yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk) + + requests = stream_generator() + + # streaming_annotate_video returns a generator. + # The default timeout is about 300 seconds. + # To process longer videos it should be set to + # larger than the length (in seconds) of the stream. + responses = client.streaming_annotate_video(requests, timeout=900) + + # Each response corresponds to about 1 second of video. + for response in responses: + # Check for errors. + if response.error.message: + print(response.error.message) + break + + for frame in response.annotation_results.explicit_annotation.frames: + time_offset = ( + frame.time_offset.seconds + frame.time_offset.microseconds / 1e6 + ) + pornography_likelihood = videointelligence.Likelihood( + frame.pornography_likelihood + ) + + print("Time: {}s".format(time_offset)) + print("\tpornogaphy: {}".format(pornography_likelihood.name)) + # [END video_streaming_explicit_content_detection_beta] + + +def annotation_to_storage_streaming(path, output_uri): + # [START video_streaming_annotation_to_storage_beta] + from google.cloud import videointelligence_v1p3beta1 as videointelligence + + # path = 'path_to_file' + # output_uri = 'gs://path_to_output' + + client = videointelligence.StreamingVideoIntelligenceServiceClient() + + # Set streaming config specifying the output_uri. + # The output_uri is the prefix of the actual output files. 
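+    # The GCS bucket referenced by output_uri is assumed to already exist; results are written under this prefix.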
+ storage_config = videointelligence.StreamingStorageConfig( + enable_storage_annotation_result=True, + annotation_result_storage_directory=output_uri, + ) + # Here we use label detection as an example. + # All features support output to GCS. + config = videointelligence.StreamingVideoConfig( + feature=(videointelligence.StreamingFeature.STREAMING_LABEL_DETECTION), + storage_config=storage_config, + ) + + # config_request should be the first in the stream of requests. + config_request = videointelligence.StreamingAnnotateVideoRequest( + video_config=config + ) + + # Set the chunk size to 5MB (recommended less than 10MB). + chunk_size = 5 * 1024 * 1024 + + # Load file content. + stream = [] + with io.open(path, "rb") as video_file: + while True: + data = video_file.read(chunk_size) + if not data: + break + stream.append(data) + + def stream_generator(): + yield config_request + for chunk in stream: + yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk) + + requests = stream_generator() + + # streaming_annotate_video returns a generator. + # The default timeout is about 300 seconds. + # To process longer videos it should be set to + # larger than the length (in seconds) of the stream. + responses = client.streaming_annotate_video(requests, timeout=600) + + for response in responses: + # Check for errors. + if response.error.message: + print(response.error.message) + break + + print("Storage URI: {}".format(response.annotation_results_uri)) + # [END video_streaming_annotation_to_storage_beta] + + +def streaming_automl_classification(path, project_id, model_id): + # [START video_streaming_automl_classification_beta] + import io + + from google.cloud import videointelligence_v1p3beta1 as videointelligence + + # path = 'path_to_file' + # project_id = 'gcp_project_id' + # model_id = 'automl_classification_model_id' + + client = videointelligence.StreamingVideoIntelligenceServiceClient() + + model_path = "projects/{}/locations/us-central1/models/{}".format( + project_id, model_id + ) + + # Here we use classification as an example. + automl_config = videointelligence.StreamingAutomlClassificationConfig( + model_name=model_path + ) + + video_config = videointelligence.StreamingVideoConfig( + feature=videointelligence.StreamingFeature.STREAMING_AUTOML_CLASSIFICATION, + automl_classification_config=automl_config, + ) + + # config_request should be the first in the stream of requests. + config_request = videointelligence.StreamingAnnotateVideoRequest( + video_config=video_config + ) + + # Set the chunk size to 5MB (recommended less than 10MB). + chunk_size = 5 * 1024 * 1024 + + # Load file content. + # Note: Input videos must have supported video codecs. See + # https://cloud.google.com/video-intelligence/docs/streaming/streaming#supported_video_codecs + # for more details. + stream = [] + with io.open(path, "rb") as video_file: + while True: + data = video_file.read(chunk_size) + if not data: + break + stream.append(data) + + def stream_generator(): + yield config_request + for chunk in stream: + yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk) + + requests = stream_generator() + + # streaming_annotate_video returns a generator. + # The default timeout is about 300 seconds. + # To process longer videos it should be set to + # larger than the length (in seconds) of the stream. + responses = client.streaming_annotate_video(requests, timeout=600) + + for response in responses: + # Check for errors. 
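+        # A non-empty error message means the stream failed; stop reading further responses.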
+ if response.error.message: + print(response.error.message) + break + + for label in response.annotation_results.label_annotations: + for frame in label.frames: + print( + "At {:3d}s segment, {:5.1%} {}".format( + frame.time_offset.seconds, + frame.confidence, + label.entity.entity_id, + ) + ) + # [END video_streaming_automl_classification_beta] + + +def streaming_automl_object_tracking(path, project_id, model_id): + # [START video_streaming_automl_object_tracking_beta] + import io + + from google.cloud import videointelligence_v1p3beta1 as videointelligence + + # path = 'path_to_file' + # project_id = 'project_id' + # model_id = 'automl_object_tracking_model_id' + + client = videointelligence.StreamingVideoIntelligenceServiceClient() + + model_path = "projects/{}/locations/us-central1/models/{}".format( + project_id, model_id + ) + + automl_config = videointelligence.StreamingAutomlObjectTrackingConfig( + model_name=model_path + ) + + video_config = videointelligence.StreamingVideoConfig( + feature=videointelligence.StreamingFeature.STREAMING_AUTOML_OBJECT_TRACKING, + automl_object_tracking_config=automl_config, + ) + + # config_request should be the first in the stream of requests. + config_request = videointelligence.StreamingAnnotateVideoRequest( + video_config=video_config + ) + + # Set the chunk size to 5MB (recommended less than 10MB). + chunk_size = 5 * 1024 * 1024 + + # Load file content. + # Note: Input videos must have supported video codecs. See + # https://cloud.google.com/video-intelligence/docs/streaming/streaming#supported_video_codecs + # for more details. + stream = [] + with io.open(path, "rb") as video_file: + while True: + data = video_file.read(chunk_size) + if not data: + break + stream.append(data) + + def stream_generator(): + yield config_request + for chunk in stream: + yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk) + + requests = stream_generator() + + # streaming_annotate_video returns a generator. + # The default timeout is about 300 seconds. + # To process longer videos it should be set to + # larger than the length (in seconds) of the stream. + responses = client.streaming_annotate_video(requests, timeout=900) + + # Each response corresponds to about 1 second of video. + for response in responses: + # Check for errors. + if response.error.message: + print(response.error.message) + break + + object_annotations = response.annotation_results.object_annotations + + # object_annotations could be empty + if not object_annotations: + continue + + for annotation in object_annotations: + # Each annotation has one frame, which has a timeoffset. + frame = annotation.frames[0] + time_offset = ( + frame.time_offset.seconds + frame.time_offset.microseconds / 1e6 + ) + + description = annotation.entity.description + confidence = annotation.confidence + + # track_id tracks the same object in the video. 
+ track_id = annotation.track_id + + # description is in Unicode + print("{}s".format(time_offset)) + print("\tEntity description: {}".format(description)) + print("\tTrack Id: {}".format(track_id)) + if annotation.entity.entity_id: + print("\tEntity id: {}".format(annotation.entity.entity_id)) + + print("\tConfidence: {}".format(confidence)) + + # Every annotation has only one frame + frame = annotation.frames[0] + box = frame.normalized_bounding_box + print("\tBounding box position:") + print("\tleft : {}".format(box.left)) + print("\ttop : {}".format(box.top)) + print("\tright : {}".format(box.right)) + print("\tbottom: {}\n".format(box.bottom)) + # [END video_streaming_automl_object_tracking_beta] + + +def streaming_automl_action_recognition(path, project_id, model_id): + # [START video_streaming_automl_action_recognition_beta] + import io + + from google.cloud import videointelligence_v1p3beta1 as videointelligence + + # path = 'path_to_file' + # project_id = 'project_id' + # model_id = 'automl_action_recognition_model_id' + + client = videointelligence.StreamingVideoIntelligenceServiceClient() + + model_path = "projects/{}/locations/us-central1/models/{}".format( + project_id, model_id + ) + + automl_config = videointelligence.StreamingAutomlActionRecognitionConfig( + model_name=model_path + ) + + video_config = videointelligence.StreamingVideoConfig( + feature=videointelligence.StreamingFeature.STREAMING_AUTOML_ACTION_RECOGNITION, + automl_action_recognition_config=automl_config, + ) + + # config_request should be the first in the stream of requests. + config_request = videointelligence.StreamingAnnotateVideoRequest( + video_config=video_config + ) + + # Set the chunk size to 5MB (recommended less than 10MB). + chunk_size = 5 * 1024 * 1024 + + def stream_generator(): + yield config_request + # Load file content. + # Note: Input videos must have supported video codecs. See + # https://cloud.google.com/video-intelligence/docs/streaming/streaming#supported_video_codecs + # for more details. + with io.open(path, "rb") as video_file: + while True: + data = video_file.read(chunk_size) + if not data: + break + yield videointelligence.StreamingAnnotateVideoRequest( + input_content=data + ) + + requests = stream_generator() + + # streaming_annotate_video returns a generator. + # The default timeout is about 300 seconds. + # To process longer videos it should be set to + # larger than the length (in seconds) of the video. + responses = client.streaming_annotate_video(requests, timeout=900) + + # Each response corresponds to about 1 second of video. + for response in responses: + # Check for errors. 
+ if response.error.message: + print(response.error.message) + break + + for label in response.annotation_results.label_annotations: + for frame in label.frames: + print( + "At {:3d}s segment, {:5.1%} {}".format( + frame.time_offset.seconds, + frame.confidence, + label.entity.entity_id, + ) + ) + # [END video_streaming_automl_action_recognition_beta] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + subparsers = parser.add_subparsers(dest="command") + + speech_transcription_parser = subparsers.add_parser( + "transcription", help=speech_transcription.__doc__ + ) + speech_transcription_parser.add_argument("gcs_uri") + + video_text_gcs_parser = subparsers.add_parser( + "video-text-gcs", help=video_detect_text_gcs.__doc__ + ) + video_text_gcs_parser.add_argument("gcs_uri") + + video_text_parser = subparsers.add_parser( + "video-text", help=video_detect_text.__doc__ + ) + video_text_parser.add_argument("path") + + video_streaming_labels_parser = subparsers.add_parser( + "streaming-labels", help=detect_labels_streaming.__doc__ + ) + video_streaming_labels_parser.add_argument("path") + + video_streaming_shot_change_parser = subparsers.add_parser( + "streaming-shot-change", help=detect_shot_change_streaming.__doc__ + ) + video_streaming_shot_change_parser.add_argument("path") + + video_streaming_objects_parser = subparsers.add_parser( + "streaming-objects", help=track_objects_streaming.__doc__ + ) + video_streaming_objects_parser.add_argument("path") + + video_streaming_explicit_content_parser = subparsers.add_parser( + "streaming-explicit-content", help=detect_explicit_content_streaming.__doc__ + ) + video_streaming_explicit_content_parser.add_argument("path") + + video_streaming_annotation_to_storage_parser = subparsers.add_parser( + "streaming-annotation-storage", help=annotation_to_storage_streaming.__doc__ + ) + video_streaming_annotation_to_storage_parser.add_argument("path") + video_streaming_annotation_to_storage_parser.add_argument("output_uri") + + video_streaming_automl_classification_parser = subparsers.add_parser( + "streaming-automl-classification", help=streaming_automl_classification.__doc__ + ) + video_streaming_automl_classification_parser.add_argument("path") + video_streaming_automl_classification_parser.add_argument("project_id") + video_streaming_automl_classification_parser.add_argument("model_id") + + video_streaming_automl_object_tracking_parser = subparsers.add_parser( + "streaming-automl-object-tracking", + help=streaming_automl_object_tracking.__doc__, + ) + video_streaming_automl_object_tracking_parser.add_argument("path") + video_streaming_automl_object_tracking_parser.add_argument("project_id") + video_streaming_automl_object_tracking_parser.add_argument("model_id") + + video_streaming_automl_action_recognition_parser = subparsers.add_parser( + "streaming-automl-action-recognition", + help=streaming_automl_action_recognition.__doc__, + ) + video_streaming_automl_action_recognition_parser.add_argument("path") + video_streaming_automl_action_recognition_parser.add_argument("project_id") + video_streaming_automl_action_recognition_parser.add_argument("model_id") + + args = parser.parse_args() + + if args.command == "transcription": + speech_transcription(args.gcs_uri) + elif args.command == "video-text-gcs": + video_detect_text_gcs(args.gcs_uri) + elif args.command == "video-text": + video_detect_text(args.path) + elif args.command == "streaming-labels": + 
detect_labels_streaming(args.path) + elif args.command == "streaming-shot-change": + detect_shot_change_streaming(args.path) + elif args.command == "streaming-objects": + track_objects_streaming(args.path) + elif args.command == "streaming-explicit-content": + detect_explicit_content_streaming(args.path) + elif args.command == "streaming-annotation-storage": + annotation_to_storage_streaming(args.path, args.output_uri) + elif args.command == "streaming-automl-classification": + streaming_automl_classification(args.path, args.project_id, args.model_id) + elif args.command == "streaming-automl-object-tracking": + streaming_automl_object_tracking(args.path, args.project_id, args.model_id) + elif args.command == "streaming-automl-action-recognition": + streaming_automl_action_recognition(args.path, args.project_id, args.model_id) diff --git a/videointelligence/samples/analyze/beta_snippets_test.py b/videointelligence/samples/analyze/beta_snippets_test.py new file mode 100644 index 000000000000..6749f1ebc747 --- /dev/null +++ b/videointelligence/samples/analyze/beta_snippets_test.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from urllib.request import urlopen +import uuid + +import backoff +from google.api_core.exceptions import Conflict +from google.cloud import storage +import pytest + +import beta_snippets + +POSSIBLE_TEXTS = [ + "Google", + "SUR", + "SUR", + "ROTO", + "Vice President", + "58oo9", + "LONDRES", + "OMAR", + "PARIS", + "METRO", + "RUE", + "CARLO", +] + + +@pytest.fixture(scope="session") +def video_path(tmpdir_factory): + file = urlopen("http://storage.googleapis.com/cloud-samples-data/video/cat.mp4") + path = tmpdir_factory.mktemp("video").join("file.mp4") + with open(str(path), "wb") as f: + f.write(file.read()) + + return str(path) + + +@pytest.fixture(scope="function") +def bucket(): + # Create a temporaty bucket to store annotation output. + bucket_name = f"tmp-{uuid.uuid4().hex}" + storage_client = storage.Client() + bucket = storage_client.create_bucket(bucket_name) + + yield bucket + + # Teardown. We're occasionally seeing 409 conflict errors. + # Retrying upon 409s. 
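+    # backoff.expo retries delete_bucket with exponential backoff for up to 120 seconds when a Conflict (409) is raised.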
+ @backoff.on_exception(backoff.expo, Conflict, max_time=120) + def delete_bucket(): + bucket.delete(force=True) + + delete_bucket() + + +def test_speech_transcription(capsys): + beta_snippets.speech_transcription( + "gs://python-docs-samples-tests/video/googlework_short.mp4", timeout=240 + ) + out, _ = capsys.readouterr() + assert "cultural" in out + + +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_detect_labels_streaming(capsys, video_path): + beta_snippets.detect_labels_streaming(video_path) + + out, _ = capsys.readouterr() + assert "cat" in out + + +def test_detect_shot_change_streaming(capsys, video_path): + beta_snippets.detect_shot_change_streaming(video_path) + + out, _ = capsys.readouterr() + assert "Shot" in out + + +# Flaky ServiceUnavailable +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_track_objects_streaming(capsys, video_path): + beta_snippets.track_objects_streaming(video_path) + + out, _ = capsys.readouterr() + assert "cat" in out + + +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_detect_explicit_content_streaming(capsys, video_path): + beta_snippets.detect_explicit_content_streaming(video_path) + + out, _ = capsys.readouterr() + assert "Time" in out + + +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_annotation_to_storage_streaming(capsys, video_path, bucket): + output_uri = "gs://{}".format(bucket.name) + beta_snippets.annotation_to_storage_streaming(video_path, output_uri) + + out, _ = capsys.readouterr() + assert "Storage" in out + + +# Flaky timeout +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_detect_text(capsys): + in_file = "./resources/googlework_tiny.mp4" + beta_snippets.video_detect_text(in_file) + out, _ = capsys.readouterr() + assert "Text" in out + + +# Flaky timeout +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_detect_text_gcs(capsys): + in_file = "gs://python-docs-samples-tests/video/googlework_tiny.mp4" + beta_snippets.video_detect_text_gcs(in_file) + out, _ = capsys.readouterr() + assert "Text" in out + + +# Flaky Gateway +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_streaming_automl_classification(capsys, video_path): + project_id = os.environ["GOOGLE_CLOUD_PROJECT"] + model_id = "VCN6363999689846554624" + beta_snippets.streaming_automl_classification(video_path, project_id, model_id) + out, _ = capsys.readouterr() + assert "brush_hair" in out + + +# Flaky Gateway +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_streaming_automl_object_tracking(capsys, video_path): + project_id = os.environ["GOOGLE_CLOUD_PROJECT"] + model_id = "VOT282620667826798592" + beta_snippets.streaming_automl_object_tracking(video_path, project_id, model_id) + out, _ = capsys.readouterr() + assert "Track Id" in out + + +# Flaky Gateway +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_streaming_automl_action_recognition(capsys, video_path): + project_id = os.environ["GOOGLE_CLOUD_PROJECT"] + model_id = "6652522536091254784" + beta_snippets.streaming_automl_action_recognition(video_path, project_id, model_id) + out, _ = capsys.readouterr() + assert "segment" in out diff --git a/videointelligence/samples/analyze/requirements-test.txt b/videointelligence/samples/analyze/requirements-test.txt new file mode 100644 index 000000000000..62437e83962d --- /dev/null +++ b/videointelligence/samples/analyze/requirements-test.txt @@ -0,0 +1,3 @@ +backoff==2.2.1 +pytest==7.4.0 +flaky==3.7.0 diff --git a/videointelligence/samples/analyze/requirements.txt b/videointelligence/samples/analyze/requirements.txt new file 
mode 100644 index 000000000000..2d5d5acc4143 --- /dev/null +++ b/videointelligence/samples/analyze/requirements.txt @@ -0,0 +1,2 @@ +google-cloud-videointelligence==2.11.3 +google-cloud-storage==2.10.0 diff --git a/videointelligence/samples/analyze/resources/README.md b/videointelligence/samples/analyze/resources/README.md new file mode 100644 index 000000000000..1acbef1484a9 --- /dev/null +++ b/videointelligence/samples/analyze/resources/README.md @@ -0,0 +1,17 @@ +# Resources folder for local files + +[![Open in Cloud Shell][shell_img]][shell_link] + +[shell_img]: http://gstatic.com/cloudssh/images/open-btn.png +[shell_link]: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=video/cloud-client/analyze/resources/README.md + +Copy videos from Google Cloud Storage to this folder to test video analysis +of local files. For `cat.mp4`, used in the usage example, run the following +`gsutil` command. + + gsutil cp gs://cloud-samples-data/video/cat.mp4 . + +Now, when you run the following command, the video used for label detection +is read from this folder: + + python analyze.py labels_file resources/cat.mp4 diff --git a/videointelligence/samples/analyze/resources/cat.mp4 b/videointelligence/samples/analyze/resources/cat.mp4 new file mode 100644 index 000000000000..0e071b9ec678 Binary files /dev/null and b/videointelligence/samples/analyze/resources/cat.mp4 differ diff --git a/videointelligence/samples/analyze/resources/googlework_short.mp4 b/videointelligence/samples/analyze/resources/googlework_short.mp4 new file mode 100644 index 000000000000..30af418a6c5e Binary files /dev/null and b/videointelligence/samples/analyze/resources/googlework_short.mp4 differ diff --git a/videointelligence/samples/analyze/resources/googlework_tiny.mp4 b/videointelligence/samples/analyze/resources/googlework_tiny.mp4 new file mode 100644 index 000000000000..7c6cbc31f1b3 Binary files /dev/null and b/videointelligence/samples/analyze/resources/googlework_tiny.mp4 differ diff --git a/videointelligence/samples/analyze/video_detect_faces.py b/videointelligence/samples/analyze/video_detect_faces.py new file mode 100644 index 000000000000..92c9d12f8fe3 --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_faces.py @@ -0,0 +1,85 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
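+ +# Minimal usage sketch (not part of the sample itself): assuming a local clip such as +# resources/googlework_short.mp4 (the file used by the accompanying test), the +# detect_faces() function defined below could be driven with: +# +# from video_detect_faces import detect_faces +# detect_faces(local_file_path="resources/googlework_short.mp4")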
+ +# [START video_detect_faces] +import io + +from google.cloud import videointelligence_v1 as videointelligence + + +def detect_faces(local_file_path="path/to/your/video-file.mp4"): + """Detects faces in a video from a local file.""" + + client = videointelligence.VideoIntelligenceServiceClient() + + with io.open(local_file_path, "rb") as f: + input_content = f.read() + + # Configure the request + config = videointelligence.FaceDetectionConfig( + include_bounding_boxes=True, include_attributes=True + ) + context = videointelligence.VideoContext(face_detection_config=config) + + # Start the asynchronous request + operation = client.annotate_video( + request={ + "features": [videointelligence.Feature.FACE_DETECTION], + "input_content": input_content, + "video_context": context, + } + ) + + print("\nProcessing video for face detection annotations.") + result = operation.result(timeout=300) + + print("\nFinished processing.\n") + + # Retrieve the first result, because a single video was processed. + annotation_result = result.annotation_results[0] + + for annotation in annotation_result.face_detection_annotations: + print("Face detected:") + for track in annotation.tracks: + print( + "Segment: {}s to {}s".format( + track.segment.start_time_offset.seconds + + track.segment.start_time_offset.microseconds / 1e6, + track.segment.end_time_offset.seconds + + track.segment.end_time_offset.microseconds / 1e6, + ) + ) + + # Each segment includes timestamped faces that include + # characteristics of the face detected. + # Grab the first timestamped face + timestamped_object = track.timestamped_objects[0] + box = timestamped_object.normalized_bounding_box + print("Bounding box:") + print("\tleft : {}".format(box.left)) + print("\ttop : {}".format(box.top)) + print("\tright : {}".format(box.right)) + print("\tbottom: {}".format(box.bottom)) + + # Attributes include glasses, headwear, smiling, direction of gaze + print("Attributes:") + for attribute in timestamped_object.attributes: + print( + "\t{}:{} {}".format( + attribute.name, attribute.value, attribute.confidence + ) + ) + + +# [END video_detect_faces] diff --git a/videointelligence/samples/analyze/video_detect_faces_gcs.py b/videointelligence/samples/analyze/video_detect_faces_gcs.py new file mode 100644 index 000000000000..20e59bfea457 --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_faces_gcs.py @@ -0,0 +1,80 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# [START video_detect_faces_gcs] +from google.cloud import videointelligence_v1 as videointelligence + + +def detect_faces(gcs_uri="gs://YOUR_BUCKET_ID/path/to/your/video.mp4"): + """Detects faces in a video.""" + + client = videointelligence.VideoIntelligenceServiceClient() + + # Configure the request + config = videointelligence.FaceDetectionConfig( + include_bounding_boxes=True, include_attributes=True + ) + context = videointelligence.VideoContext(face_detection_config=config) + + # Start the asynchronous request + operation = client.annotate_video( + request={ + "features": [videointelligence.Feature.FACE_DETECTION], + "input_uri": gcs_uri, + "video_context": context, + } + ) + + print("\nProcessing video for face detection annotations.") + result = operation.result(timeout=300) + + print("\nFinished processing.\n") + + # Retrieve the first result, because a single video was processed. + annotation_result = result.annotation_results[0] + + for annotation in annotation_result.face_detection_annotations: + print("Face detected:") + for track in annotation.tracks: + print( + "Segment: {}s to {}s".format( + track.segment.start_time_offset.seconds + + track.segment.start_time_offset.microseconds / 1e6, + track.segment.end_time_offset.seconds + + track.segment.end_time_offset.microseconds / 1e6, + ) + ) + + # Each segment includes timestamped faces that include + # characteristics of the face detected. + # Grab the first timestamped face + timestamped_object = track.timestamped_objects[0] + box = timestamped_object.normalized_bounding_box + print("Bounding box:") + print("\tleft : {}".format(box.left)) + print("\ttop : {}".format(box.top)) + print("\tright : {}".format(box.right)) + print("\tbottom: {}".format(box.bottom)) + + # Attributes include glasses, headwear, smiling, direction of gaze + print("Attributes:") + for attribute in timestamped_object.attributes: + print( + "\t{}:{} {}".format( + attribute.name, attribute.value, attribute.confidence + ) + ) + + +# [END video_detect_faces_gcs] diff --git a/videointelligence/samples/analyze/video_detect_faces_gcs_test.py b/videointelligence/samples/analyze/video_detect_faces_gcs_test.py new file mode 100644 index 000000000000..341073c9c42c --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_faces_gcs_test.py @@ -0,0 +1,32 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import pytest + +import video_detect_faces_gcs + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_detect_faces(capsys): + input_uri = "gs://cloud-samples-data/video/googlework_short.mp4" + + video_detect_faces_gcs.detect_faces(gcs_uri=input_uri) + + out, _ = capsys.readouterr() + + assert "Face detected:" in out diff --git a/videointelligence/samples/analyze/video_detect_faces_test.py b/videointelligence/samples/analyze/video_detect_faces_test.py new file mode 100644 index 000000000000..a0879b08ee61 --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_faces_test.py @@ -0,0 +1,32 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest + +import video_detect_faces + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_detect_faces(capsys): + local_file_path = os.path.join(RESOURCES, "googlework_short.mp4") + + video_detect_faces.detect_faces(local_file_path=local_file_path) + + out, _ = capsys.readouterr() + + assert "Face detected:" in out diff --git a/videointelligence/samples/analyze/video_detect_logo.py b/videointelligence/samples/analyze/video_detect_logo.py new file mode 100644 index 000000000000..65eb22f7d98b --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_logo.py @@ -0,0 +1,108 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# isort: split +# [START video_detect_logo] +import io + +from google.cloud import videointelligence + + +def detect_logo(local_file_path="path/to/your/video.mp4"): + """Performs asynchronous video annotation for logo recognition on a local file.""" + + client = videointelligence.VideoIntelligenceServiceClient() + + with io.open(local_file_path, "rb") as f: + input_content = f.read() + features = [videointelligence.Feature.LOGO_RECOGNITION] + + operation = client.annotate_video( + request={"features": features, "input_content": input_content} + ) + + print("Waiting for operation to complete...") + response = operation.result() + + # Get the first response, since we sent only one video. + annotation_result = response.annotation_results[0] + + # Annotations for list of logos detected, tracked and recognized in video. + for logo_recognition_annotation in annotation_result.logo_recognition_annotations: + entity = logo_recognition_annotation.entity + + # Opaque entity ID. 
Some IDs may be available in [Google Knowledge Graph + # Search API](https://developers.google.com/knowledge-graph/). + print("Entity Id : {}".format(entity.entity_id)) + + print("Description : {}".format(entity.description)) + + # All logo tracks where the recognized logo appears. Each track corresponds + # to one logo instance appearing in consecutive frames. + for track in logo_recognition_annotation.tracks: + # Video segment of a track. + print( + "\n\tStart Time Offset : {}.{}".format( + track.segment.start_time_offset.seconds, + track.segment.start_time_offset.microseconds * 1000, + ) + ) + print( + "\tEnd Time Offset : {}.{}".format( + track.segment.end_time_offset.seconds, + track.segment.end_time_offset.microseconds * 1000, + ) + ) + print("\tConfidence : {}".format(track.confidence)) + + # The object with timestamp and attributes per frame in the track. + for timestamped_object in track.timestamped_objects: + # Normalized Bounding box in a frame, where the object is located. + normalized_bounding_box = timestamped_object.normalized_bounding_box + print("\n\t\tLeft : {}".format(normalized_bounding_box.left)) + print("\t\tTop : {}".format(normalized_bounding_box.top)) + print("\t\tRight : {}".format(normalized_bounding_box.right)) + print("\t\tBottom : {}".format(normalized_bounding_box.bottom)) + + # Optional. The attributes of the object in the bounding box. + for attribute in timestamped_object.attributes: + print("\n\t\t\tName : {}".format(attribute.name)) + print("\t\t\tConfidence : {}".format(attribute.confidence)) + print("\t\t\tValue : {}".format(attribute.value)) + + # Optional. Attributes in the track level. + for track_attribute in track.attributes: + print("\n\t\tName : {}".format(track_attribute.name)) + print("\t\tConfidence : {}".format(track_attribute.confidence)) + print("\t\tValue : {}".format(track_attribute.value)) + + # All video segments where the recognized logo appears. There might be + # multiple instances of the same logo class appearing in one VideoSegment. + for segment in logo_recognition_annotation.segments: + print( + "\n\tStart Time Offset : {}.{}".format( + segment.start_time_offset.seconds, + segment.start_time_offset.microseconds * 1000, + ) + ) + print( + "\tEnd Time Offset : {}.{}".format( + segment.end_time_offset.seconds, + segment.end_time_offset.microseconds * 1000, + ) + ) + + +# [END video_detect_logo] diff --git a/videointelligence/samples/analyze/video_detect_logo_gcs.py b/videointelligence/samples/analyze/video_detect_logo_gcs.py new file mode 100644 index 000000000000..620b331b2d53 --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_logo_gcs.py @@ -0,0 +1,101 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# [START video_detect_logo_gcs] + +from google.cloud import videointelligence + + +def detect_logo_gcs(input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.mp4"): + client = videointelligence.VideoIntelligenceServiceClient() + + features = [videointelligence.Feature.LOGO_RECOGNITION] + + operation = client.annotate_video( + request={"features": features, "input_uri": input_uri} + ) + + print("Waiting for operation to complete...") + response = operation.result() + + # Get the first response, since we sent only one video. + annotation_result = response.annotation_results[0] + + # Annotations for list of logos detected, tracked and recognized in video. + for logo_recognition_annotation in annotation_result.logo_recognition_annotations: + entity = logo_recognition_annotation.entity + + # Opaque entity ID. Some IDs may be available in [Google Knowledge Graph + # Search API](https://developers.google.com/knowledge-graph/). + print("Entity Id : {}".format(entity.entity_id)) + + print("Description : {}".format(entity.description)) + + # All logo tracks where the recognized logo appears. Each track corresponds + # to one logo instance appearing in consecutive frames. + for track in logo_recognition_annotation.tracks: + # Video segment of a track. + print( + "\n\tStart Time Offset : {}.{}".format( + track.segment.start_time_offset.seconds, + track.segment.start_time_offset.microseconds * 1000, + ) + ) + print( + "\tEnd Time Offset : {}.{}".format( + track.segment.end_time_offset.seconds, + track.segment.end_time_offset.microseconds * 1000, + ) + ) + print("\tConfidence : {}".format(track.confidence)) + + # The object with timestamp and attributes per frame in the track. + for timestamped_object in track.timestamped_objects: + # Normalized Bounding box in a frame, where the object is located. + normalized_bounding_box = timestamped_object.normalized_bounding_box + print("\n\t\tLeft : {}".format(normalized_bounding_box.left)) + print("\t\tTop : {}".format(normalized_bounding_box.top)) + print("\t\tRight : {}".format(normalized_bounding_box.right)) + print("\t\tBottom : {}".format(normalized_bounding_box.bottom)) + + # Optional. The attributes of the object in the bounding box. + for attribute in timestamped_object.attributes: + print("\n\t\t\tName : {}".format(attribute.name)) + print("\t\t\tConfidence : {}".format(attribute.confidence)) + print("\t\t\tValue : {}".format(attribute.value)) + + # Optional. Attributes in the track level. + for track_attribute in track.attributes: + print("\n\t\tName : {}".format(track_attribute.name)) + print("\t\tConfidence : {}".format(track_attribute.confidence)) + print("\t\tValue : {}".format(track_attribute.value)) + + # All video segments where the recognized logo appears. There might be + # multiple instances of the same logo class appearing in one VideoSegment. 
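+ # Note: the offsets below are printed as "{seconds}.{microseconds * 1000}", so the digits after the dot are the sub-second part expressed in nanoseconds rather than a zero-padded decimal fraction of a second.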
+ for segment in logo_recognition_annotation.segments: + print( + "\n\tStart Time Offset : {}.{}".format( + segment.start_time_offset.seconds, + segment.start_time_offset.microseconds * 1000, + ) + ) + print( + "\tEnd Time Offset : {}.{}".format( + segment.end_time_offset.seconds, + segment.end_time_offset.microseconds * 1000, + ) + ) + + +# [END video_detect_logo_gcs] diff --git a/videointelligence/samples/analyze/video_detect_logo_gcs_test.py b/videointelligence/samples/analyze/video_detect_logo_gcs_test.py new file mode 100644 index 000000000000..f071726eefa4 --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_logo_gcs_test.py @@ -0,0 +1,36 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest + +import video_detect_logo_gcs + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +# Flaky Deadline +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_detect_logo_gcs(capsys): + input_uri = "gs://cloud-samples-data/video/googlework_tiny.mp4" + + video_detect_logo_gcs.detect_logo_gcs(input_uri=input_uri) + + out, _ = capsys.readouterr() + + assert "Description" in out + assert "Confidence" in out + assert "Start Time Offset" in out + assert "End Time Offset" in out diff --git a/videointelligence/samples/analyze/video_detect_logo_test.py b/videointelligence/samples/analyze/video_detect_logo_test.py new file mode 100644 index 000000000000..2b1ca7c031a1 --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_logo_test.py @@ -0,0 +1,36 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import pytest + +import video_detect_logo + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +# Flaky Bad Gateway +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_detect_logo(capsys): + local_file_path = os.path.join(RESOURCES, "googlework_tiny.mp4") + + video_detect_logo.detect_logo(local_file_path=local_file_path) + + out, _ = capsys.readouterr() + + assert "Description" in out + assert "Confidence" in out + assert "Start Time Offset" in out + assert "End Time Offset" in out diff --git a/videointelligence/samples/analyze/video_detect_person.py b/videointelligence/samples/analyze/video_detect_person.py new file mode 100644 index 000000000000..aea812c5a5cc --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_person.py @@ -0,0 +1,101 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START video_detect_person] +import io + +from google.cloud import videointelligence_v1 as videointelligence + + +def detect_person(local_file_path="path/to/your/video-file.mp4"): + """Detects people in a video from a local file.""" + + client = videointelligence.VideoIntelligenceServiceClient() + + with io.open(local_file_path, "rb") as f: + input_content = f.read() + + # Configure the request + config = videointelligence.types.PersonDetectionConfig( + include_bounding_boxes=True, + include_attributes=True, + include_pose_landmarks=True, + ) + context = videointelligence.types.VideoContext(person_detection_config=config) + + # Start the asynchronous request + operation = client.annotate_video( + request={ + "features": [videointelligence.Feature.PERSON_DETECTION], + "input_content": input_content, + "video_context": context, + } + ) + + print("\nProcessing video for person detection annotations.") + result = operation.result(timeout=300) + + print("\nFinished processing.\n") + + # Retrieve the first result, because a single video was processed. + annotation_result = result.annotation_results[0] + + for annotation in annotation_result.person_detection_annotations: + print("Person detected:") + for track in annotation.tracks: + print( + "Segment: {}s to {}s".format( + track.segment.start_time_offset.seconds + + track.segment.start_time_offset.microseconds / 1e6, + track.segment.end_time_offset.seconds + + track.segment.end_time_offset.microseconds / 1e6, + ) + ) + + # Each segment includes timestamped objects that include + # characteristics, e.g. clothes and posture of the person detected. + # Grab the first timestamped object + timestamped_object = track.timestamped_objects[0] + box = timestamped_object.normalized_bounding_box + print("Bounding box:") + print("\tleft : {}".format(box.left)) + print("\ttop : {}".format(box.top)) + print("\tright : {}".format(box.right)) + print("\tbottom: {}".format(box.bottom)) + + # Attributes include unique pieces of clothing, + # poses, or hair color.
+ print("Attributes:") + for attribute in timestamped_object.attributes: + print( + "\t{}:{} {}".format( + attribute.name, attribute.value, attribute.confidence + ) + ) + + # Landmarks in person detection include body parts such as + # left_shoulder, right_ear, and right_ankle + print("Landmarks:") + for landmark in timestamped_object.landmarks: + print( + "\t{}: {} (x={}, y={})".format( + landmark.name, + landmark.confidence, + landmark.point.x, # Normalized vertex + landmark.point.y, # Normalized vertex + ) + ) + + +# [END video_detect_person] diff --git a/videointelligence/samples/analyze/video_detect_person_gcs.py b/videointelligence/samples/analyze/video_detect_person_gcs.py new file mode 100644 index 000000000000..ebc63c19fa0e --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_person_gcs.py @@ -0,0 +1,96 @@ +# +# Copyright 2020 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START video_detect_person_gcs] +from google.cloud import videointelligence_v1 as videointelligence + + +def detect_person(gcs_uri="gs://YOUR_BUCKET_ID/path/to/your/video.mp4"): + """Detects people in a video.""" + + client = videointelligence.VideoIntelligenceServiceClient() + + # Configure the request + config = videointelligence.types.PersonDetectionConfig( + include_bounding_boxes=True, + include_attributes=True, + include_pose_landmarks=True, + ) + context = videointelligence.types.VideoContext(person_detection_config=config) + + # Start the asynchronous request + operation = client.annotate_video( + request={ + "features": [videointelligence.Feature.PERSON_DETECTION], + "input_uri": gcs_uri, + "video_context": context, + } + ) + + print("\nProcessing video for person detection annotations.") + result = operation.result(timeout=300) + + print("\nFinished processing.\n") + + # Retrieve the first result, because a single video was processed. + annotation_result = result.annotation_results[0] + + for annotation in annotation_result.person_detection_annotations: + print("Person detected:") + for track in annotation.tracks: + print( + "Segment: {}s to {}s".format( + track.segment.start_time_offset.seconds + + track.segment.start_time_offset.microseconds / 1e6, + track.segment.end_time_offset.seconds + + track.segment.end_time_offset.microseconds / 1e6, + ) + ) + + # Each segment includes timestamped objects that include + # characteristics, e.g. clothes and posture of the person detected. + # Grab the first timestamped object + timestamped_object = track.timestamped_objects[0] + box = timestamped_object.normalized_bounding_box + print("Bounding box:") + print("\tleft : {}".format(box.left)) + print("\ttop : {}".format(box.top)) + print("\tright : {}".format(box.right)) + print("\tbottom: {}".format(box.bottom)) + + # Attributes include unique pieces of clothing, + # poses, or hair color.
+ print("Attributes:") + for attribute in timestamped_object.attributes: + print( + "\t{}:{} {}".format( + attribute.name, attribute.value, attribute.confidence + ) + ) + + # Landmarks in person detection include body parts such as + # left_shoulder, right_ear, and right_ankle + print("Landmarks:") + for landmark in timestamped_object.landmarks: + print( + "\t{}: {} (x={}, y={})".format( + landmark.name, + landmark.confidence, + landmark.point.x, # Normalized vertex + landmark.point.y, # Normalized vertex + ) + ) + + +# [END video_detect_person_gcs] diff --git a/videointelligence/samples/analyze/video_detect_person_gcs_test.py b/videointelligence/samples/analyze/video_detect_person_gcs_test.py new file mode 100644 index 000000000000..739c3980c556 --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_person_gcs_test.py @@ -0,0 +1,35 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest + +import video_detect_person_gcs + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_detect_person(capsys): + input_uri = "gs://cloud-samples-data/video/googlework_tiny.mp4" + + video_detect_person_gcs.detect_person(gcs_uri=input_uri) + + out, _ = capsys.readouterr() + + assert "Person detected:" in out + assert "Attributes:" in out + assert "x=" in out + assert "y=" in out diff --git a/videointelligence/samples/analyze/video_detect_person_test.py b/videointelligence/samples/analyze/video_detect_person_test.py new file mode 100644 index 000000000000..fcf7eb79a0b8 --- /dev/null +++ b/videointelligence/samples/analyze/video_detect_person_test.py @@ -0,0 +1,35 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import pytest + +import video_detect_person + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +@pytest.mark.flaky(max_runs=3, min_passes=1) +def test_detect_person(capsys): + local_file_path = os.path.join(RESOURCES, "googlework_tiny.mp4") + + video_detect_person.detect_person(local_file_path=local_file_path) + + out, _ = capsys.readouterr() + + assert "Person detected:" in out + assert "Attributes:" in out + assert "x=" in out + assert "y=" in out diff --git a/videointelligence/samples/labels/README.rst.in b/videointelligence/samples/labels/README.rst.in new file mode 100644 index 000000000000..2d6b97cf6e6c --- /dev/null +++ b/videointelligence/samples/labels/README.rst.in @@ -0,0 +1,22 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Video Intelligence API + short_name: Cloud Video Intelligence API + url: https://cloud.google.com/video-intelligence/docs + description: > + `Google Cloud Video Intelligence API`_ allows developers to easily + integrate feature detection in video. + +setup: +- auth +- install_deps + +samples: +- name: labels + file: labels.py + show_help: True + +cloud_client_library: true + +folder: video/cloud-client/labels \ No newline at end of file diff --git a/videointelligence/samples/labels/labels.py b/videointelligence/samples/labels/labels.py new file mode 100644 index 000000000000..0efc37f9a1a6 --- /dev/null +++ b/videointelligence/samples/labels/labels.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +# Copyright 2017 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This application demonstrates how to detect labels from a video +based on the image content with the Google Cloud Video Intelligence +API. + +For more information, check out the documentation at +https://cloud.google.com/videointelligence/docs. 
+ +Usage Example: + + python labels.py gs://cloud-ml-sandbox/video/chicago.mp4 + +""" + +# [START video_label_tutorial] +# [START video_label_tutorial_imports] +import argparse + +from google.cloud import videointelligence + +# [END video_label_tutorial_imports] + + +def analyze_labels(path): + """Detects labels given a GCS path.""" + # [START video_label_tutorial_construct_request] + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.LABEL_DETECTION] + operation = video_client.annotate_video( + request={"features": features, "input_uri": path} + ) + # [END video_label_tutorial_construct_request] + print("\nProcessing video for label annotations:") + + # [START video_label_tutorial_check_operation] + result = operation.result(timeout=90) + print("\nFinished processing.") + # [END video_label_tutorial_check_operation] + + # [START video_label_tutorial_parse_response] + segment_labels = result.annotation_results[0].segment_label_annotations + for i, segment_label in enumerate(segment_labels): + print("Video label description: {}".format(segment_label.entity.description)) + for category_entity in segment_label.category_entities: + print( + "\tLabel category description: {}".format(category_entity.description) + ) + + for i, segment in enumerate(segment_label.segments): + start_time = ( + segment.segment.start_time_offset.seconds + + segment.segment.start_time_offset.microseconds / 1e6 + ) + end_time = ( + segment.segment.end_time_offset.seconds + + segment.segment.end_time_offset.microseconds / 1e6 + ) + positions = "{}s to {}s".format(start_time, end_time) + confidence = segment.confidence + print("\tSegment {}: {}".format(i, positions)) + print("\tConfidence: {}".format(confidence)) + print("\n") + # [END video_label_tutorial_parse_response] + + +if __name__ == "__main__": + # [START video_label_tutorial_run_application] + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("path", help="GCS file path for label detection.") + args = parser.parse_args() + + analyze_labels(args.path) + # [END video_label_tutorial_run_application] +# [END video_label_tutorial] diff --git a/videointelligence/samples/labels/labels_test.py b/videointelligence/samples/labels/labels_test.py new file mode 100644 index 000000000000..2b3a250f80ff --- /dev/null +++ b/videointelligence/samples/labels/labels_test.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +# Copyright 2017 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import labels + + +def test_feline_video_labels(capsys): + labels.analyze_labels("gs://cloud-samples-data/video/cat.mp4") + out, _ = capsys.readouterr() + assert "Video label description: cat" in out diff --git a/videointelligence/samples/labels/requirements-test.txt b/videointelligence/samples/labels/requirements-test.txt new file mode 100644 index 000000000000..70613be0cfe4 --- /dev/null +++ b/videointelligence/samples/labels/requirements-test.txt @@ -0,0 +1 @@ +pytest==7.4.0 diff --git a/videointelligence/samples/labels/requirements.txt b/videointelligence/samples/labels/requirements.txt new file mode 100644 index 000000000000..0647cdcf07e4 --- /dev/null +++ b/videointelligence/samples/labels/requirements.txt @@ -0,0 +1 @@ +google-cloud-videointelligence==2.11.3 diff --git a/videointelligence/samples/quickstart/README.rst b/videointelligence/samples/quickstart/README.rst new file mode 100644 index 000000000000..e6b3904fdc85 --- /dev/null +++ b/videointelligence/samples/quickstart/README.rst @@ -0,0 +1,114 @@ + +.. This file is automatically generated. Do not edit this file directly. + +Google Cloud Video Intelligence API Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=video/cloud-client/quickstart/README.rst + + +This directory contains samples for Google Cloud Video Intelligence API. `Google Cloud Video Intelligence API`_ allows developers to easily integrate feature detection in video. + + + + +.. _Google Cloud Video Intelligence API: https://cloud.google.com/video-intelligence/docs + + +Setup +------------------------------------------------------------------------------- + + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + + + + +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 3.6+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + + + + + + +Samples +------------------------------------------------------------------------------- + + +quickstart ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. 
image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=video/cloud-client/quickstart/quickstart.py,video/cloud-client/quickstart/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python quickstart.py + + + + + + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ diff --git a/videointelligence/samples/quickstart/README.rst.in b/videointelligence/samples/quickstart/README.rst.in new file mode 100644 index 000000000000..9763ec6334a4 --- /dev/null +++ b/videointelligence/samples/quickstart/README.rst.in @@ -0,0 +1,21 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Video Intelligence API + short_name: Cloud Video Intelligence API + url: https://cloud.google.com/video-intelligence/docs + description: > + `Google Cloud Video Intelligence API`_ allows developers to easily + integrate feature detection in video. + +setup: +- auth +- install_deps + +samples: +- name: quickstart + file: quickstart.py + +cloud_client_library: true + +folder: video/cloud-client/quickstart \ No newline at end of file diff --git a/videointelligence/samples/quickstart/quickstart.py b/videointelligence/samples/quickstart/quickstart.py new file mode 100644 index 000000000000..87837b1c4932 --- /dev/null +++ b/videointelligence/samples/quickstart/quickstart.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +# Copyright 2017 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This application demonstrates label detection on a demo video using +the Google Cloud API. 
+ +Usage: + python quickstart.py + +""" + + +def run_quickstart(): + # [START video_quickstart] + from google.cloud import videointelligence + + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.LABEL_DETECTION] + operation = video_client.annotate_video( + request={ + "features": features, + "input_uri": "gs://cloud-samples-data/video/cat.mp4", + } + ) + print("\nProcessing video for label annotations:") + + result = operation.result(timeout=180) + print("\nFinished processing.") + + # first result is retrieved because a single video was processed + segment_labels = result.annotation_results[0].segment_label_annotations + for i, segment_label in enumerate(segment_labels): + print("Video label description: {}".format(segment_label.entity.description)) + for category_entity in segment_label.category_entities: + print( + "\tLabel category description: {}".format(category_entity.description) + ) + + for i, segment in enumerate(segment_label.segments): + start_time = ( + segment.segment.start_time_offset.seconds + + segment.segment.start_time_offset.microseconds / 1e6 + ) + end_time = ( + segment.segment.end_time_offset.seconds + + segment.segment.end_time_offset.microseconds / 1e6 + ) + positions = "{}s to {}s".format(start_time, end_time) + confidence = segment.confidence + print("\tSegment {}: {}".format(i, positions)) + print("\tConfidence: {}".format(confidence)) + print("\n") + # [END video_quickstart] + + +if __name__ == "__main__": + run_quickstart() diff --git a/videointelligence/samples/quickstart/quickstart_test.py b/videointelligence/samples/quickstart/quickstart_test.py new file mode 100644 index 000000000000..53554f885a12 --- /dev/null +++ b/videointelligence/samples/quickstart/quickstart_test.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +# Copyright 2017 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import quickstart + + +def test_quickstart(capsys): + quickstart.run_quickstart() + out, _ = capsys.readouterr() + assert "Video label description: cat" in out diff --git a/videointelligence/samples/quickstart/requirements-test.txt b/videointelligence/samples/quickstart/requirements-test.txt new file mode 100644 index 000000000000..70613be0cfe4 --- /dev/null +++ b/videointelligence/samples/quickstart/requirements-test.txt @@ -0,0 +1 @@ +pytest==7.4.0 diff --git a/videointelligence/samples/quickstart/requirements.txt b/videointelligence/samples/quickstart/requirements.txt new file mode 100644 index 000000000000..0647cdcf07e4 --- /dev/null +++ b/videointelligence/samples/quickstart/requirements.txt @@ -0,0 +1 @@ +google-cloud-videointelligence==2.11.3 diff --git a/videointelligence/samples/shotchange/README.rst b/videointelligence/samples/shotchange/README.rst new file mode 100644 index 000000000000..dea14f6e13f4 --- /dev/null +++ b/videointelligence/samples/shotchange/README.rst @@ -0,0 +1,134 @@ + +.. This file is automatically generated. Do not edit this file directly. 
+ +Google Cloud Video Intelligence API Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=video/cloud-client/shotchange/README.rst + + +This directory contains samples for Google Cloud Video Intelligence API. `Google Cloud Video Intelligence API`_ allows developers to easily integrate feature detection in video. + + + + +.. _Google Cloud Video Intelligence API: https://cloud.google.com/video-intelligence/docs + + +Setup +------------------------------------------------------------------------------- + + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + + + + +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 3.6+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + + + + + + +Samples +------------------------------------------------------------------------------- + + +Shot Change Detection ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=video/cloud-client/shotchange/shotchange.py,video/cloud-client/shotchange/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python shotchange.py + + + usage: shotchange.py [-h] path + + This application demonstrates how to identify all different shots + in a video using the Google Cloud Video Intelligence API. + + For more information, check out the documentation at + https://cloud.google.com/videointelligence/docs. + + Example Usage: + + python shotchange.py gs://cloud-samples-data/video/gbikes_dinosaur.mp4 + + positional arguments: + path GCS path for shot change detection. + + optional arguments: + -h, --help show this help message and exit + + + + + + + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. 
_browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ diff --git a/videointelligence/samples/shotchange/README.rst.in b/videointelligence/samples/shotchange/README.rst.in new file mode 100644 index 000000000000..6463d192f72b --- /dev/null +++ b/videointelligence/samples/shotchange/README.rst.in @@ -0,0 +1,22 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Video Intelligence API + short_name: Cloud Video Intelligence API + url: https://cloud.google.com/video-intelligence/docs + description: > + `Google Cloud Video Intelligence API`_ allows developers to easily + integrate feature detection in video. + +setup: +- auth +- install_deps + +samples: +- name: Shot Change Detection + file: shotchange.py + show_help: True + +cloud_client_library: true + +folder: video/cloud-client/shotchange \ No newline at end of file diff --git a/videointelligence/samples/shotchange/requirements-test.txt b/videointelligence/samples/shotchange/requirements-test.txt new file mode 100644 index 000000000000..70613be0cfe4 --- /dev/null +++ b/videointelligence/samples/shotchange/requirements-test.txt @@ -0,0 +1 @@ +pytest==7.4.0 diff --git a/videointelligence/samples/shotchange/requirements.txt b/videointelligence/samples/shotchange/requirements.txt new file mode 100644 index 000000000000..0647cdcf07e4 --- /dev/null +++ b/videointelligence/samples/shotchange/requirements.txt @@ -0,0 +1 @@ +google-cloud-videointelligence==2.11.3 diff --git a/videointelligence/samples/shotchange/shotchange.py b/videointelligence/samples/shotchange/shotchange.py new file mode 100644 index 000000000000..6f19457db297 --- /dev/null +++ b/videointelligence/samples/shotchange/shotchange.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +# Copyright 2017 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This application demonstrates how to identify all different shots +in a video using the Google Cloud Video Intelligence API. + +For more information, check out the documentation at +https://cloud.google.com/videointelligence/docs. 
+ +Example Usage: + + python shotchange.py gs://cloud-samples-data/video/gbikes_dinosaur.mp4 + +""" + +# [START video_shot_tutorial] +# [START video_shot_tutorial_imports] +import argparse + +from google.cloud import videointelligence + +# [END video_shot_tutorial_imports] + + +def analyze_shots(path): + """Detects camera shot changes.""" + # [START video_shot_tutorial_construct_request] + video_client = videointelligence.VideoIntelligenceServiceClient() + features = [videointelligence.Feature.SHOT_CHANGE_DETECTION] + operation = video_client.annotate_video( + request={"features": features, "input_uri": path} + ) + # [END video_shot_tutorial_construct_request] + print("\nProcessing video for shot change annotations:") + + # [START video_shot_tutorial_check_operation] + result = operation.result(timeout=120) + print("\nFinished processing.") + + # [END video_shot_tutorial_check_operation] + + # [START video_shot_tutorial_parse_response] + for i, shot in enumerate(result.annotation_results[0].shot_annotations): + start_time = ( + shot.start_time_offset.seconds + shot.start_time_offset.microseconds / 1e6 + ) + end_time = ( + shot.end_time_offset.seconds + shot.end_time_offset.microseconds / 1e6 + ) + print("\tShot {}: {} to {}".format(i, start_time, end_time)) + # [END video_shot_tutorial_parse_response] + + +if __name__ == "__main__": + # [START video_shot_tutorial_run_application] + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("path", help="GCS path for shot change detection.") + args = parser.parse_args() + + analyze_shots(args.path) + # [END video_shot_tutorial_run_application] +# [END video_shot_tutorial] diff --git a/videointelligence/samples/shotchange/shotchange_test.py b/videointelligence/samples/shotchange/shotchange_test.py new file mode 100644 index 000000000000..3bb31f175963 --- /dev/null +++ b/videointelligence/samples/shotchange/shotchange_test.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +# Copyright 2017 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import shotchange + + +def test_shots_dino(capsys): + shotchange.analyze_shots("gs://cloud-samples-data/video/gbikes_dinosaur.mp4") + out, _ = capsys.readouterr() + assert "Shot 1:" in out
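The samples above repeatedly convert a protobuf time offset into fractional seconds with the expression `seconds + microseconds / 1e6`. A minimal, self-contained sketch of that arithmetic (the helper name and example values below are illustrative, not part of the samples):

    def offset_to_seconds(offset):
        # The offset exposes integer `seconds` and `microseconds` fields;
        # dividing microseconds by 1e6 yields the fractional part.
        return offset.seconds + offset.microseconds / 1e6

    class FakeOffset:  # stand-in for a real time-offset message
        seconds = 7
        microseconds = 250000

    print("{}s".format(offset_to_seconds(FakeOffset())))  # prints 7.25s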