Skip to content

Commit

Permalink
Fix dlp tests (#3058)
Browse files Browse the repository at this point in the history
Since the tests are flaky and timing out, I'm proposing we do the ML API approach of creating an operation then canceling it. 

It would 
fix #2809
fix #2810  
fix #2811 
fix #2812
  • Loading branch information
leahecole authored Mar 9, 2020
1 parent dd8984f commit c43618b
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 34 deletions.
2 changes: 1 addition & 1 deletion dlp/inspect_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def inspect_gcs_file(
}

operation = dlp.create_dlp_job(parent, inspect_job=inspect_job)

print("Inspection operation started: {}".format(operation.name))
# Create a Pub/Sub client and find the subscription. The subscription is
# expected to already be listening to the topic.
subscriber = google.cloud.pubsub.SubscriberClient()
Expand Down
72 changes: 43 additions & 29 deletions dlp/inspect_content_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import google.api_core.exceptions
import google.cloud.bigquery
import google.cloud.datastore
import google.cloud.dlp_v2
import google.cloud.exceptions
import google.cloud.pubsub
import google.cloud.storage
Expand Down Expand Up @@ -94,9 +95,7 @@ def subscription_id(topic_id):
# Subscribes to a topic.
subscriber = google.cloud.pubsub.SubscriberClient()
topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id)
subscription_path = subscriber.subscription_path(
GCLOUD_PROJECT, SUBSCRIPTION_ID
)
subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID)
try:
subscriber.create_subscription(subscription_path, topic_path)
except google.api_core.exceptions.AlreadyExists:
Expand Down Expand Up @@ -290,7 +289,6 @@ def test_inspect_image_file(capsys):
assert "Info type: PHONE_NUMBER" in out


@flaky
def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
Expand All @@ -303,10 +301,14 @@ def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
)

out, _ = capsys.readouterr()
assert "Info type: EMAIL_ADDRESS" in out
assert "Inspection operation started" in out
# Cancel the operation
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
print(operation_id)
client = google.cloud.dlp_v2.DlpServiceClient()
client.cancel_dlp_job(operation_id)


@flaky
def test_inspect_gcs_file_with_custom_info_types(
bucket, topic_id, subscription_id, capsys
):
Expand All @@ -326,14 +328,16 @@ def test_inspect_gcs_file_with_custom_info_types(
)

out, _ = capsys.readouterr()
assert "Info type: CUSTOM_DICTIONARY_0" in out
assert "Info type: CUSTOM_REGEX_0" in out

assert "Inspection operation started" in out
# Cancel the operation
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
print(operation_id)
client = google.cloud.dlp_v2.DlpServiceClient()
client.cancel_dlp_job(operation_id)

@flaky
def test_inspect_gcs_file_no_results(
bucket, topic_id, subscription_id, capsys
):

def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys):
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
Expand All @@ -345,7 +349,13 @@ def test_inspect_gcs_file_no_results(
)

out, _ = capsys.readouterr()
assert "No findings" in out

assert "Inspection operation started" in out
# Cancel the operation
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
print(operation_id)
client = google.cloud.dlp_v2.DlpServiceClient()
client.cancel_dlp_job(operation_id)


@pytest.mark.skip(reason="nondeterministically failing")
Expand All @@ -363,7 +373,6 @@ def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys):
assert "Info type: EMAIL_ADDRESS" in out


@flaky
def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
Expand All @@ -375,14 +384,17 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
)

out, _ = capsys.readouterr()
assert "Info type: EMAIL_ADDRESS" in out
assert "Info type: PHONE_NUMBER" in out

assert "Inspection operation started" in out
# Cancel the operation
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
print(operation_id)
client = google.cloud.dlp_v2.DlpServiceClient()
client.cancel_dlp_job(operation_id)


@flaky
def test_inspect_datastore(
datastore_project, topic_id, subscription_id, capsys
):
def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys):
@eventually_consistent.call
def _():
inspect_content.inspect_datastore(
Expand All @@ -402,17 +414,19 @@ def _():
def test_inspect_datastore_no_results(
datastore_project, topic_id, subscription_id, capsys
):
inspect_content.inspect_datastore(
GCLOUD_PROJECT,
datastore_project,
DATASTORE_KIND,
topic_id,
subscription_id,
["PHONE_NUMBER"],
)
@eventually_consistent.call
def _():
inspect_content.inspect_datastore(
GCLOUD_PROJECT,
datastore_project,
DATASTORE_KIND,
topic_id,
subscription_id,
["PHONE_NUMBER"],
)

out, _ = capsys.readouterr()
assert "No findings" in out
out, _ = capsys.readouterr()
assert "No findings" in out


@pytest.mark.skip(reason="unknown issue")
Expand Down
8 changes: 4 additions & 4 deletions dlp/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
google-cloud-dlp==0.12.1
google-cloud-dlp==0.13.0
google-cloud-storage==1.26.0
google-cloud-pubsub==1.0.0
google-cloud-datastore==1.9.0
google-cloud-bigquery==1.20.0
google-cloud-pubsub==1.3.1
google-cloud-datastore==1.11.0
google-cloud-bigquery==1.24.0

0 comments on commit c43618b

Please sign in to comment.