diff --git a/dlp/README.rst b/dlp/README.rst index 76bd9dd8dfc6..9ef0fc3fa147 100644 --- a/dlp/README.rst +++ b/dlp/README.rst @@ -339,13 +339,12 @@ To run this sample: .. code-block:: bash $ python deid.py - - usage: deid.py [-h] {deid_mask,deid_fpe,reid_fpe,deid_date_shift} ... + usage: deid.py [-h] {deid_mask,deid_fpe,reid_fpe,deid_date_shift,redact} ... Uses of the Data Loss Prevention API for deidentifying sensitive data. positional arguments: - {deid_mask,deid_fpe,reid_fpe,deid_date_shift} + {deid_mask,deid_fpe,reid_fpe,deid_date_shift,redact} Select how to submit content to the API. deid_mask Deidentify sensitive data in a string by masking it with a character. @@ -355,6 +354,8 @@ To run this sample: Preserving Encryption (FPE). deid_date_shift Deidentify dates in a CSV file by pseudorandomly shifting them. + redact Redact sensitive data in a string by replacing it with + the info type of the data. optional arguments: -h, --help show this help message and exit @@ -378,4 +379,4 @@ to `browse the source`_ and `report issues`_. https://github.com/GoogleCloudPlatform/google-cloud-python/issues -.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file +.. _Google Cloud SDK: https://cloud.google.com/sdk/ diff --git a/dlp/deid.py b/dlp/deid.py index b08a341dd82e..81847690866c 100644 --- a/dlp/deid.py +++ b/dlp/deid.py @@ -435,6 +435,61 @@ def write_data(data): # [END dlp_deidentify_date_shift] +# [START dlp_redact_sensitive_data] +def redact_sensitive_data(project, item, info_types): + """Uses the Data Loss Prevention API to redact sensitive data in a + string by replacing it with the info type. + Args: + project: The Google Cloud project id to use as a parent resource. + item: The string to redact (will be treated as text). + info_types: A list of strings representing info types to look for. + A full list of info type categories can be fetched from the API. + Returns: + None; the response from the API is printed to the terminal. 
+ """ + + # Import the client library + import google.cloud.dlp + + # Instantiate a client + dlp = google.cloud.dlp_v2.DlpServiceClient() + + # Convert the project id into a full resource id. + parent = dlp.project_path(project) + + # Construct inspect configuration dictionary + inspect_config = { + "info_types": [{"name": info_type} for info_type in info_types] + } + + # Construct deidentify configuration dictionary + deidentify_config = { + "info_type_transformations": { + "transformations": [ + { + "primitive_transformation": { + "replace_with_info_type_config": {} + } + } + ] + } + } + + # Call the API + response = dlp.deidentify_content( + parent, + inspect_config=inspect_config, + deidentify_config=deidentify_config, + item={"value": item}, + ) + + # Print out the results. + print(response.item.value) + + +# [END dlp_redact_sensitive_data] + + if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__) subparsers = parser.add_subparsers( @@ -626,6 +681,30 @@ def write_data(data): "key_name.", ) + redact_parser = subparsers.add_parser( + "redact", + help="Redact sensitive data in a string by replacing it with the " + "info type of the data.", + ) + redact_parser.add_argument( + "--info_types", + action="append", + help="Strings representing info types to look for. A full list of " + "info categories and types is available from the API. Examples " + 'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". ' + "If unspecified, the three above examples will be used.", + default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"], + ) + redact_parser.add_argument( + "project", + help="The Google Cloud project id to use as a parent resource.", + ) + redact_parser.add_argument( + "item", + help="The string to redact." 
+ " Example: 'My credit card is 4242 4242 4242 4242'", + ) + args = parser.parse_args() if args.content == "deid_mask": @@ -667,3 +746,9 @@ def write_data(data): wrapped_key=args.wrapped_key, key_name=args.key_name, ) + elif args.content == "redact": + redact_sensitive_data( + args.project, + item=args.item, + info_types=args.info_types, + ) diff --git a/dlp/deid_test.py b/dlp/deid_test.py index db14b5758e96..db0c94e35dd6 100644 --- a/dlp/deid_test.py +++ b/dlp/deid_test.py @@ -185,3 +185,17 @@ def test_reidentify_with_fpe(capsys): out, _ = capsys.readouterr() assert "731997681" not in out + + +def test_redact_sensitive_data(capsys): + url_to_redact = "https://cloud.google.com" + deid.redact_sensitive_data( + GCLOUD_PROJECT, + "My favorite site is " + url_to_redact, + ["URL"], + ) + + out, _ = capsys.readouterr() + + assert url_to_redact not in out + assert "My favorite site is [URL]" in out