Skip to content
This repository has been archived by the owner on Sep 27, 2023. It is now read-only.

feat: add Deidentify action #742

Merged
merged 7 commits into from
Sep 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
589 changes: 470 additions & 119 deletions protos/google/privacy/dlp/v2/dlp.proto

Large diffs are not rendered by default.

57 changes: 51 additions & 6 deletions protos/google/privacy/dlp/v2/storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ syntax = "proto3";

package google.privacy.dlp.v2;

import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.Dlp.V2";
Expand All @@ -39,6 +40,32 @@ message InfoType {
string version = 2;
}

// Score is a summary of all elements in the data profile.
// A higher number means more sensitive.
message SensitivityScore {
// Various score levels for resources.
//
// The numeric values increase with sensitivity and are spaced in steps of
// 10 rather than being contiguous.
enum SensitivityScoreLevel {
// Unused. This is the zero value and therefore the enum's default.
SENSITIVITY_SCORE_UNSPECIFIED = 0;

// No sensitive information detected. Limited access.
SENSITIVITY_LOW = 10;

// Medium risk - personally identifiable information (PII), potentially
// sensitive data, or fields with free-text data that are at higher risk of
// having intermittent sensitive data. Consider limiting access.
SENSITIVITY_MODERATE = 20;

// High risk - sensitive PII (SPII) may be present. Exfiltration of data may
// lead to user data loss. Re-identification of users may be possible.
// Consider limiting usage and/or removing SPII.
SENSITIVITY_HIGH = 30;
}

// The score applied to the resource.
SensitivityScoreLevel score = 1;
}

// Categorization of results based on how likely they are to represent a match,
// based on the number of elements they contain which imply a match.
enum Likelihood {
Expand Down Expand Up @@ -411,7 +438,7 @@ message CloudStorageRegexFileSet {
repeated string exclude_regex = 3;
}

// Options defining a file or a set of files within a Google Cloud Storage
// Options defining a file or a set of files within a Cloud Storage
// bucket.
message CloudStorageOptions {
// Set of files to scan.
Expand Down Expand Up @@ -547,7 +574,7 @@ message BigQueryOptions {
// Shared message indicating Cloud storage type.
message StorageConfig {
// Configuration of the timespan of the items to include in scanning.
// Currently only supported when inspecting Google Cloud Storage and BigQuery.
// Currently only supported when inspecting Cloud Storage and BigQuery.
message TimespanConfig {
// Exclude files, tables, or rows older than this value.
// If not set, no lower time limit is applied.
Expand All @@ -560,7 +587,8 @@ message StorageConfig {
// Specification of the field containing the timestamp of scanned items.
// Used for data sources like Datastore and BigQuery.
//
// For BigQuery:
// <b>For BigQuery</b>
//
// If this value is not specified and the table was modified between the
// given start and end times, the entire table will be scanned. If this
// value is specified, then rows are filtered based on the given start and
Expand All @@ -569,25 +597,42 @@ message StorageConfig {
// Valid data types of the provided BigQuery column are: `INTEGER`, `DATE`,
// `TIMESTAMP`, and `DATETIME`.
//
// For Datastore:
// If your BigQuery table is [partitioned at ingestion
// time](https://cloud.google.com/bigquery/docs/partitioned-tables#ingestion_time),
// you can use any of the following pseudo-columns as your timestamp field.
// When used with Cloud DLP, these pseudo-column names are case sensitive.
//
// <ul>
// <li><code>_PARTITIONTIME</code></li>
// <li><code>_PARTITIONDATE</code></li>
// <li><code>_PARTITION_LOAD_TIME</code></li>
// </ul>
//
// <b>For Datastore</b>
//
// If this value is specified, then entities are filtered based on the given
// start and end times. If an entity does not contain the provided timestamp
// property or contains empty or invalid values, then it is included.
// Valid data types of the provided timestamp property are: `TIMESTAMP`.
//
// See the
// [known issue](https://cloud.google.com/dlp/docs/known-issues#bq-timespan)
// related to this operation.
FieldId timestamp_field = 3;

// When the job is started by a JobTrigger we will automatically figure out
// a valid start_time to avoid scanning files that have not been modified
// since the last time the JobTrigger executed. This will be based on the
// time of the execution of the last run of the JobTrigger.
// time of the execution of the last run of the JobTrigger or the timespan
// end_time used in the last run of the JobTrigger.
bool enable_auto_population_of_timespan_config = 4;
}

oneof type {
// Google Cloud Datastore options.
DatastoreOptions datastore_options = 2;

// Google Cloud Storage options.
// Cloud Storage options.
CloudStorageOptions cloud_storage_options = 3;

// BigQuery options.
Expand Down
Loading