Skip to content

Commit

Permalink
Merge pull request #180 from Qrlew/fix_group_by_differential_privacy_…
Browse files Browse the repository at this point in the history
…in_aggregates

Fix group by differential privacy in aggregates
  • Loading branch information
ngrislain authored Nov 13, 2023
2 parents 513668b + 78e01b5 commit e5a076c
Show file tree
Hide file tree
Showing 10 changed files with 312 additions and 26 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [0.4.12] - 2023-11-09

### Fixed
- `DataType`` propagation in joins: if their is not INNEr or CROSS contraint, then the output `DataType`s must be optional [MR179](https://github.com/Qrlew/qrlew/pull/179)
- DP queries should release all the possible values of the grouping keys. [MR180](https://github.com/Qrlew/qrlew/pull/180)
- `DataType` propagation in joins: if there is no INNER or CROSS constraint, then the output `DataType`s must be optional [MR179](https://github.com/Qrlew/qrlew/pull/179)
### Added
- Implemented `Coalesce` [MR178](https://github.com/Qrlew/qrlew/pull/178)
### Changed
- If no tau-thresholding, the budget is transferred to the aggregations [MR180](https://github.com/Qrlew/qrlew/pull/180)
- Allow public tables [MR182](https://github.com/Qrlew/qrlew/pull/182)

## [0.4.11] - 2023-11-09
Expand Down
10 changes: 8 additions & 2 deletions src/differential_privacy/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,17 @@ impl PEPRelation {
let mut input_builder = Map::builder()
.with((
self.protected_entity_id(),
Expr::col(self.protected_entity_id()),
Expr::coalesce(
Expr::cast_as_text(Expr::col(self.protected_entity_id())),
Expr::val(self.protected_entity_null_id().to_string())
),
))
.with((
self.protected_entity_weight(),
Expr::col(self.protected_entity_weight()),
Expr::coalesce(
Expr::col(self.protected_entity_weight()),
Expr::val(0.)
)
));

let mut group_by_names = vec![];
Expand Down
116 changes: 110 additions & 6 deletions src/differential_privacy/group_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{
builder::{Ready, With, WithIterator},
differential_privacy::{private_query, DPRelation, PrivateQuery, Result},
expr::{aggregate, Expr},
namer,
namer::{self, name_from_content},
protection::{PEPRelation, ProtectedEntity},
relation::{Join, Reduce, Relation, Variant as _},
};
Expand Down Expand Up @@ -146,6 +146,8 @@ impl Relation {
Ok(relation_with_private_values.public_values()?)
}

/// Joins the `self` `Relation` with the `grouping_values` `Relation`.
/// A `LEFT OUTER` join is used to guarantee that all the possible grouping keys are released.
pub fn join_with_grouping_values(self, grouping_values: Relation) -> Result<Relation> {
let left = grouping_values;
let right = self;
Expand All @@ -166,12 +168,30 @@ impl Relation {
.iter()
.map(|f| f.name().to_string())
.collect::<Vec<_>>();
let left_names = left
.schema()
.iter()
.map(|f| f.name().to_string())
.collect::<Vec<_>>();
let right_names = right
.schema()
.iter()
.map(|f| {
let name = f.name().to_string();
if left_names.contains(&name) {
name_from_content("left_".to_string(), f)
} else {
name
}
})
.collect::<Vec<_>>();

let join_rel: Relation = Relation::join()
.right(right)
.right_names(names.clone())
.right_names(right_names.clone())
.left(left)
.inner()
.left_names(left_names.clone())
.left_outer()
.on_iter(on)
.build();

Expand All @@ -185,14 +205,18 @@ mod tests {
use crate::{
ast,
builder::With,
data_type::{DataType, DataTyped, Variant},
data_type::{DataType, DataTyped, Variant, Integer},
display::Dot,
expr::AggregateColumn,
io::{postgresql, Database},
protection::{ProtectedEntity, Protection, Strategy},
relation::{Join, Schema},
relation::{Join, Schema, Field},
};
use std::{
ops::Deref,
collections::HashSet
};
use std::ops::Deref;


#[test]
fn test_tau_thresholding_values() {
Expand Down Expand Up @@ -603,4 +627,84 @@ mod tests {
let dp_query = ast::Query::from(&dp_relation);
_ = database.query(&dp_query.to_string()).unwrap();
}

/// Checks that the cities selected by the `city IN ('Paris', 'London')` filter
/// are exactly the values found in the first column of:
/// 1. the relation produced by `differentially_private_group_by`, and
/// 2. the reduce input joined back with that relation through
///    `join_with_grouping_values`.
#[test]
fn test_differentially_private_output_all_grouping_keys() {
    let mut database = postgresql::test_database();
    let relations = database.relations();
    let source_table = relations
        .get(&["large_user_table".into()])
        .unwrap()
        .as_ref()
        .clone();

    // Copy the table schema, swapping the `city` field for one typed with `DataType::text()`.
    let text_city_schema: Schema = source_table
        .schema()
        .iter()
        .map(|field| {
            if field.name() == "city" {
                Field::from_name_data_type("city", DataType::text())
            } else {
                field.clone()
            }
        })
        .collect();
    let users: Relation = Relation::table()
        .path(["large_user_table"])
        .name("more_users")
        .size(100000)
        .schema(text_city_schema)
        .build();

    // Projection carrying the protected entity columns, restricted to the two requested cities.
    let requested_cities = Expr::in_list(
        Expr::col("city"),
        Expr::list(vec!["Paris".to_string(), "London".to_string()]),
    );
    let filtered_users: Relation = Relation::map()
        .name("map_relation")
        .with(("income", expr!(income)))
        .with(("city", expr!(city)))
        .with(("age", expr!(age)))
        .with((ProtectedEntity::protected_entity_id(), expr!(id)))
        .with((ProtectedEntity::protected_entity_weight(), expr!(id)))
        .filter(requested_cities)
        .input(users.clone())
        .build();
    let reduce: Reduce = Relation::reduce()
        .name("reduce_relation")
        .with(("sum_income".to_string(), AggregateColumn::sum("income")))
        .group_by(expr!(city))
        .group_by(expr!(age))
        .input(filtered_users)
        .build();

    let expected_cities: HashSet<_> = HashSet::from(["London".to_string(), "Paris".to_string()]);

    // 1. Grouping keys released by the differentially private group by.
    let (dp_group_by, _) = reduce
        .differentially_private_group_by(1., 1e-2)
        .unwrap()
        .into();
    dp_group_by.display_dot().unwrap();
    let group_by_sql = ast::Query::from(&dp_group_by).to_string();
    let group_by_rows = database.query(&group_by_sql).unwrap();
    let released_cities: HashSet<_> = group_by_rows
        .iter()
        .map(|row| row.to_vec()[0].to_string())
        .collect();
    assert_eq!(released_cities, expected_cities);

    // 2. Same check once the reduce input has been joined with the released grouping values.
    let joined_input = reduce
        .input()
        .clone()
        .join_with_grouping_values(dp_group_by)
        .unwrap();
    joined_input.display_dot().unwrap();
    let joined_sql = ast::Query::from(&joined_input).to_string();
    let joined_rows = database.query(&joined_sql).unwrap();
    let joined_cities: HashSet<_> = joined_rows
        .iter()
        .map(|row| row.to_vec()[0].to_string())
        .collect();
    println!("{:?}", joined_cities);
    assert_eq!(joined_cities, expected_cities);
}

}
163 changes: 160 additions & 3 deletions src/differential_privacy/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,14 @@ impl Reduce {
reduce
};

// if the (epsilon_tau_thresholding, delta_tau_thresholding) budget has
// not been spent, allocate it to the aggregations.
let (epsilon, delta) = if private_query.is_null() {
(epsilon + epsilon_tau_thresholding, delta + delta_tau_thresholding)
} else {
(epsilon, delta)
};

// DP rewrite aggregates
let (dp_relation, private_query_agg) = reduce_with_dp_group_by
.differentially_private_aggregates(epsilon, delta)?
Expand All @@ -156,6 +164,7 @@ impl Reduce {

#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::*;
use crate::{
ast,
Expand All @@ -165,7 +174,8 @@ mod tests {
expr::{AggregateColumn, Expr},
io::{postgresql, Database},
protection::{Protection, Strategy},
relation::{Map, Relation, Variant},
relation::{Map, Relation, Variant, Schema, Field},
protection::ProtectedEntity,
};

#[test]
Expand Down Expand Up @@ -216,7 +226,7 @@ mod tests {
dp_relation.display_dot().unwrap();
assert_eq!(
private_query,
PrivateQuery::gaussian_from_epsilon_delta_sensitivity(epsilon, delta, 50.)
PrivateQuery::gaussian_from_epsilon_delta_sensitivity(epsilon + epsilon_tau_thresholding, delta + delta_tau_thresholding, 50.)
);
assert!(dp_relation
.data_type()
Expand Down Expand Up @@ -286,7 +296,11 @@ mod tests {
dp_relation.display_dot().unwrap();
assert_eq!(
private_query,
PrivateQuery::gaussian_from_epsilon_delta_sensitivity(epsilon, delta, 50.)
PrivateQuery::gaussian_from_epsilon_delta_sensitivity(
epsilon + epsilon_tau_thresholding,
delta + delta_tau_thresholding,
50.
)
);
assert!(dp_relation
.data_type()
Expand Down Expand Up @@ -452,4 +466,147 @@ mod tests {
println!("{query}");
_ = database.query(query).unwrap();
}

/// Checks that the relation produced by `differentially_private` for a
/// reduce grouped by `city` only returns, in its first column, exactly the
/// cities kept by the `city IN ('Paris', 'London')` filter.
#[test]
fn test_differentially_private_output_all_grouping_keys_simple() {
    let mut database = postgresql::test_database();
    let relations = database.relations();
    let source_table = relations
        .get(&["large_user_table".into()])
        .unwrap()
        .as_ref()
        .clone();

    // Copy the table schema, swapping the `city` field for one typed with `DataType::text()`.
    let text_city_schema: Schema = source_table
        .schema()
        .iter()
        .map(|field| {
            if field.name() == "city" {
                Field::from_name_data_type("city", DataType::text())
            } else {
                field.clone()
            }
        })
        .collect();
    let users: Relation = Relation::table()
        .path(["large_user_table"])
        .name("more_users")
        .size(100000)
        .schema(text_city_schema)
        .build();

    // Projection carrying the protected entity columns, restricted to the two requested cities.
    let requested_cities = Expr::in_list(
        Expr::col("city"),
        Expr::list(vec!["Paris".to_string(), "London".to_string()]),
    );
    let filtered_users: Relation = Relation::map()
        .name("map_relation")
        .with(("income", expr!(income)))
        .with(("city", expr!(city)))
        .with((ProtectedEntity::protected_entity_id(), expr!(id)))
        .with((ProtectedEntity::protected_entity_weight(), expr!(id)))
        .filter(requested_cities)
        .input(users.clone())
        .build();
    let reduce: Reduce = Relation::reduce()
        .name("reduce_relation")
        .with(("city".to_string(), AggregateColumn::first("city")))
        .with(("count_income".to_string(), AggregateColumn::count("income")))
        .group_by(expr!(city))
        .input(filtered_users)
        .build();

    let (dp_relation, private_query) = reduce
        .differentially_private(10., 1e-5, 1., 1e-2)
        .unwrap()
        .into();
    println!("{}", private_query);
    dp_relation.display_dot().unwrap();

    // Run the rewritten query and gather the distinct values of its first column.
    let dp_sql = ast::Query::from(&dp_relation).to_string();
    let results = database.query(&dp_sql).unwrap();
    println!("results = {:?}", results);
    let released_cities: HashSet<_> = results
        .iter()
        .map(|row| row.to_vec()[0].to_string())
        .collect();
    let expected_cities: HashSet<_> = HashSet::from(["London".to_string(), "Paris".to_string()]);
    assert_eq!(released_cities, expected_cities);
}

/// Checks that the relation produced by `differentially_private` for a
/// reduce grouped by `city` and `age` only returns, in its first column,
/// exactly the cities kept by the `city IN ('Paris', 'London')` filter.
#[test]
fn test_differentially_private_output_all_grouping_keys() {
    let mut database = postgresql::test_database();
    let relations = database.relations();
    let source_table = relations
        .get(&["large_user_table".into()])
        .unwrap()
        .as_ref()
        .clone();

    // Copy the table schema, swapping the `city` field for one typed with `DataType::text()`.
    let text_city_schema: Schema = source_table
        .schema()
        .iter()
        .map(|field| {
            if field.name() == "city" {
                Field::from_name_data_type("city", DataType::text())
            } else {
                field.clone()
            }
        })
        .collect();
    let users: Relation = Relation::table()
        .path(["large_user_table"])
        .name("more_users")
        .size(100000)
        .schema(text_city_schema)
        .build();

    // Projection carrying the protected entity columns, restricted to the two requested cities.
    let requested_cities = Expr::in_list(
        Expr::col("city"),
        Expr::list(vec!["Paris".to_string(), "London".to_string()]),
    );
    let filtered_users: Relation = Relation::map()
        .name("map_relation")
        .with(("income", expr!(income)))
        .with(("city", expr!(city)))
        .with(("age", expr!(age)))
        .with((ProtectedEntity::protected_entity_id(), expr!(id)))
        .with((ProtectedEntity::protected_entity_weight(), expr!(id)))
        .filter(requested_cities)
        .input(users.clone())
        .build();
    let reduce: Reduce = Relation::reduce()
        .name("reduce_relation")
        .with(("city".to_string(), AggregateColumn::first("city")))
        .with(("age".to_string(), AggregateColumn::first("age")))
        .with(("sum_income".to_string(), AggregateColumn::sum("income")))
        .group_by(expr!(city))
        .group_by(expr!(age))
        .input(filtered_users)
        .build();

    let (dp_relation, private_query) = reduce
        .differentially_private(10., 1e-5, 1., 1e-2)
        .unwrap()
        .into();
    println!("{}", private_query);
    dp_relation.display_dot().unwrap();

    // Run the rewritten query and gather the distinct values of its first column.
    let dp_sql = ast::Query::from(&dp_relation).to_string();
    let results = database.query(&dp_sql).unwrap();
    println!("{:?}", results);
    let released_cities: HashSet<_> = results
        .iter()
        .map(|row| row.to_vec()[0].to_string())
        .collect();
    let expected_cities: HashSet<_> = HashSet::from(["London".to_string(), "Paris".to_string()]);
    assert_eq!(released_cities, expected_cities);
}
}
Loading

0 comments on commit e5a076c

Please sign in to comment.