Skip to content

Commit

Permalink
Merge pull request #262 from Qrlew/261-many-relations-should-not-shar…
Browse files Browse the repository at this point in the history
…e-the-same-name-+-pid-should-stay-unique

261 many relations should not share the same name + pid should stay unique
  • Loading branch information
ngrislain authored Jan 30, 2024
2 parents e9ac454 + 42d458e commit ca720f0
Show file tree
Hide file tree
Showing 14 changed files with 157 additions and 55 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Changed

## [0.9.11] - 2024-01-30
### Changed
- Fix Relations sharing the same name

## [0.9.10] - 2024-01-30
### Changed
- Enable manual clipping parameters setting
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
authors = ["Nicolas Grislain <ng@sarus.tech>"]
name = "qrlew"
version = "0.9.10"
version = "0.9.11"
edition = "2021"
description = "Sarus Qrlew Engine"
documentation = "https://docs.rs/qrlew"
Expand Down
2 changes: 0 additions & 2 deletions src/display/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ mod tests {
relation::{schema::Schema, Relation},
};

#[ignore]
#[test]
fn test_relation() {
namer::reset();
Expand Down Expand Up @@ -241,7 +240,6 @@ mod tests {
join_2.display_dot().unwrap();
}

#[ignore]
#[test]
fn test_expr() {
let rel: Arc<Relation> = Arc::new(
Expand Down
87 changes: 87 additions & 0 deletions src/expr/bijection.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
use super::{function, identifier, Column, Expr, Function};

impl Expr {
    /// Reduce the expression modulo a bijection.
    ///
    /// Strips the outermost chain of bijective function applications (as
    /// reported by `Function::is_bijection`) and returns a reference to the
    /// first sub-expression that is not such an application. Only the first
    /// argument of each bijective function is followed. A bijective function
    /// without any argument is returned unchanged, as is any other expression.
    pub fn reduce_modulo_bijection(&self) -> &Expr {
        match self {
            Expr::Function(Function {
                function,
                arguments,
            }) if function.is_bijection() => arguments
                .first()
                // No argument to descend into: keep the expression itself.
                .map_or(self, |arg| arg.reduce_modulo_bijection()),
            _ => self,
        }
    }

    /// Some column if the expression reduces to a column, `None` otherwise.
    ///
    /// The expression is only borrowed; the returned column is a clone of the
    /// one found after [`Expr::reduce_modulo_bijection`].
    pub fn into_column_modulo_bijection(&self) -> Option<Column> {
        match self.reduce_modulo_bijection() {
            Expr::Column(column) => Some(column.clone()),
            _ => None,
        }
    }

    /// True if the expression reduces into a unique 0-ary function.
    ///
    /// That is: once the outer bijections are stripped, what remains is a
    /// function call whose function satisfies `Function::is_unique`.
    pub fn is_unique(&self) -> bool {
        // After reduction the expression can only still be a bijective function
        // when it has no argument. This relies on `is_bijection` and `is_unique`
        // never holding for the same function, so such a leftover is reported
        // as not unique by the single check below.
        matches!(
            self.reduce_modulo_bijection(),
            Expr::Function(Function { function, .. }) if function.is_unique()
        )
    }

    /// True if 2 expressions are equal modulo a bijection.
    ///
    /// Both sides are reduced with [`Expr::reduce_modulo_bijection`] and the
    /// results are compared with `==`. Bijections nested below a
    /// non-bijective function are not stripped.
    pub fn eq_modulo_bijection(&self, expr: &Expr) -> bool {
        self.reduce_modulo_bijection() == expr.reduce_modulo_bijection()
    }
}

#[cfg(test)]
mod tests {
    use identifier::Identifier;

    use super::*;

    /// A chain of bijections around a column reduces to that column, while a
    /// `sin` in the chain prevents the reduction.
    #[test]
    fn test_into_column_modulo_bijection() {
        let a = expr!(md5(cast_as_text(exp(a))));
        let b = expr!(md5(cast_as_text(sin(a))));
        // `assert_eq!` prints both sides on failure, so no debug printing is needed.
        assert_eq!(
            a.into_column_modulo_bijection(),
            Some(Identifier::from_name("a"))
        );
        assert_eq!(b.into_column_modulo_bijection(), None);
    }

    /// Wrapping in `exp` keeps expressions equal modulo a bijection; wrapping
    /// in `exp(sin(..))` does not.
    #[test]
    fn test_eq_modulo_bijection() {
        let a = expr!(a + b);
        let b = expr!(exp(a + b));
        assert!(a.eq_modulo_bijection(&b));
        let a = expr!(a + b);
        let b = expr!(exp(sin(a + b)));
        assert!(!a.eq_modulo_bijection(&b));
    }

    /// `newid()` under bijections is unique; a plain column under the same
    /// bijections is not.
    #[test]
    fn test_is_unique() {
        assert!(Expr::md5(Expr::cast_as_text(Expr::exp(Expr::newid()))).is_unique());
        assert!(!Expr::md5(Expr::cast_as_text(Expr::exp(Expr::col("a")))).is_unique());
    }
}
9 changes: 0 additions & 9 deletions src/expr/dot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ mod tests {
e.with(rel.data_type()).display_dot().unwrap();
}

#[ignore]
#[test]
fn test_dot_distributivity_dsl() {
let val = Value::structured([
Expand All @@ -278,7 +277,6 @@ mod tests {
&expr! { (a*b+d) }.with(val).display_dot().unwrap();
}

#[ignore]
#[test]
fn test_dot_plus_minus_dsl() {
let val = Value::structured([
Expand All @@ -290,7 +288,6 @@ mod tests {
expr! { a+b-c+d }.with(val).display_dot().unwrap();
}

#[ignore]
#[test]
fn test_dot_simple_value_dsl() {
let val = Value::structured([
Expand All @@ -308,7 +305,6 @@ mod tests {
.unwrap();
}

#[ignore]
#[test]
fn test_dot_value_dsl() {
let val = Value::structured([
Expand All @@ -328,7 +324,6 @@ mod tests {
.unwrap();
}

#[ignore]
#[test]
fn test_dot_aggregate_dsl() {
let data_types = DataType::structured([
Expand All @@ -351,7 +346,6 @@ mod tests {
x.with(data_types).display_dot().unwrap();
}

#[ignore]
#[test]
fn test_dot_aggregate_any_dsl() {
let data_types = DataType::structured([
Expand Down Expand Up @@ -387,7 +381,6 @@ mod tests {
assert_eq!(my_expr.to_string(), "(a % 2)".to_string());
}

#[ignore]
#[test]
fn test_max() {
let data_types = DataType::structured([("a", DataType::float_interval(0., 4.))]);
Expand All @@ -399,7 +392,6 @@ mod tests {
my_expr.with(data_types).display_dot().unwrap();
}

#[ignore]
#[test]
fn test_dot_struct_dsl() {
let rel: Arc<Relation> = Arc::new(
Expand Down Expand Up @@ -433,7 +425,6 @@ mod tests {
.unwrap();
}

#[ignore]
#[test]
fn test_dot_case() {
let data_types = DataType::structured([(
Expand Down
33 changes: 33 additions & 0 deletions src/expr/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,39 @@ impl Function {
arity
}

/// Return true if the function is treated as a bijection.
///
/// Only the functions listed below qualify; every other function yields
/// `false`.
///
/// NOTE(review): some entries (e.g. `CastAsInteger`, `CastAsBoolean`, `Md5`)
/// do not look injective on every input domain; confirm the intended
/// meaning of "bijection" with the callers.
pub fn is_bijection(self) -> bool {
    matches!(
        self,
        Function::Opposite
            | Function::Not
            | Function::Exp
            | Function::Ln
            | Function::Log
            | Function::Sqrt
            | Function::Md5
            | Function::CastAsText
            | Function::CastAsFloat
            | Function::CastAsInteger
            | Function::CastAsBoolean
            | Function::CastAsDateTime
            | Function::CastAsDate
            | Function::CastAsTime
            | Function::Unhex
    )
}

/// Return true if the function implicitly depends on the row index and is
/// unique.
///
/// Holds for `Random` and `Newid` only; every other function yields `false`.
pub fn is_unique(self) -> bool {
    matches!(self, Function::Random(_) | Function::Newid)
}

/// Return the function object implementing the function
pub fn super_image(self, sets: &[DataType]) -> Result<DataType> {
let set = match self.arity() {
Expand Down
1 change: 1 addition & 0 deletions src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub mod implementation;
pub mod rewriting;
pub mod split;
pub mod sql;
pub mod bijection;

use itertools::Itertools;
use paste::paste;
Expand Down
4 changes: 1 addition & 3 deletions src/privacy_unit_tracking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ impl<'a> PrivacyUnitTracking<'a> {
.left(Relation::from(left))
.right(Relation::from(right))
.build();
let mut builder = Relation::map().name(name);
let mut builder = Relation::map();
builder = builder.with((
PrivacyUnit::privacy_unit(),
Expr::col(format!("_LEFT{}", PrivacyUnit::privacy_unit())),
Expand Down Expand Up @@ -410,7 +410,6 @@ impl<'a> PrivacyUnitTracking<'a> {
.right(Relation::from(right))
.build();
let mut builder = Relation::map()
.name(name)
.with((
PrivacyUnit::privacy_unit(),
Expr::col(format!("_RIGHT{}", PrivacyUnit::privacy_unit())),
Expand Down Expand Up @@ -463,7 +462,6 @@ impl<'a> PrivacyUnitTracking<'a> {
.right(Relation::from(right))
.build();
let mut builder = Relation::map()
.name(name)
.with((
PrivacyUnit::privacy_unit(),
Expr::col(format!("_LEFT{}", PrivacyUnit::privacy_unit())),
Expand Down
1 change: 0 additions & 1 deletion src/relation/dot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,6 @@ mod tests {
map.display_dot();
}

#[ignore]
#[test]
fn test_display_join() {
namer::reset();
Expand Down
9 changes: 5 additions & 4 deletions src/relation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ use crate::{
Variant as _,
},
expr::{
self, aggregate::Aggregate, function, AggregateColumn, Column, Expr,
Function as ExprFunction, Identifier, Split,
self, aggregate::Aggregate, function, AggregateColumn, Column, Expr, Identifier, Split,
},
hierarchy::Hierarchy,
namer,
Expand Down Expand Up @@ -304,8 +303,10 @@ impl Map {
Field::new(
name,
expr.super_image(&input_data_type).unwrap(),
if let Expr::Column(c) = expr.clone() {
input.schema()[c.last().unwrap()].constraint()
if let Some(column) = expr.into_column_modulo_bijection() {
input.schema()[column.last().unwrap()].constraint()
} else if expr.is_unique() {
Some(Constraint::Unique)
} else {
None
},
Expand Down
53 changes: 22 additions & 31 deletions src/relation/rewriting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1518,7 +1518,6 @@ mod tests {
);
}

#[ignore]
#[test]
fn test_poisson_sampling() {
let mut database = postgresql::test_database();
Expand Down Expand Up @@ -1627,7 +1626,6 @@ mod tests {
assert_eq!(expected_sampled_join, sampled_join);
}

#[ignore]
#[test]
fn test_sampling_query() {
let mut database = postgresql::test_database();
Expand All @@ -1645,18 +1643,17 @@ mod tests {
namer::reset();
let sampled_relation = relation.poisson_sampling(proba);

let query_sampled_relation = &ast::Query::try_from(&sampled_relation).unwrap().to_string();

let expected_query = r#"WITH
map_qcqr (field_z650, field_08wv) AS (SELECT price AS field_z650, order_id AS field_08wv FROM item_table),
reduce_8knj (field_glfp) AS (SELECT sum(field_z650) AS field_glfp FROM map_qcqr GROUP BY field_08wv),
map_xyv8 (z, a, b) AS (SELECT 0 AS z, field_glfp AS a, field_glfp AS b FROM reduce_8knj),
map_bfzk (z, a, b) AS (SELECT z AS z, a AS a, b AS b FROM map_xyv8 WHERE (random()) < (0.5))
SELECT * FROM map_bfzk"#;
let query_sampled_relation = ast::Query::try_from(&sampled_relation).unwrap().to_string();

let expected_query = r#"WITH "map_647m" ("field_z650", "field_08wv") AS (SELECT "price" AS "field_z650", "order_id" AS "field_08wv" FROM "item_table"),
"reduce_0m62" ("field_yub7") AS (SELECT SUM("field_z650") AS "field_yub7" FROM "map_647m" GROUP BY "field_08wv"),
"map_h16i" ("z", "a", "b") AS (SELECT 0 AS "z", "field_yub7" AS "a", "field_yub7" AS "b" FROM "reduce_0m62"),
"map_tsjq" ("z", "a", "b") AS (SELECT "z" AS "z", "a" AS "a", "b" AS "b" FROM "map_h16i" WHERE (RANDOM()) < (0.5))
SELECT * FROM "map_tsjq"
"#;
assert_eq!(
expected_query.replace('\n', " ").replace(' ', ""),
(&query_sampled_relation[..]).replace(' ', "")
query_sampled_relation.replace(' ', "")
);
print!("{}\n", query_sampled_relation);

Expand All @@ -1672,15 +1669,15 @@ mod tests {
namer::reset();
let sampled_relation = relation.poisson_sampling(proba);

let query_sampled_relation = &ast::Query::try_from(&sampled_relation).unwrap().to_string();

let expected_query = r#"WITH map_gj2u (field_uy24) AS (SELECT log(price) AS field_uy24 FROM item_table),
map_upop (field_uy24) AS (SELECT field_uy24 AS field_uy24 FROM map_gj2u WHERE (random()) < (0.5))
SELECT * FROM map_upop"#;
let query_sampled_relation = ast::Query::try_from(&sampled_relation).unwrap().to_string();

let expected_query = r#"WITH "map_4tf4" ("field_005r") AS (SELECT LOG("price") AS "field_005r" FROM "item_table"),
"map_pv6w" ("field_005r") AS (SELECT "field_005r" AS "field_005r" FROM "map_4tf4" WHERE (RANDOM()) < (0.5))
SELECT * FROM "map_pv6w"
"#;
assert_eq!(
expected_query.replace('\n', " ").replace(' ', ""),
(&query_sampled_relation[..]).replace(' ', "")
query_sampled_relation.replace(' ', "")
);
print!("{}\n", query_sampled_relation);

Expand All @@ -1696,20 +1693,14 @@ mod tests {
namer::reset();
let sampled_relation = relation.poisson_sampling(proba);

let query_sampled_relation = &ast::Query::try_from(&sampled_relation).unwrap().to_string();

let expected_query = r#"WITH
join__e_y (field_eygr, field_0wjz, field_cg0j, field_idxm, field_0eqn, field_3ned, field_gwco) AS (
SELECT * FROM order_table JOIN item_table ON (order_table.id) = (item_table.order_id)
), map_8r2s (field_eygr, field_0wjz, field_cg0j, field_idxm, field_0eqn, field_3ned, field_gwco) AS (
SELECT field_eygr AS field_eygr, field_0wjz AS field_0wjz, field_cg0j AS field_cg0j,
field_idxm AS field_idxm, field_0eqn AS field_0eqn, field_3ned AS field_3ned, field_gwco AS field_gwco
FROM join__e_y
), map_yko1 (field_eygr, field_0wjz, field_cg0j, field_idxm, field_0eqn, field_3ned, field_gwco) AS (
SELECT field_eygr AS field_eygr, field_0wjz AS field_0wjz, field_cg0j AS field_cg0j,
field_idxm AS field_idxm, field_0eqn AS field_0eqn, field_3ned AS field_3ned, field_gwco AS field_gwco
FROM map_8r2s WHERE (random()) < (0.5)
) SELECT * FROM map_yko1"#;
let query_sampled_relation = ast::Query::try_from(&sampled_relation).unwrap().to_string();
println!("DEBUG {query_sampled_relation}");
let expected_query = r#"
WITH "join_bes1" ("field_uwvc", "field_llat", "field_r8n6", "field_xyhh", "field_5zs7", "field_9oif", "field_pdz9") AS (SELECT * FROM "order_table" AS "_LEFT_" JOIN "item_table" AS "_RIGHT_" ON ("_LEFT_"."id") = ("_RIGHT_"."order_id")),
"map_afr0" ("field_uwvc", "field_llat", "field_r8n6", "field_xyhh", "field_5zs7", "field_9oif", "field_pdz9") AS (SELECT "field_uwvc" AS "field_uwvc", "field_llat" AS "field_llat", "field_r8n6" AS "field_r8n6", "field_xyhh" AS "field_xyhh", "field_5zs7" AS "field_5zs7", "field_9oif" AS "field_9oif", "field_pdz9" AS "field_pdz9" FROM "join_bes1"),
"map_h_vu" ("field_uwvc", "field_llat", "field_r8n6", "field_xyhh", "field_5zs7", "field_9oif", "field_pdz9") AS (SELECT "field_uwvc" AS "field_uwvc", "field_llat" AS "field_llat", "field_r8n6" AS "field_r8n6", "field_xyhh" AS "field_xyhh", "field_5zs7" AS "field_5zs7", "field_9oif" AS "field_9oif", "field_pdz9" AS "field_pdz9" FROM "map_afr0" WHERE (RANDOM()) < (0.5))
SELECT * FROM "map_h_vu"
"#;

assert_eq!(
expected_query.replace('\n', " ").replace(' ', ""),
Expand Down
Loading

0 comments on commit ca720f0

Please sign in to comment.