-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Hadrien Kohl
committed
Dec 14, 2023
1 parent
c923a20
commit d41c0b3
Showing
17 changed files
with
4,461 additions
and
3,971 deletions.
There are no files selected for viewing
331 changes: 331 additions & 0 deletions
331
vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/AnalyticAvgTest.java
Large diffs are not rendered by default.
Oops, something went wrong.
445 changes: 445 additions & 0 deletions
445
vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/AnalyticCountTest.java
Large diffs are not rendered by default.
Oops, something went wrong.
228 changes: 228 additions & 0 deletions
228
vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/AnalyticFirstTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,228 @@ | ||
package fr.insee.vtl.spark.processing.engine; | ||
|
||
import fr.insee.vtl.model.Dataset; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import javax.script.ScriptContext; | ||
import javax.script.ScriptException; | ||
import java.util.Map; | ||
|
||
import static org.assertj.core.api.Assertions.assertThat; | ||
|
||
public class AnalyticFirstTest extends AnalyticTest { | ||
|
||
@Test | ||
public void testAnFirstWithPartitionClause() throws ScriptException { | ||
|
||
// Analytical function Test case 1 : first on window with partition | ||
/* Input dataset | ||
+----+----+----+----+----+ | ||
|Id_1|Id_2|Year|Me_1|Me_2| | ||
+----+----+----+----+----+ | ||
| A| XX|1993| 3| 1.0| | ||
| A| XX|1994| 4| 9.0| | ||
| A| XX|1995| 7| 5.0| | ||
| A| XX|1996| 6| 8.0| | ||
| A| YY|1993| 9| 3.0| | ||
| A| YY|1994| 5| 4.0| | ||
| A| YY|1995| 10| 2.0| | ||
| A| YY|1996| 2| 7.0| | ||
+----+----+----+----+----+ | ||
* */ | ||
ScriptContext context = engine.getContext(); | ||
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE); | ||
|
||
|
||
engine.eval("res := first_value ( ds2 over ( partition by Id_1, Id_2) );"); | ||
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class); | ||
|
||
/* | ||
* | ||
+----+----+----+----+----+----------+----------+ | ||
|Id_1|Id_2|Year|Me_1|Me_2|first_Me_1|first_Me_2| | ||
+----+----+----+----+----+----------+----------+ | ||
| A| XX|1993| 3| 1.0| 3| 1.0| | ||
| A| XX|1994| 4| 9.0| 3| 1.0| | ||
| A| XX|1995| 7| 5.0| 3| 1.0| | ||
| A| XX|1996| 6| 8.0| 3| 1.0| | ||
| A| YY|1993| 9| 3.0| 9| 3.0| | ||
| A| YY|1994| 5| 4.0| 9| 3.0| | ||
| A| YY|1995| 10| 2.0| 9| 3.0| | ||
| A| YY|1996| 2| 7.0| 9| 3.0| | ||
+----+----+----+----+----+----------+----------+ | ||
* */ | ||
assertThat(((Dataset) engine.getContext().getAttribute("res")).getDataAsMap()).containsExactly( | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", 9L, "Me_2", 3.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 9L, "Me_2", 3.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 9L, "Me_2", 3.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 9L, "Me_2", 3.0D) | ||
); | ||
|
||
} | ||
|
||
@Test | ||
public void testAnFirstPartitionOrderByDesc() throws ScriptException { | ||
|
||
// Analytical function Test case 2 : first on window with partition and desc order | ||
/* Input dataset | ||
+----+----+----+----+----+ | ||
|Id_1|Id_2|Year|Me_1|Me_2| | ||
+----+----+----+----+----+ | ||
| A| XX|1993| 3| 1.0| | ||
| A| XX|1994| 4| 9.0| | ||
| A| XX|1995| 7| 5.0| | ||
| A| XX|1996| 6| 8.0| | ||
| A| YY|1993| 9| 3.0| | ||
| A| YY|1994| 5| 4.0| | ||
| A| YY|1995| 10| 2.0| | ||
| A| YY|1996| 2| 7.0| | ||
+----+----+----+----+----+ | ||
* */ | ||
ScriptContext context = engine.getContext(); | ||
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE); | ||
|
||
|
||
engine.eval("res := first_value ( ds2 over ( partition by Id_1, Id_2 order by Year desc) );"); | ||
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class); | ||
|
||
/* | ||
* | ||
+----+----+----+----+----+----------+----------+ | ||
|Id_1|Id_2|Year|Me_1|Me_2|first_Me_1|first_Me_2| | ||
+----+----+----+----+----+----------+----------+ | ||
| A| XX|1996| 6| 8.0| 6| 8.0| | ||
| A| XX|1995| 7| 5.0| 6| 8.0| | ||
| A| XX|1994| 4| 9.0| 6| 8.0| | ||
| A| XX|1993| 3| 1.0| 6| 8.0| | ||
| A| YY|1996| 2| 7.0| 2| 7.0| | ||
| A| YY|1995| 10| 2.0| 2| 7.0| | ||
| A| YY|1994| 5| 4.0| 2| 7.0| | ||
| A| YY|1993| 9| 3.0| 2| 7.0| | ||
+----+----+----+----+----+----------+----------+ | ||
* */ | ||
assertThat(((Dataset) engine.getContext().getAttribute("res")).getDataAsMap()).containsExactly( | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 6L, "Me_2", 8.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 6L, "Me_2", 8.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 6L, "Me_2", 8.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", 6L, "Me_2", 8.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 2L, "Me_2", 7.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 2L, "Me_2", 7.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 2L, "Me_2", 7.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", 2L, "Me_2", 7.0D) | ||
); | ||
|
||
} | ||
|
||
@Test | ||
public void testAnFirstWithPartitionOrderByDPClause() throws ScriptException { | ||
|
||
// Analytical function Test case 3 : first on window with partition, order by and data points | ||
/* Input dataset | ||
+----+----+----+----+----+ | ||
|Id_1|Id_2|Year|Me_1|Me_2| | ||
+----+----+----+----+----+ | ||
| A| XX|1993| 3| 1.0| | ||
| A| XX|1994| 4| 9.0| | ||
| A| XX|1995| 7| 5.0| | ||
| A| XX|1996| 6| 8.0| | ||
| A| YY|1993| 9| 3.0| | ||
| A| YY|1994| 5| 4.0| | ||
| A| YY|1995| 10| 2.0| | ||
| A| YY|1996| 2| 7.0| | ||
+----+----+----+----+----+ | ||
* */ | ||
ScriptContext context = engine.getContext(); | ||
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE); | ||
|
||
|
||
engine.eval("res := first_value ( ds2 over ( partition by Id_1 order by Id_2 data points between 2 preceding and 2 following) );"); | ||
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class); | ||
|
||
/* | ||
* | ||
+----+----+----+----+----+----------+----------+ | ||
|Id_1|Id_2|Year|Me_1|Me_2|first_Me_1|first_Me_2| | ||
+----+----+----+----+----+----------+----------+ | ||
| A| XX|1993| 3| 1.0| 3| 1.0| | ||
| A| XX|1994| 4| 9.0| 3| 1.0| | ||
| A| XX|1995| 7| 5.0| 3| 1.0| | ||
| A| XX|1996| 6| 8.0| 4| 9.0| | ||
| A| YY|1993| 9| 3.0| 7| 5.0| | ||
| A| YY|1994| 5| 4.0| 6| 8.0| | ||
| A| YY|1995| 10| 2.0| 9| 3.0| | ||
| A| YY|1996| 2| 7.0| 5| 4.0| | ||
+----+----+----+----+----+----------+----------+ | ||
* */ | ||
assertThat(((Dataset) engine.getContext().getAttribute("res")).getDataAsMap()).containsExactly( | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 4L, "Me_2", 9.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", 7L, "Me_2", 5.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 6L, "Me_2", 8.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 9L, "Me_2", 3.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 5L, "Me_2", 4.0D) | ||
); | ||
|
||
} | ||
|
||
@Test | ||
public void testAnFirstPartitionOrderByRangeClause() throws ScriptException { | ||
|
||
// Analytical function Test case 4 : first on window with partition, order by and range | ||
/* Input dataset | ||
+----+----+----+----+----+ | ||
|Id_1|Id_2|Year|Me_1|Me_2| | ||
+----+----+----+----+----+ | ||
| A| XX|1993| 3| 1.0| | ||
| A| XX|1994| 4| 9.0| | ||
| A| XX|1995| 7| 5.0| | ||
| A| XX|1996| 6| 8.0| | ||
| A| YY|1993| 9| 3.0| | ||
| A| YY|1994| 5| 4.0| | ||
| A| YY|1995| 10| 2.0| | ||
| A| YY|1996| 2| 7.0| | ||
+----+----+----+----+----+ | ||
* */ | ||
ScriptContext context = engine.getContext(); | ||
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE); | ||
|
||
|
||
engine.eval("res := first_value ( ds2 over ( partition by Id_1 order by Year range between -1 and 1) );"); | ||
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class); | ||
|
||
/* | ||
* | ||
+----+----+----+----+----+----------+----------+ | ||
|Id_1|Id_2|Year|Me_1|Me_2|first_Me_1|first_Me_2| | ||
+----+----+----+----+----+----------+----------+ | ||
| A| XX|1993| 3| 1.0| 3| 1.0| | ||
| A| YY|1993| 9| 3.0| 3| 1.0| | ||
| A| XX|1994| 4| 9.0| 3| 1.0| | ||
| A| YY|1994| 5| 4.0| 3| 1.0| | ||
| A| XX|1995| 7| 5.0| 4| 9.0| | ||
| A| YY|1995| 10| 2.0| 4| 9.0| | ||
| A| XX|1996| 6| 8.0| 7| 5.0| | ||
| A| YY|1996| 2| 7.0| 7| 5.0| | ||
+----+----+----+----+----+----------+----------+ | ||
* */ | ||
assertThat(((Dataset) engine.getContext().getAttribute("res")).getDataAsMap()).containsExactly( | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", 4L, "Me_2", 9.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 4L, "Me_2", 9.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 7L, "Me_2", 5.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 7L, "Me_2", 5.0D) | ||
); | ||
|
||
} | ||
|
||
} |
136 changes: 136 additions & 0 deletions
136
vtl-spark/src/test/java/fr/insee/vtl/spark/processing.engine/AnalyticLagTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
package fr.insee.vtl.spark.processing.engine; | ||
|
||
import fr.insee.vtl.model.Dataset; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import javax.script.ScriptContext; | ||
import javax.script.ScriptException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import static org.assertj.core.api.Assertions.assertThat; | ||
|
||
public class AnalyticLagTest extends AnalyticTest { | ||
|
||
@Test | ||
public void testAnLagWithCalcClause() throws ScriptException { | ||
|
||
// Analytical function Test case 1 : lead on window with partition, order by and range | ||
/* Input dataset | ||
+----+----+----+----+----+ | ||
|Id_1|Id_2|Year|Me_1|Me_2| | ||
+----+----+----+----+----+ | ||
| A| XX|1993| 3| 1.0| | ||
| A| XX|1994| 4| 9.0| | ||
| A| XX|1995| 7| 5.0| | ||
| A| XX|1996| 6| 8.0| | ||
| A| YY|1993| 9| 3.0| | ||
| A| YY|1994| 5| 4.0| | ||
| A| YY|1995| 10| 2.0| | ||
| A| YY|1996| 2| 7.0| | ||
+----+----+----+----+----+ | ||
* */ | ||
ScriptContext context = engine.getContext(); | ||
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE); | ||
engine.eval("res := ds2 [ calc lag_Me_1 := lag ( Me_1 , 1 over ( partition by Id_1 , Id_2 order by Year ) )] ;"); | ||
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class); | ||
|
||
/* | ||
* | ||
+----+----+----+----+----+--------+ | ||
|Id_1|Id_2|Year|Me_1|Me_2|lag_Me_1| | ||
+----+----+----+----+----+--------+ | ||
| A| XX|1993| 3| 1.0| null| | ||
| A| XX|1994| 4| 9.0| 3| | ||
| A| XX|1995| 7| 5.0| 4| | ||
| A| XX|1996| 6| 8.0| 7| | ||
| A| YY|1993| 9| 3.0| null| | ||
| A| YY|1994| 5| 4.0| 9| | ||
| A| YY|1995| 10| 2.0| 5| | ||
| A| YY|1996| 2| 7.0| 10| | ||
+----+----+----+----+----+--------+ | ||
* */ | ||
|
||
List<Map<String, Object>> actualWithNull = ((Dataset) engine.getContext().getAttribute("res")).getDataAsMap(); | ||
|
||
List<Map<String, Object>> actual = new ArrayList<>(); | ||
for (Map<String, Object> map : actualWithNull) { | ||
actual.add(replaceNullValues(map, DEFAULT_NULL_STR)); | ||
} | ||
assertThat(actual).containsExactly( | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", 3L, "Me_2", 1.0D, "lag_Me_1", "null"), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 4L, "Me_2", 9.0D, "lag_Me_1", 3L), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 7L, "Me_2", 5.0D, "lag_Me_1", 4L), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 6L, "Me_2", 8.0D, "lag_Me_1", 7L), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", 9L, "Me_2", 3.0D, "lag_Me_1", "null"), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 5L, "Me_2", 4.0D, "lag_Me_1", 9L), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 10L, "Me_2", 2.0D, "lag_Me_1", 5L), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 2L, "Me_2", 7.0D, "lag_Me_1", 10L) | ||
); | ||
|
||
} | ||
|
||
/* | ||
* Test case for analytic function lag | ||
* The lag function take two argument: | ||
* - input dataframe | ||
* - step | ||
* Analytic clause restriction: | ||
* - Must have orderClause | ||
* - The windowClause such as data points and range are not allowed | ||
* */ | ||
@Test | ||
public void testAnLag() throws ScriptException { | ||
|
||
// Analytical function Test case 1 : lag on window with partition, order by and range | ||
/* Input dataset | ||
+----+----+----+----+----+ | ||
|Id_1|Id_2|Year|Me_1|Me_2| | ||
+----+----+----+----+----+ | ||
| A| XX|1993| 3| 1.0| | ||
| A| XX|1994| 4| 9.0| | ||
| A| XX|1995| 7| 5.0| | ||
| A| XX|1996| 6| 8.0| | ||
| A| YY|1993| 9| 3.0| | ||
| A| YY|1994| 5| 4.0| | ||
| A| YY|1995| 10| 2.0| | ||
| A| YY|1996| 2| 7.0| | ||
+----+----+----+----+----+ | ||
* */ | ||
ScriptContext context = engine.getContext(); | ||
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE); | ||
|
||
|
||
engine.eval("res := lag ( ds2 , 1 over ( partition by Id_1 , Id_2 order by Year ) );"); | ||
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class); | ||
|
||
/* | ||
* | ||
+----+----+----+----+----+---------+---------+ | ||
|Id_1|Id_2|Year|Me_1|Me_2|lead_Me_1|lead_Me_2| | ||
+----+----+----+----+----+---------+---------+ | ||
| A| XX|1993| 3| 1.0| null| null| | ||
| A| XX|1994| 4| 9.0| 3| 1.0| | ||
| A| XX|1995| 7| 5.0| 4| 9.0| | ||
| A| XX|1996| 6| 8.0| 7| 5.0| | ||
| A| YY|1993| 9| 3.0| null| null| | ||
| A| YY|1994| 5| 4.0| 9| 3.0| | ||
| A| YY|1995| 10| 2.0| 5| 4.0| | ||
| A| YY|1996| 2| 7.0| 10| 2.0| | ||
+----+----+----+----+----+---------+---------+ | ||
* */ | ||
assertThat(((Dataset) engine.getContext().getAttribute("res")).getDataAsMap()).containsExactly( | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", null, "Me_2", null), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 3L, "Me_2", 1.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 4L, "Me_2", 9.0D), | ||
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 7L, "Me_2", 5.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", null, "Me_2", null), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 9L, "Me_2", 3.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 5L, "Me_2", 4.0D), | ||
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 10L, "Me_2", 2.0D) | ||
); | ||
|
||
} | ||
|
||
} |
Oops, something went wrong.