Skip to content

Commit

Permalink
Tired of scrolling
Browse files Browse the repository at this point in the history
  • Loading branch information
Hadrien Kohl committed Dec 14, 2023
1 parent c923a20 commit d41c0b3
Show file tree
Hide file tree
Showing 17 changed files with 4,461 additions and 3,971 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
package fr.insee.vtl.spark.processing.engine;

import fr.insee.vtl.model.Dataset;
import org.junit.jupiter.api.Test;

import javax.script.ScriptContext;
import javax.script.ScriptException;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;

public class AnalyticFirstTest extends AnalyticTest {

@Test
public void testAnFirstWithPartitionClause() throws ScriptException {

// Analytical function Test case 1 : first on window with partition
/* Input dataset
+----+----+----+----+----+
|Id_1|Id_2|Year|Me_1|Me_2|
+----+----+----+----+----+
| A| XX|1993| 3| 1.0|
| A| XX|1994| 4| 9.0|
| A| XX|1995| 7| 5.0|
| A| XX|1996| 6| 8.0|
| A| YY|1993| 9| 3.0|
| A| YY|1994| 5| 4.0|
| A| YY|1995| 10| 2.0|
| A| YY|1996| 2| 7.0|
+----+----+----+----+----+
* */
ScriptContext context = engine.getContext();
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE);


engine.eval("res := first_value ( ds2 over ( partition by Id_1, Id_2) );");
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class);

/*
*
+----+----+----+----+----+----------+----------+
|Id_1|Id_2|Year|Me_1|Me_2|first_Me_1|first_Me_2|
+----+----+----+----+----+----------+----------+
| A| XX|1993| 3| 1.0| 3| 1.0|
| A| XX|1994| 4| 9.0| 3| 1.0|
| A| XX|1995| 7| 5.0| 3| 1.0|
| A| XX|1996| 6| 8.0| 3| 1.0|
| A| YY|1993| 9| 3.0| 9| 3.0|
| A| YY|1994| 5| 4.0| 9| 3.0|
| A| YY|1995| 10| 2.0| 9| 3.0|
| A| YY|1996| 2| 7.0| 9| 3.0|
+----+----+----+----+----+----------+----------+
* */
assertThat(((Dataset) engine.getContext().getAttribute("res")).getDataAsMap()).containsExactly(
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", 9L, "Me_2", 3.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 9L, "Me_2", 3.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 9L, "Me_2", 3.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 9L, "Me_2", 3.0D)
);

}

@Test
public void testAnFirstPartitionOrderByDesc() throws ScriptException {

// Analytical function Test case 2 : first on window with partition and desc order
/* Input dataset
+----+----+----+----+----+
|Id_1|Id_2|Year|Me_1|Me_2|
+----+----+----+----+----+
| A| XX|1993| 3| 1.0|
| A| XX|1994| 4| 9.0|
| A| XX|1995| 7| 5.0|
| A| XX|1996| 6| 8.0|
| A| YY|1993| 9| 3.0|
| A| YY|1994| 5| 4.0|
| A| YY|1995| 10| 2.0|
| A| YY|1996| 2| 7.0|
+----+----+----+----+----+
* */
ScriptContext context = engine.getContext();
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE);


engine.eval("res := first_value ( ds2 over ( partition by Id_1, Id_2 order by Year desc) );");
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class);

/*
*
+----+----+----+----+----+----------+----------+
|Id_1|Id_2|Year|Me_1|Me_2|first_Me_1|first_Me_2|
+----+----+----+----+----+----------+----------+
| A| XX|1996| 6| 8.0| 6| 8.0|
| A| XX|1995| 7| 5.0| 6| 8.0|
| A| XX|1994| 4| 9.0| 6| 8.0|
| A| XX|1993| 3| 1.0| 6| 8.0|
| A| YY|1996| 2| 7.0| 2| 7.0|
| A| YY|1995| 10| 2.0| 2| 7.0|
| A| YY|1994| 5| 4.0| 2| 7.0|
| A| YY|1993| 9| 3.0| 2| 7.0|
+----+----+----+----+----+----------+----------+
* */
assertThat(((Dataset) engine.getContext().getAttribute("res")).getDataAsMap()).containsExactly(
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 6L, "Me_2", 8.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 6L, "Me_2", 8.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 6L, "Me_2", 8.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", 6L, "Me_2", 8.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 2L, "Me_2", 7.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 2L, "Me_2", 7.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 2L, "Me_2", 7.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", 2L, "Me_2", 7.0D)
);

}

@Test
public void testAnFirstWithPartitionOrderByDPClause() throws ScriptException {

// Analytical function Test case 3 : first on window with partition, order by and data points
/* Input dataset
+----+----+----+----+----+
|Id_1|Id_2|Year|Me_1|Me_2|
+----+----+----+----+----+
| A| XX|1993| 3| 1.0|
| A| XX|1994| 4| 9.0|
| A| XX|1995| 7| 5.0|
| A| XX|1996| 6| 8.0|
| A| YY|1993| 9| 3.0|
| A| YY|1994| 5| 4.0|
| A| YY|1995| 10| 2.0|
| A| YY|1996| 2| 7.0|
+----+----+----+----+----+
* */
ScriptContext context = engine.getContext();
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE);


engine.eval("res := first_value ( ds2 over ( partition by Id_1 order by Id_2 data points between 2 preceding and 2 following) );");
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class);

/*
*
+----+----+----+----+----+----------+----------+
|Id_1|Id_2|Year|Me_1|Me_2|first_Me_1|first_Me_2|
+----+----+----+----+----+----------+----------+
| A| XX|1993| 3| 1.0| 3| 1.0|
| A| XX|1994| 4| 9.0| 3| 1.0|
| A| XX|1995| 7| 5.0| 3| 1.0|
| A| XX|1996| 6| 8.0| 4| 9.0|
| A| YY|1993| 9| 3.0| 7| 5.0|
| A| YY|1994| 5| 4.0| 6| 8.0|
| A| YY|1995| 10| 2.0| 9| 3.0|
| A| YY|1996| 2| 7.0| 5| 4.0|
+----+----+----+----+----+----------+----------+
* */
assertThat(((Dataset) engine.getContext().getAttribute("res")).getDataAsMap()).containsExactly(
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 4L, "Me_2", 9.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", 7L, "Me_2", 5.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 6L, "Me_2", 8.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 9L, "Me_2", 3.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 5L, "Me_2", 4.0D)
);

}

@Test
public void testAnFirstPartitionOrderByRangeClause() throws ScriptException {

// Analytical function Test case 4 : first on window with partition, order by and range
/* Input dataset
+----+----+----+----+----+
|Id_1|Id_2|Year|Me_1|Me_2|
+----+----+----+----+----+
| A| XX|1993| 3| 1.0|
| A| XX|1994| 4| 9.0|
| A| XX|1995| 7| 5.0|
| A| XX|1996| 6| 8.0|
| A| YY|1993| 9| 3.0|
| A| YY|1994| 5| 4.0|
| A| YY|1995| 10| 2.0|
| A| YY|1996| 2| 7.0|
+----+----+----+----+----+
* */
ScriptContext context = engine.getContext();
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE);


engine.eval("res := first_value ( ds2 over ( partition by Id_1 order by Year range between -1 and 1) );");
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class);

/*
*
+----+----+----+----+----+----------+----------+
|Id_1|Id_2|Year|Me_1|Me_2|first_Me_1|first_Me_2|
+----+----+----+----+----+----------+----------+
| A| XX|1993| 3| 1.0| 3| 1.0|
| A| YY|1993| 9| 3.0| 3| 1.0|
| A| XX|1994| 4| 9.0| 3| 1.0|
| A| YY|1994| 5| 4.0| 3| 1.0|
| A| XX|1995| 7| 5.0| 4| 9.0|
| A| YY|1995| 10| 2.0| 4| 9.0|
| A| XX|1996| 6| 8.0| 7| 5.0|
| A| YY|1996| 2| 7.0| 7| 5.0|
+----+----+----+----+----+----------+----------+
* */
assertThat(((Dataset) engine.getContext().getAttribute("res")).getDataAsMap()).containsExactly(
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", 4L, "Me_2", 9.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 4L, "Me_2", 9.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 7L, "Me_2", 5.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 7L, "Me_2", 5.0D)
);

}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package fr.insee.vtl.spark.processing.engine;

import fr.insee.vtl.model.Dataset;
import org.junit.jupiter.api.Test;

import javax.script.ScriptContext;
import javax.script.ScriptException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;

public class AnalyticLagTest extends AnalyticTest {

@Test
public void testAnLagWithCalcClause() throws ScriptException {

// Analytical function Test case 1 : lead on window with partition, order by and range
/* Input dataset
+----+----+----+----+----+
|Id_1|Id_2|Year|Me_1|Me_2|
+----+----+----+----+----+
| A| XX|1993| 3| 1.0|
| A| XX|1994| 4| 9.0|
| A| XX|1995| 7| 5.0|
| A| XX|1996| 6| 8.0|
| A| YY|1993| 9| 3.0|
| A| YY|1994| 5| 4.0|
| A| YY|1995| 10| 2.0|
| A| YY|1996| 2| 7.0|
+----+----+----+----+----+
* */
ScriptContext context = engine.getContext();
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE);
engine.eval("res := ds2 [ calc lag_Me_1 := lag ( Me_1 , 1 over ( partition by Id_1 , Id_2 order by Year ) )] ;");
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class);

/*
*
+----+----+----+----+----+--------+
|Id_1|Id_2|Year|Me_1|Me_2|lag_Me_1|
+----+----+----+----+----+--------+
| A| XX|1993| 3| 1.0| null|
| A| XX|1994| 4| 9.0| 3|
| A| XX|1995| 7| 5.0| 4|
| A| XX|1996| 6| 8.0| 7|
| A| YY|1993| 9| 3.0| null|
| A| YY|1994| 5| 4.0| 9|
| A| YY|1995| 10| 2.0| 5|
| A| YY|1996| 2| 7.0| 10|
+----+----+----+----+----+--------+
* */

List<Map<String, Object>> actualWithNull = ((Dataset) engine.getContext().getAttribute("res")).getDataAsMap();

List<Map<String, Object>> actual = new ArrayList<>();
for (Map<String, Object> map : actualWithNull) {
actual.add(replaceNullValues(map, DEFAULT_NULL_STR));
}
assertThat(actual).containsExactly(
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", 3L, "Me_2", 1.0D, "lag_Me_1", "null"),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 4L, "Me_2", 9.0D, "lag_Me_1", 3L),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 7L, "Me_2", 5.0D, "lag_Me_1", 4L),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 6L, "Me_2", 8.0D, "lag_Me_1", 7L),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", 9L, "Me_2", 3.0D, "lag_Me_1", "null"),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 5L, "Me_2", 4.0D, "lag_Me_1", 9L),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 10L, "Me_2", 2.0D, "lag_Me_1", 5L),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 2L, "Me_2", 7.0D, "lag_Me_1", 10L)
);

}

/*
* Test case for analytic function lag
* The lag function take two argument:
* - input dataframe
* - step
* Analytic clause restriction:
* - Must have orderClause
* - The windowClause such as data points and range are not allowed
* */
@Test
public void testAnLag() throws ScriptException {

// Analytical function Test case 1 : lag on window with partition, order by and range
/* Input dataset
+----+----+----+----+----+
|Id_1|Id_2|Year|Me_1|Me_2|
+----+----+----+----+----+
| A| XX|1993| 3| 1.0|
| A| XX|1994| 4| 9.0|
| A| XX|1995| 7| 5.0|
| A| XX|1996| 6| 8.0|
| A| YY|1993| 9| 3.0|
| A| YY|1994| 5| 4.0|
| A| YY|1995| 10| 2.0|
| A| YY|1996| 2| 7.0|
+----+----+----+----+----+
* */
ScriptContext context = engine.getContext();
context.setAttribute("ds2", anCountDS2, ScriptContext.ENGINE_SCOPE);


engine.eval("res := lag ( ds2 , 1 over ( partition by Id_1 , Id_2 order by Year ) );");
assertThat(engine.getContext().getAttribute("res")).isInstanceOf(Dataset.class);

/*
*
+----+----+----+----+----+---------+---------+
|Id_1|Id_2|Year|Me_1|Me_2|lead_Me_1|lead_Me_2|
+----+----+----+----+----+---------+---------+
| A| XX|1993| 3| 1.0| null| null|
| A| XX|1994| 4| 9.0| 3| 1.0|
| A| XX|1995| 7| 5.0| 4| 9.0|
| A| XX|1996| 6| 8.0| 7| 5.0|
| A| YY|1993| 9| 3.0| null| null|
| A| YY|1994| 5| 4.0| 9| 3.0|
| A| YY|1995| 10| 2.0| 5| 4.0|
| A| YY|1996| 2| 7.0| 10| 2.0|
+----+----+----+----+----+---------+---------+
* */
assertThat(((Dataset) engine.getContext().getAttribute("res")).getDataAsMap()).containsExactly(
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1993L, "Me_1", null, "Me_2", null),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1994L, "Me_1", 3L, "Me_2", 1.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1995L, "Me_1", 4L, "Me_2", 9.0D),
Map.of("Id_1", "A", "Id_2", "XX", "Year", 1996L, "Me_1", 7L, "Me_2", 5.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1993L, "Me_1", null, "Me_2", null),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1994L, "Me_1", 9L, "Me_2", 3.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1995L, "Me_1", 5L, "Me_2", 4.0D),
Map.of("Id_1", "A", "Id_2", "YY", "Year", 1996L, "Me_1", 10L, "Me_2", 2.0D)
);

}

}
Loading

0 comments on commit d41c0b3

Please sign in to comment.