Skip to content

Commit

Permalink
Create Attribute Type Rules for Interval Join, Scatter Plot, Sort Par…
Browse files Browse the repository at this point in the history
…titions, Type Casting (#2005)

This is the second PR for the attribute type checking feature. The first
one is #1924.

## Description of Attribute Type Rules
### Interval Join
`leftAttributeName` (and `rightAttributeName`) must be `integer`,
`long`, `double`, or `timestamp`.

In addition, the `leftAttributeName` attribute must have the same type
as the `rightAttributeName` attribute.

```JSON
{
  "attributeTypeRules": {
    "leftAttributeName": {
      "enum": ["integer", "long", "double", "timestamp"]
    },
    "rightAttributeName": {
      "const": {
        "$data": "leftAttributeName"
      }
    }
  }
}
```

Note: We intentionally put the `enum` test before the `const` test so
that the attribute types are validated first. Otherwise, if the `const`
test (i.e., the `rightAttributeName` rule) came first and
`leftAttributeName` had an invalid attribute type such as `string`, the
user would be told that `rightAttributeName` should have the same
attribute type as `leftAttributeName` -- `string` -- which is
misleading, since neither attribute may be of type `string`.

### Scatter Plot

`xColumn` and `yColumn` attributes must be of `integer` or `double`
type.

```JSON
{
  "attributeTypeRules": {
    "xColumn":{
      "enum": ["integer", "double"]
    },
    "yColumn":{
      "enum": ["integer", "double"]
    }
  }
}
```

Note: Scatter Plot may support `long` in the future. See
#1954.

### Sort Partitions

`sortAttributeName` attribute type must be `integer`, `long`, or
`double`.

```JSON
{
  "attributeTypeRules": {
    "sortAttributeName":{
      "enum": ["integer", "long", "double"]
    }
  }
}
```

Note: Sort Partitions may support `timestamp` in the future. See
#1954.

### Type Casting

For example, to convert an attribute to `integer`, the attribute must
have a type of `string`, `long`, `double`, or `boolean`. An attribute
cannot be cast to its own type. See the schema below for details.

```JSON
{
	"attributeTypeRules": {
		"attribute": {
			"allOf": [{
					"if": {
						"resultType": {
							"valEnum": ["integer"]
						}
					},
					"then": {
						"enum": ["string", "long", "double", "boolean"]
					}
				},
				{
					"if": {
						"resultType": {
							"valEnum": ["double"]
						}
					},
					"then": {
						"enum": ["string", "integer", "long", "boolean"]
					}
				},
				{
					"if": {
						"resultType": {
							"valEnum": ["boolean"]
						}
					},
					"then": {
						"enum": ["string", "integer", "long", "double"]
					}
				},
				{
					"if": {
						"resultType": {
							"valEnum": ["long"]
						}
					},
					"then": {
						"enum": ["string", "integer", "double", "boolean", "timestamp"]
					}
				},
				{
					"if": {
						"resultType": {
							"valEnum": ["timestamp"]
						}
					},
					"then": {
						"enum": ["string", "long"]
					}
				}
			]
		}
	}
}
```

Note: The type constraint is enforced in
`core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/tuple/schema/AttributeTypeUtils.scala`.

---------

Co-authored-by: Yicong Huang <17627829+Yicong-Huang@users.noreply.github.com>
  • Loading branch information
aahei and Yicong-Huang authored Jun 24, 2023
1 parent 49addfe commit 73d2ca7
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import java.sql.Timestamp
}
},
"then": {
"enum": ["integer", "long", "double"]
"enum": ["integer", "long", "double", "timestamp"]
}
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package edu.uci.ics.texera.workflow.operators.intervalJoin
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.google.common.base.Preconditions
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig
import edu.uci.ics.texera.workflow.common.metadata.annotations.{
AutofillAttributeName,
Expand All @@ -23,6 +23,20 @@ import edu.uci.ics.texera.workflow.common.workflow.HashPartition
* 1. The tuples in both inputs come in ascending order
* 2. The left input join key takes as points, join condition is: left key in the range of (right key, right key + constant)
*/
@JsonSchemaInject(json = """
{
"attributeTypeRules": {
"leftAttributeName": {
"enum": ["integer", "long", "double", "timestamp"]
},
"rightAttributeName": {
"const": {
"$data": "leftAttributeName"
}
}
}
}
""")
class IntervalJoinOpDesc extends OperatorDescriptor {

@JsonProperty(required = true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package edu.uci.ics.texera.workflow.operators.sortPartitions

import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.google.common.base.Preconditions
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig
import edu.uci.ics.texera.workflow.common.metadata.annotations.AutofillAttributeName
import edu.uci.ics.texera.workflow.common.metadata.{
Expand All @@ -15,6 +15,15 @@ import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor
import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema}
import edu.uci.ics.texera.workflow.common.workflow.RangePartition

@JsonSchemaInject(json = """
{
"attributeTypeRules": {
"sortAttributeName":{
"enum": ["integer", "long", "double"]
}
}
}
""")
class SortPartitionsOpDesc extends OperatorDescriptor {

@JsonProperty(required = true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,72 @@

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyDescription;
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject;
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle;
import edu.uci.ics.texera.workflow.common.metadata.annotations.AutofillAttributeName;
import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType;

@JsonSchemaInject(json =
"{" +
" \"attributeTypeRules\": {" +
" \"attribute\": {" +
" \"allOf\": [" +
" {" +
" \"if\": {" +
" \"resultType\": {" +
" \"valEnum\": [\"integer\"]" +
" }" +
" }," +
" \"then\": {" +
" \"enum\": [\"string\", \"long\", \"double\", \"boolean\"]" +
" }" +
" }," +
" {" +
" \"if\": {" +
" \"resultType\": {" +
" \"valEnum\": [\"double\"]" +
" }" +
" }," +
" \"then\": {" +
" \"enum\": [\"string\", \"integer\", \"long\", \"boolean\"]" +
" }" +
" }," +
" {" +
" \"if\": {" +
" \"resultType\": {" +
" \"valEnum\": [\"boolean\"]" +
" }" +
" }," +
" \"then\": {" +
" \"enum\": [\"string\", \"integer\", \"long\", \"double\"]" +
" }" +
" }," +
" {" +
" \"if\": {" +
" \"resultType\": {" +
" \"valEnum\": [\"long\"]" +
" }" +
" }," +
" \"then\": {" +
" \"enum\": [\"string\", \"integer\", \"double\", \"boolean\", \"timestamp\"]" +
" }" +
" }," +
" {" +
" \"if\": {" +
" \"resultType\": {" +
" \"valEnum\": [\"timestamp\"]" +
" }" +
" }," +
" \"then\": {" +
" \"enum\": [\"string\", \"long\"]" +
" }" +
" }" +
" " +
" ]" +
" }" +
" }" +
"}"
)
public class TypeCastingUnit {
@JsonProperty(required = true)
@JsonSchemaTitle("Attribute")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyDescription;
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject;
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle;
import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig;
import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecFunc;
Expand Down Expand Up @@ -34,6 +35,17 @@
* This is the description of the operator
*/

@JsonSchemaInject(json =
"{" +
" \"attributeTypeRules\": {" +
" \"xColumn\":{" +
" \"enum\": [\"integer\", \"double\"]" +
" }," +
" \"yColumn\":{" +
" \"enum\": [\"integer\", \"double\"]" +
" }" +
" }" +
"}")
public class ScatterplotOpDesc extends VisualizationOperator {
@JsonProperty(required = true)
@JsonSchemaTitle("X-Column")
Expand Down

0 comments on commit 73d2ca7

Please sign in to comment.