Commit

Merge branch 'develop' into feature/gt-3.0
* develop:
  PR feedback.
  PR feedback. Regression fix. Release notes update.
  Added forced truncation of WKT types in Markdown/HTML rendering.
  Ensure default tile size is applied to `raster` reader.
  Fix nodata doc
  Doc supervised, set tile size to 256 for visual
  Update doc to use rf_local_is_in when masking; fix locationtech#351
  Close locationtech#310 move reference to static
  rf_local_is_in python implementation
  Updated intro section. Added additional raster-read section.
  Applying pre-partitioning to DataSources.
  Expanded RasterRefSpec to ensure lazy tiles provide metadata without I/O.
  Fix unit tests for rf_local_is_in
  Attempting to keep TravisCI from timing out by using jobs.
  Add rf_local_is_in function
metasim committed Nov 8, 2019
2 parents 2341ad6 + ade36ab commit 58ec89a
Showing 26 changed files with 343 additions and 228 deletions.
14 changes: 6 additions & 8 deletions .travis.yml
@@ -1,4 +1,3 @@
sudo: false
dist: xenial
language: python

@@ -28,11 +27,10 @@ install:
- pip install rasterio shapely pandas numpy pweave
- wget -O - https://piccolo.link/sbt-1.2.8.tgz | tar xzf -

script:
- sbt/bin/sbt -java-home $JAVA_HOME -batch test
- sbt/bin/sbt -java-home $JAVA_HOME -batch it:test
# - sbt -Dfile.encoding=UTF8 clean coverage test coverageReport
# Tricks to avoid unnecessary cache updates
- find $HOME/.sbt -name "*.lock" | xargs rm
- find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm

jobs:
include:
- stage: "Unit Tests"
script: sbt/bin/sbt -java-home $JAVA_HOME -batch test
- stage: "Integration Tests"
script: sbt/bin/sbt -java-home $JAVA_HOME -batch it:test
@@ -405,6 +405,9 @@ trait RasterFunctions {
/** Cellwise inequality comparison between a tile and a scalar. */
def rf_local_unequal[T: Numeric](tileCol: Column, value: T): Column = Unequal(tileCol, value)

/** Test if each cell value is in the provided array. */
def rf_local_is_in(tileCol: Column, arrayCol: Column): Column = IsIn(tileCol, arrayCol)

/** Return a tile with ones where the input is NoData, otherwise zero */
def rf_local_no_data(tileCol: Column): Column = Undefined(tileCol)

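For orientation, here is a minimal usage sketch of the new function from the DataFrame API (the DataFrame `df`, its `tile` column, and the class codes 3, 8, 10 are hypothetical):

import org.apache.spark.sql.functions.{array, col, lit}
import org.locationtech.rasterframes._

// Cells whose value appears in the array become 1; all other cells become 0.
val flagged = df.withColumn("is_cloud",
  rf_local_is_in(col("tile"), array(lit(3), lit(8), lit(10))))
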
@@ -138,7 +138,7 @@ object TileRasterizerAggregate {
}
}

// Scan table and constuct what the TileLayerMetadata would be in the specified destination CRS.
// Scan table and construct what the TileLayerMetadata would be in the specified destination CRS.
val tlm: TileLayerMetadata[SpatialKey] = df
.select(
ProjectedLayerMetadataAggregate(
@@ -0,0 +1,88 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
* Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* [http://www.apache.org/licenses/LICENSE-2.0]
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
* SPDX-License-Identifier: Apache-2.0
*
*/

package org.locationtech.rasterframes.expressions.localops

import geotrellis.raster.Tile
import geotrellis.raster.mapalgebra.local.IfCell
import org.apache.spark.sql.Column
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
import org.apache.spark.sql.types.{ArrayType, DataType}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.util.ArrayData
import org.apache.spark.sql.rf.TileUDT
import org.locationtech.rasterframes.encoders.CatalystSerializer._
import org.locationtech.rasterframes.expressions.DynamicExtractors._
import org.locationtech.rasterframes.expressions._

@ExpressionDescription(
usage = "_FUNC_(tile, rhs) - In each cell of `tile`, return true if the value is in rhs.",
arguments = """
Arguments:
* tile - tile column whose cell values are tested against `rhs`
* rhs - array to test against
""",
examples = """
Examples:
> SELECT _FUNC_(tile, array(33, 66, 99));
..."""
)
case class IsIn(left: Expression, right: Expression) extends BinaryExpression with CodegenFallback {
override val nodeName: String = "rf_local_is_in"

override def dataType: DataType = left.dataType

@transient private lazy val elementType: DataType = right.dataType.asInstanceOf[ArrayType].elementType

override def checkInputDataTypes(): TypeCheckResult =
if(!tileExtractor.isDefinedAt(left.dataType)) {
TypeCheckFailure(s"Input type '${left.dataType}' does not conform to a raster type.")
} else right.dataType match {
case _: ArrayType => TypeCheckSuccess
case _ => TypeCheckFailure(s"Input type '${right.dataType}' does not conform to ArrayType.")
}

override protected def nullSafeEval(input1: Any, input2: Any): Any = {
implicit val tileSer = TileUDT.tileSerializer
val (childTile, childCtx) = tileExtractor(left.dataType)(row(input1))

val arr = input2.asInstanceOf[ArrayData].toArray[AnyRef](elementType)

childCtx match {
case Some(ctx) => ctx.toProjectRasterTile(op(childTile, arr)).toInternalRow
case None => op(childTile, arr).toInternalRow
}

}

protected def op(left: Tile, right: IndexedSeq[AnyRef]): Tile = {
def fn(i: Int): Boolean = right.contains(i)
IfCell(left, fn(_), 1, 0)
}

}

object IsIn {
def apply(left: Column, right: Column): Column =
new Column(IsIn(left.expr, right.expr))
}
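As a rough illustration of the op above, the same membership test can be written directly against a GeoTrellis tile (the cell values and the lookup set below are arbitrary):

import geotrellis.raster.{IntArrayTile, Tile}
import geotrellis.raster.mapalgebra.local.IfCell

val tile: Tile = IntArrayTile((1 to 9).toArray, 3, 3)
val wanted = Set(2, 5, 7)
// IfCell writes 1 where the predicate holds and 0 elsewhere,
// mirroring what IsIn.op does with the array argument.
val mask: Tile = IfCell(tile, (v: Int) => wanted.contains(v), 1, 0)
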
@@ -86,6 +86,7 @@ package object expressions {
registry.registerExpression[GreaterEqual]("rf_local_greater_equal")
registry.registerExpression[Equal]("rf_local_equal")
registry.registerExpression[Unequal]("rf_local_unequal")
registry.registerExpression[IsIn]("rf_local_is_in")
registry.registerExpression[Undefined]("rf_local_no_data")
registry.registerExpression[Defined]("rf_local_data")
registry.registerExpression[Sum]("rf_tile_sum")
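Once registered, the expression is also reachable from SQL. A hedged sketch (the `scene` table and `tile` column are made-up names, and the session is assumed to have been initialized with RasterFrames):

val masked = spark.sql(
  "SELECT rf_local_is_in(tile, array(3, 8, 10)) AS in_mask FROM scene")
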
@@ -24,12 +24,14 @@ package org.locationtech.rasterframes.util
import geotrellis.raster.render.ColorRamps
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.functions.{base64, concat, concat_ws, length, lit, substring, when}
import org.apache.spark.sql.jts.JTSTypes
import org.apache.spark.sql.types.{StringType, StructField}
import org.locationtech.rasterframes.expressions.DynamicExtractors
import org.locationtech.rasterframes.{rfConfig, rf_render_png, rf_resample}
import org.apache.spark.sql.rf.WithTypeConformity

/**
* DataFrame extensiosn for rendering sample content in a number of ways
* DataFrame extension for rendering sample content in a number of ways
*/
trait DataFrameRenderers {
private val truncateWidth = rfConfig.getInt("max-truncate-row-element-length")
@@ -47,8 +49,9 @@
lit("\"></img>")
)
else {
val isGeom = WithTypeConformity(c.dataType).conformsTo(JTSTypes.GeometryTypeInstance)
val str = resolved.cast(StringType)
if (truncate)
if (truncate || isGeom)
when(length(str) > lit(truncateWidth),
concat(substring(str, 1, truncateWidth), lit("..."))
)
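A short sketch of the rendering behavior this changes, assuming the standard `import org.locationtech.rasterframes._` brings the `toMarkdown` extension method into scope (the DataFrame `df` and its geometry column are hypothetical):

// Geometry (WKT) values are now truncated in the rendered table even when
// truncate = false, since full WKT strings can be arbitrarily long.
val md: String = df.toMarkdown(truncate = false, renderTiles = true)
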

This file was deleted.

@@ -38,7 +38,7 @@ import scala.xml.parsing.XhtmlParser
class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSupport {
lazy val rf = sampleTileLayerRDD.toLayer

describe("DataFrame exention methods") {
describe("DataFrame extension methods") {
it("should maintain original type") {
val df = rf.withPrefixedColumnNames("_foo_")
"val rf2: RasterFrameLayer = df" should compile
@@ -48,7 +48,7 @@ class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSupport
"val Some(col) = df.spatialKeyColumn" should compile
}
}
describe("RasterFrameLayer exention methods") {
describe("RasterFrameLayer extension methods") {
it("should provide spatial key column") {
noException should be thrownBy {
rf.spatialKeyColumn
@@ -123,6 +123,10 @@ class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSupport

val md3 = rf.toMarkdown(truncate=true, renderTiles = false)
md3 shouldNot include("<img")

// Should truncate JTS types even when we don't ask for it.
val md4 = rf.withGeometry().select("geometry").toMarkdown(truncate = false)
md4 should include ("...")
}

it("should render HTML") {
@@ -974,4 +974,28 @@ class RasterFunctionsSpec extends TestEnvironment with RasterMatchers {
val dResult = df.select($"ld").as[Tile].first()
dResult should be (randNDPRT.localDefined())
}

it("should check values isin"){
checkDocs("rf_local_is_in")

// tile is 3 by 3 with values 1 to 9
val df = Seq(byteArrayTile).toDF("t")
.withColumn("one", lit(1))
.withColumn("five", lit(5))
.withColumn("ten", lit(10))
.withColumn("in_expect_2", rf_local_is_in($"t", array($"one", $"five")))
.withColumn("in_expect_1", rf_local_is_in($"t", array($"ten", $"five")))
.withColumn("in_expect_0", rf_local_is_in($"t", array($"ten")))

val e2Result = df.select(rf_tile_sum($"in_expect_2")).as[Double].first()
e2Result should be (2.0)

val e1Result = df.select(rf_tile_sum($"in_expect_1")).as[Double].first()
e1Result should be (1.0)

val e0Result = df.select($"in_expect_0").as[Tile].first()
e0Result.toArray() should contain only (0)

// lazy val invalid = df.select(rf_local_is_in($"t", lit("foobar"))).as[Tile].first()
}
}
@@ -253,14 +253,18 @@ class RasterRefSpec extends TestEnvironment with TestData {
}
}

it("should construct a RasterRefTile without I/O") {
it("should construct and inspect a RasterRefTile without I/O") {
new Fixture {
// SimpleRasterInfo is a proxy for header data requests.
val start = SimpleRasterInfo.cacheStats.hitCount()
val startStats = SimpleRasterInfo.cacheStats
val t: ProjectedRasterTile = RasterRefTile(subRaster)
val result = Seq(t, subRaster.tile).toDF("tile").first()
val end = SimpleRasterInfo.cacheStats.hitCount()
end should be(start)
val df = Seq(t, subRaster.tile).toDF("tile")
val result = df.first()
SimpleRasterInfo.cacheStats.hitCount() should be(startStats.hitCount())
SimpleRasterInfo.cacheStats.missCount() should be(startStats.missCount())
val info = df.select(rf_dimensions($"tile"), rf_extent($"tile")).first()
SimpleRasterInfo.cacheStats.hitCount() should be(startStats.hitCount() + 2)
SimpleRasterInfo.cacheStats.missCount() should be(startStats.missCount())
}
}
}
@@ -49,6 +49,7 @@ class GeoTiffDataSource

def shortName() = GeoTiffDataSource.SHORT_NAME

/** Read a single GeoTIFF as a relation. */
def createRelation(sqlContext: SQLContext, parameters: Map[String, String]) = {
require(parameters.path.isDefined, "Valid URI 'path' parameter required.")
sqlContext.withRasterFrames
@@ -57,6 +58,7 @@ class GeoTiffDataSource
GeoTiffRelation(sqlContext, p)
}

/** Write a DataFrame containing tile bands into a single GeoTIFF. Note: performs a driver collect, and is not "big data" friendly. */
override def createRelation(sqlContext: SQLContext, mode: SaveMode, parameters: Map[String, String], df: DataFrame): BaseRelation = {
require(parameters.path.isDefined, "Valid URI 'path' parameter required.")
val path = parameters.path.get
@@ -67,8 +69,6 @@ class GeoTiffDataSource

require(tileCols.nonEmpty, "Could not find any tile columns.")



val destCRS = parameters.crs.orElse(df.asLayerSafely.map(_.crs)).getOrElse(
throw new IllegalArgumentException("A destination CRS must be provided")
)
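A hedged sketch of driving this writer through the generic DataFrameWriter API; `geotiff` is presumed to match GeoTiffDataSource.SHORT_NAME, and the `crs` option key and output path are assumptions:

// Collects tiles to the driver and assembles a single GeoTIFF,
// so only use it for modestly sized rasters.
df.write
  .format("geotiff")
  .option("crs", "EPSG:4326")   // destination CRS (assumed option key)
  .save("/tmp/scene.tif")       // hypothetical output path
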
@@ -37,7 +37,7 @@ class RasterSourceDataSource extends DataSourceRegister with RelationProvider {
override def shortName(): String = SHORT_NAME
override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = {
val bands = parameters.bandIndexes
val tiling = parameters.tileDims
val tiling = parameters.tileDims.orElse(Some(NOMINAL_TILE_DIMS))
val lazyTiles = parameters.lazyTiles
val spec = parameters.pathSpec
val catRef = spec.fold(_.registerAsTable(sqlContext), identity)
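For reference, a sketch of the reader this default affects; the `tile_dimensions` option key is an assumption and the URI is hypothetical. When no tile size is supplied, NOMINAL_TILE_DIMS is now applied rather than leaving the tiling unset:

val scenes = spark.read
  .format("raster")                        // RasterSourceDataSource short name
  .option("tile_dimensions", "256,256")    // assumed option key; omit to use the default
  .load("https://example.com/scene.tif")   // hypothetical raster URI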