-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add the initial shapefile datasource prototype
- Loading branch information
Showing
10 changed files
with
201 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 25 additions & 0 deletions
25
...c/main/scala/org/locationtech/rasterframes/datasource/shapefile/ShapeFileDataSource.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package org.locationtech.rasterframes.datasource.shapefile | ||
|
||
import org.apache.spark.sql.connector.catalog.{Table, TableProvider} | ||
import org.apache.spark.sql.connector.expressions.Transform | ||
import org.apache.spark.sql.sources.DataSourceRegister | ||
import org.apache.spark.sql.types.StructType | ||
import org.apache.spark.sql.util.CaseInsensitiveStringMap | ||
|
||
import java.util | ||
|
||
/**
 * Spark DataSource V2 entry point for reading shapefiles.
 *
 * The format is registered under the alias held in `ShapeFileDataSource.SHORT_NAME`.
 */
class ShapeFileDataSource extends TableProvider with DataSourceRegister {

  /** Format alias reported to Spark through `DataSourceRegister`. */
  def shortName(): String = ShapeFileDataSource.SHORT_NAME

  /**
   * Builds the table backing this source.
   *
   * None of the arguments are consulted here: every call yields a fresh `ShapeFileTable`.
   */
  def getTable(structType: StructType, transforms: Array[Transform], map: util.Map[String, String]): Table =
    new ShapeFileTable()

  /**
   * Reports the schema by asking a freshly built table for it.
   * No user-supplied schema (`null`) and no transforms are passed along.
   */
  def inferSchema(caseInsensitiveStringMap: CaseInsensitiveStringMap): StructType = {
    val options = caseInsensitiveStringMap.asCaseSensitiveMap()
    val table = getTable(null, Array.empty[Transform], options)
    table.schema()
  }
}
|
||
/** Constants shared by the shapefile data source implementation. */
object ShapeFileDataSource {
  /** Key of the read option that carries the shapefile location. */
  final val URL_PARAM = "url"

  /** Short format name under which the data source is registered. */
  final val SHORT_NAME = "shapefile"
}
33 changes: 33 additions & 0 deletions
33
...rc/main/scala/org/locationtech/rasterframes/datasource/shapefile/ShapeFilePartition.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package org.locationtech.rasterframes.datasource.shapefile | ||
|
||
import org.locationtech.rasterframes.encoders.syntax._ | ||
|
||
import geotrellis.vector.Geometry | ||
import org.apache.spark.sql.catalyst.InternalRow | ||
import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} | ||
import org.geotools.data.shapefile.ShapefileDataStore | ||
import org.geotools.data.simple.SimpleFeatureIterator | ||
|
||
import java.net.URL | ||
|
||
/**
 * Input partition describing the shapefile to read, identified by its URL.
 *
 * NOTE(review): `java.net.URL#equals`/`hashCode` may resolve host names, so the generated
 * case-class equality can block on DNS; avoid using partitions as hash keys.
 *
 * @param url location of the shapefile
 */
case class ShapeFilePartition(url: URL) extends InputPartition
|
||
/** Creates [[ShapeFilePartitionReader]] instances for [[ShapeFilePartition]] inputs. */
class ShapeFilePartitionReaderFactory extends PartitionReaderFactory {

  /**
   * @param partition the partition to read; only [[ShapeFilePartition]] is accepted
   * @return a reader over the features of the partition's shapefile
   * @throws UnsupportedOperationException for any other partition type
   */
  override def createReader(partition: InputPartition): PartitionReader[InternalRow] =
    partition match {
      case shapeFilePartition: ShapeFilePartition =>
        new ShapeFilePartitionReader(shapeFilePartition)
      case _ =>
        throw new UnsupportedOperationException("Partition processing is unsupported by the reader.")
    }
}
|
||
/**
 * Reads the features of one [[ShapeFilePartition]] and exposes each one as an `InternalRow`.
 *
 * The GeoTools data store and its feature iterator are opened lazily on first use.
 * `close()` releases only the resources that were actually opened, and always disposes
 * the data store even when closing the iterator fails.
 *
 * @param partition the partition whose URL points at the shapefile to read
 */
class ShapeFilePartitionReader(partition: ShapeFilePartition) extends PartitionReader[InternalRow] {
  import geotrellis.shapefile.ShapeFileReader._

  // Track which lazy resources were really initialised, so that close() on a reader that
  // was never advanced does not open the shapefile only to close it again.
  private var storeOpened = false
  private var iteratorOpened = false

  /** Data store opened on the partition URL; created on first access. */
  @transient lazy val ds: ShapefileDataStore = {
    val store = new ShapefileDataStore(partition.url)
    storeOpened = true
    store
  }

  /** Iterator over all features of the store; created on first access. */
  @transient lazy val partitionValues: SimpleFeatureIterator = {
    val features = ds.getFeatureSource.getFeatures.features
    iteratorOpened = true
    features
  }

  /** @return `true` while the underlying feature iterator has more features */
  def next: Boolean = partitionValues.hasNext

  /** Advances the feature iterator and encodes the feature's geometry as a row. */
  def get: InternalRow = partitionValues.next.geom[Geometry].toInternalRow

  /** Closes the feature iterator (if opened) and then disposes the data store (if opened). */
  def close(): Unit =
    try {
      if (iteratorOpened) partitionValues.close()
    } finally {
      // Runs even if closing the iterator throws, so the store is never leaked.
      if (storeOpened) ds.dispose()
    }
}
26 changes: 26 additions & 0 deletions
26
.../main/scala/org/locationtech/rasterframes/datasource/shapefile/ShapeFileScanBuilder.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package org.locationtech.rasterframes.datasource.shapefile | ||
|
||
import org.locationtech.rasterframes.datasource.stac.api.encoders._ | ||
import org.apache.spark.sql.connector.read.{Batch, InputPartition, PartitionReaderFactory, Scan, ScanBuilder} | ||
import org.apache.spark.sql.types.StructType | ||
|
||
import java.net.URL | ||
|
||
/**
 * Scan builder for a single shapefile; it hands the URL straight to the scan.
 *
 * @param url location of the shapefile to scan
 */
class ShapeFileScanBuilder(url: URL) extends ScanBuilder {
  /** @return a new [[ShapeFileBatchScan]] over the configured URL */
  override def build(): Scan = new ShapeFileBatchScan(url)
}
|
||
/**
 * Batch reading support for a single shapefile.
 * The schema is repeated here as it can change after column pruning, etc.
 *
 * @param url location of the shapefile to read
 */
class ShapeFileBatchScan(url: URL) extends Scan with Batch {
  /** @return the schema of the geometry encoder, which is the shape of every produced row */
  def readSchema(): StructType = geometryExpressionEncoder.schema

  /** This scan is its own batch description. */
  override def toBatch: Batch = this

  /**
   * The whole shapefile is loaded into a single partition: the URL is handed to one
   * [[ShapeFilePartition]], and its reader walks the file's features sequentially.
   * No attempt is made to split one shapefile across several partitions.
   */
  def planInputPartitions(): Array[InputPartition] = Array(ShapeFilePartition(url))

  /** @return the factory that turns a [[ShapeFilePartition]] into a partition reader */
  def createReaderFactory(): PartitionReaderFactory = new ShapeFilePartitionReaderFactory()
}
33 changes: 33 additions & 0 deletions
33
...ce/src/main/scala/org/locationtech/rasterframes/datasource/shapefile/ShapeFileTable.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package org.locationtech.rasterframes.datasource.shapefile | ||
|
||
import org.locationtech.rasterframes.datasource.stac.api.encoders._ | ||
|
||
import org.apache.spark.sql.connector.catalog.{SupportsRead, Table, TableCapability} | ||
import org.apache.spark.sql.connector.read.ScanBuilder | ||
import org.apache.spark.sql.types.StructType | ||
import org.apache.spark.sql.util.CaseInsensitiveStringMap | ||
import org.locationtech.rasterframes.datasource.shapefile.ShapeFileDataSource.URL_PARAM | ||
import org.locationtech.rasterframes.datasource.urlParam | ||
import java.net.URL | ||
|
||
import scala.collection.JavaConverters._ | ||
import java.util | ||
|
||
/** Read-only Spark table over a shapefile; supports batch reads only. */
class ShapeFileTable extends Table with SupportsRead {
  import ShapeFileTable._

  /** @return the string form of this table's runtime class */
  def name(): String = getClass.toString

  /** @return the schema of the geometry encoder */
  def schema(): StructType = geometryExpressionEncoder.schema

  /** @return a set containing `BATCH_READ` as the only capability */
  def capabilities(): util.Set[TableCapability] = {
    val supported: Set[TableCapability] = Set(TableCapability.BATCH_READ)
    supported.asJava
  }

  /**
   * @param options read options; the shapefile URL is extracted from them
   * @return a scan builder bound to that URL
   */
  def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
    val shapefileUrl = options.url
    new ShapeFileScanBuilder(shapefileUrl)
  }
}
|
||
object ShapeFileTable {
  /** Adds shapefile-specific accessors to Spark's read-option map. */
  implicit class CaseInsensitiveStringMapOps(val options: CaseInsensitiveStringMap) extends AnyVal {
    /**
     * Looks up the shapefile location under the `URL_PARAM` option.
     *
     * @throws IllegalArgumentException when the lookup yields no URL
     */
    def url: URL = {
      val location = urlParam(URL_PARAM, options)
      location.getOrElse(throw new IllegalArgumentException("Missing URL."))
    }
  }
}
11 changes: 11 additions & 0 deletions
11
datasource/src/main/scala/org/locationtech/rasterframes/datasource/shapefile/package.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
package org.locationtech.rasterframes.datasource | ||
|
||
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder | ||
import org.locationtech.jts.geom.Geometry | ||
|
||
/** Package-level implicits for the shapefile data source. */
package object shapefile extends Serializable {
  // Modelled after org.locationtech.geomesa.spark.jts.encoders.SpatialEncoders.
  // GeometryUDT should be registered before the encoder below is used.
  // TODO: use TypedEncoders derived from UDT instances?
  @transient implicit lazy val geometryExpressionEncoder: ExpressionEncoder[Option[Geometry]] =
    ExpressionEncoder[Option[Geometry]]()
}
38 changes: 38 additions & 0 deletions
38
...st/scala/org/locationtech/rasterframes/datasource/shapefile/ShapeFileDataSourceTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
package org.locationtech.rasterframes.datasource.shapefile | ||
|
||
import org.locationtech.rasterframes._ | ||
|
||
import geotrellis.shapefile.ShapeFileReader | ||
import org.locationtech.jts.geom.Geometry | ||
import org.locationtech.rasterframes.TestEnvironment | ||
|
||
import java.net.URL | ||
|
||
/** Checks that the `shapefile` Spark format returns the same geometries as the GeoTrellis reader. */
class ShapeFileDataSourceTest extends TestEnvironment { self =>
  import spark.implicits._

  describe("ShapeFile Spark reader") {
    it("should read a shapefile") {
      import ShapeFileReader._

      // NOTE: the fixture is fetched from GitHub, so this test needs network access.
      val shapefileLocation =
        "https://github.com/locationtech/geotrellis/raw/master/shapefile/data/shapefiles/demographics/demographics.shp"

      // Reference result: geometries of the first two features as read directly by GeoTrellis.
      val referenceFeatures = ShapeFileReader.readSimpleFeatures(new URL(shapefileLocation))
      val expectedGeometries = referenceFeatures.map(_.geom[Geometry]).take(2)

      // Same file loaded through the Spark data source under test.
      val reader = spark.read.format("shapefile").option("url", shapefileLocation)
      val loaded = reader.load().limit(2)

      // loaded.printSchema()

      val actualGeometries = loaded.as[Option[Geometry]].collect()
      actualGeometries shouldBe expectedGeometries
    }

  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters