Skip to content

Commit

Permalink
Merge branch 'databrickslabs-main'
Browse files Browse the repository at this point in the history
  • Loading branch information
a0x8o committed Dec 22, 2023
2 parents d25c9dc + d48c233 commit 8b4b566
Show file tree
Hide file tree
Showing 191 changed files with 1,515 additions and 4,295 deletions.
20 changes: 12 additions & 8 deletions .github/actions/r_build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,21 @@ runs:
- name: Create download location for Spark
shell: bash
run: |
sudo mkdir -p /usr/spark-download/unzipped
sudo mkdir -p /usr/spark-download/raw
sudo chown -R $USER: /usr/spark-download/
sudo mkdir -p /usr/spark-download-${{ matrix.spark }}/unzipped
sudo mkdir -p /usr/spark-download-${{ matrix.spark }}/raw
sudo chown -R $USER: /usr/spark-download-${{ matrix.spark }}/
- name: Cache Spark download
id: cache-spark
uses: actions/cache@v3
with:
path: /usr/spark-download/unzipped
path: /usr/spark-download-${{ matrix.spark }}/unzipped
key: r_build-spark
- if: ${{ steps.cache-spark.outputs.cache-hit != 'true' }}
name: Download and unpack Spark
shell: bash
run: |
wget -P /usr/spark-download/raw https://archive.apache.org/dist/spark/spark-${{ matrix.spark }}/spark-${{ matrix.spark }}-bin-hadoop3.tgz
tar zxvf /usr/spark-download/raw/spark-${{ matrix.spark }}-bin-hadoop3.tgz -C /usr/spark-download/unzipped
wget -P /usr/spark-download-${{ matrix.spark }}/raw https://archive.apache.org/dist/spark/spark-${{ matrix.spark }}/spark-${{ matrix.spark }}-bin-hadoop3.tgz
tar zxvf /usr/spark-download-${{ matrix.spark }}/raw/spark-${{ matrix.spark }}-bin-hadoop3.tgz -C /usr/spark-download-${{ matrix.spark }}/unzipped
- name: Create R environment
shell: bash
run: |
Expand All @@ -51,24 +51,28 @@ runs:
cd R
Rscript --vanilla generate_docs.R
env:
SPARK_HOME: /usr/spark-download/unzipped/spark-${{ matrix.spark }}-bin-hadoop3
SPARK_HOME: /usr/spark-download-${{ matrix.spark }}/unzipped/spark-${{ matrix.spark }}-bin-hadoop3
- name: Build R package
shell: bash
run: |
cd R
Rscript --vanilla build_r_package.R
env:
SPARK_HOME: /usr/spark-download/unzipped/spark-${{ matrix.spark }}-bin-hadoop3
SPARK_HOME: /usr/spark-download-${{ matrix.spark }}/unzipped/spark-${{ matrix.spark }}-bin-hadoop3
- name: Test SparkR package
shell: bash
run: |
cd R/sparkR-mosaic
Rscript --vanilla tests.R
env:
SPARK_HOME: /usr/spark-download-${{ matrix.spark }}/unzipped/spark-${{ matrix.spark }}-bin-hadoop3
- name: Test sparklyr package
shell: bash
run: |
cd R/sparklyr-mosaic
Rscript --vanilla tests.R
env:
SPARK_HOME: /usr/spark-download-${{ matrix.spark }}/unzipped/spark-${{ matrix.spark }}-bin-hadoop3
- name: Copy R artifacts to GH Actions run
shell: bash
run: |
Expand Down
25 changes: 18 additions & 7 deletions .github/actions/scala_build/action.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: build mosaic scala
description: build mosaic scala
inputs:
inputs:
skip_tests:
description: 'Skip Scala Tests? true or false'
default: 'false'
Expand All @@ -19,12 +19,23 @@ runs:
- name: Add packaged GDAL dependencies
shell: bash
run : |
sudo apt-get update && sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7
pip install databricks-mosaic-gdal==${{ matrix.gdal }}
sudo tar -xf /opt/hostedtoolcache/Python/${{ matrix.python }}/x64/lib/python3.9/site-packages/databricks-mosaic-gdal/resources/gdal-${{ matrix.gdal }}-filetree.tar.xz -C /
sudo tar -xhf /opt/hostedtoolcache/Python/${{ matrix.python }}/x64/lib/python3.9/site-packages/databricks-mosaic-gdal/resources/gdal-${{ matrix.gdal }}-symlinks.tar.xz -C /
pip install numpy==${{ matrix.numpy }}
pip install gdal==${{ matrix.gdal }}
# - update apt
sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-backports main universe multiverse restricted"
sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-updates main universe multiverse restricted"
sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-security main multiverse restricted universe"
sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) main multiverse restricted universe"
sudo apt-get update -y
# - install numpy first
pip install --upgrade pip
pip install 'numpy>=${{ matrix.numpy }}'
# - install natives
sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7
sudo apt-get install -y gdal-bin libgdal-dev python3-gdal
# - install gdal with numpy
pip install --no-cache-dir --force-reinstall 'GDAL[numpy]==${{ matrix.gdal }}'
sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so
sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so.30
#sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so.30.0.3
- name: Test and build the scala JAR - skip tests is false
if: inputs.skip_tests == 'false'
shell: bash
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/build_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ on:
- "**"
jobs:
build:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.9.5 ]
python: [ 3.10.12 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
gdal: [ 3.4.1 ]
spark: [ 3.4.0 ]
R: [ 4.2.2 ]
steps:
- name: checkout code
uses: actions/checkout@v2
Expand All @@ -31,4 +31,4 @@ jobs:
- name: build R
uses: ./.github/actions/r_build
- name: upload artefacts
uses: ./.github/actions/upload_artefacts
uses: ./.github/actions/upload_artefacts
14 changes: 7 additions & 7 deletions .github/workflows/build_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,24 @@ on:

jobs:
build:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.9.5 ]
python: [ 3.10.12 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
gdal: [ 3.4.1 ]
spark: [ 3.4.0 ]
R: [ 4.2.2 ]
steps:
- name: checkout code
uses: actions/checkout@v2
- name: build scala with skipping tests
uses: ./.github/actions/scala_build
with:
with:
skip_tests: "true"
- name: build python
uses: ./.github/actions/python_build
- name: upload artefacts
uses: ./.github/actions/upload_artefacts
uses: ./.github/actions/upload_artefacts
14 changes: 7 additions & 7 deletions .github/workflows/build_r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,24 @@ on:

jobs:
build:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.9.5 ]
python: [ 3.10.12 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
gdal: [ 3.4.1 ]
spark: [ 3.4.0 ]
R: [ 4.2.2 ]
steps:
- name: checkout code
uses: actions/checkout@v2
- name: build scala with skipping tests
uses: ./.github/actions/scala_build
with:
with:
skip_tests: 'true'
- name: build r artefacts
uses: ./.github/actions/r_build
- name: upload artefacts
uses: ./.github/actions/upload_artefacts
uses: ./.github/actions/upload_artefacts
10 changes: 5 additions & 5 deletions .github/workflows/build_scala.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@ on:

jobs:
build:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.9.5 ]
python: [ 3.10.12 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
gdal: [ 3.4.1 ]
spark: [ 3.4.0 ]
R: [ 4.2.2 ]
steps:
- name: checkout code
uses: actions/checkout@v2
Expand Down
9 changes: 7 additions & 2 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@ on:
- feature/new_docs
jobs:
build:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
strategy:
matrix:
python: [ 3.9.5 ]
steps:
- name: install pandoc
run: sudo apt-get install pandoc
run: |
sudo apt-get install pandoc
sudo apt-get install --reinstall python3-pkg-resources
- uses: actions/setup-python@v2
- uses: actions/checkout@v2
with:
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/pypi-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ on:
jobs:
build-n-publish:
name: Build project and publish to TestPyPI
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
strategy:
matrix:
python: [3.9]
spark: [3.2.1]
python: [ 3.10.12 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.1 ]
spark: [ 3.4.0 ]
R: [ 4.2.2 ]
steps:
- name: checkout code
uses: actions/checkout@v2
Expand All @@ -24,4 +27,3 @@ jobs:
user: __token__
password: ${{ secrets.LABS_PYPI_TOKEN }}
packages_dir: python/dist/

13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
## v0.3.14
- Fixes for Warning and Error messages on mosaic_enable call.
- Performance improvements for raster functions.
- Fix support for GDAL configuration via spark config (use 'spark.databricks.labs.mosaic.gdal.' prefix).

## v0.3.13
- R bindings generation fixed and improved.
- Remove usage of /vsimem/ drivers for GDAL due to memory leaks.
- Add support for MapAlgebra expressions via RST_MapAlgebra.
- Add support for custom combine python functions via RST_DerivedBand.
- Improve test coverage.
- Allow for GDAL configuration via spark config (use 'spark.databricks.labs.mosaic.gdal.' prefix).

## v0.3.12
- Make JTS the default Geometry Provider.
- Add raster tile functions.
Expand Down
4 changes: 2 additions & 2 deletions R/install_deps.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
options(repos = c(CRAN = "https://packagemanager.posit.co/cran/__linux__/focal/latest"))
options(repos = c(CRAN = "https://packagemanager.posit.co/cran/__linux__/jammy/latest"))

install.packages(c("pkgbuild", "testthat", "roxygen2", "sparklyr"))
install.packages(c("pkgbuild", "testthat", "roxygen2", "sparklyr"))
2 changes: 1 addition & 1 deletion R/sparkR-mosaic/sparkrMosaic/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sparkrMosaic
Title: SparkR bindings for Databricks Mosaic
Version: 0.3.12
Version: 0.3.14
Authors@R:
person("Robert", "Whiffin", , "robert.whiffin@databricks.com", role = c("aut", "cre")
)
Expand Down
3 changes: 1 addition & 2 deletions R/sparkR-mosaic/tests.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
library(testthat)

spark_location <- "/usr/spark-download/unzipped/spark-3.3.2-bin-hadoop3"
Sys.setenv(SPARK_HOME = spark_location)
spark_location <- Sys.getenv("SPARK_HOME")
library(SparkR, lib.loc = c(file.path(spark_location, "R", "lib")))
.libPaths(c(file.path(spark_location, "R", "lib"), .libPaths()))

Expand Down
2 changes: 1 addition & 1 deletion R/sparklyr-mosaic/sparklyrMosaic/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sparklyrMosaic
Title: sparklyr bindings for Databricks Mosaic
Version: 0.3.12
Version: 0.3.14
Authors@R:
person("Robert", "Whiffin", , "robert.whiffin@databricks.com", role = c("aut", "cre")
)
Expand Down
5 changes: 3 additions & 2 deletions R/sparklyr-mosaic/tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ if(length(getOption("repos")) < 1) {
install.packages("sparklyr", repos="")
library(sparklyr)

spark_home_set("/usr/spark-download/unzipped/spark-3.3.2-bin-hadoop3")
install.packages("sparklyrMosaic_0.3.12.tar.gz", repos = NULL)
spark_home <- Sys.getenv("SPARK_HOME")
spark_home_set(spark_home)
install.packages("sparklyrMosaic_0.3.14.tar.gz", repos = NULL)
library(sparklyrMosaic)

# find the mosaic jar in staging
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Mosaic was created to simplify the implementation of scalable geospatial data pi
Mosaic provides geospatial tools for
* Data ingestion (WKT, WKB, GeoJSON)
* Data processing
* Geometry and geography `ST_` operations (with default [JTS](https://github.com/locationtech/jts) or [ESRI](https://github.com/Esri/geometry-api-java))
* Geometry and geography `ST_` operations via [JTS](https://github.com/locationtech/jts)
* Indexing (with default [H3](https://github.com/uber/h3) or BNG)
* Chipping of polygons and lines over an indexing grid [co-developed with Ordnance Survey and Microsoft](https://databricks.com/blog/2021/10/11/efficient-point-in-polygon-joins-via-pyspark-and-bng-geospatial-indexing.html)
* Data visualization ([Kepler](https://github.com/keplergl/kepler.gl))
Expand Down
1 change: 1 addition & 0 deletions docs/docs-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
setuptools==68.1.2
Sphinx==4.4.0
sphinx-material==0.0.35
nbsphinx==0.8.8
Expand Down
8 changes: 7 additions & 1 deletion docs/source/api/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,10 @@ API Documentation
spatial-functions
spatial-indexing
spatial-predicates
spatial-aggregations
spatial-aggregations
raster-functions
rasterio-udfs
Loading

0 comments on commit 8b4b566

Please sign in to comment.