Skip to content

Commit 44f9bb4

Browse files
Authored by: thomas-maschler (Thomas Maschler); co-authored by: pomadchin
Spark 3.4 (#611)
* Spark 3.4, multi build CI * escape var * update frameless, remove 3.4.0 from CI for now * change block var * fix reference * don't use anchors * use spark version in Jar name * comment out tests * add poetry.lock * don't continue on error * update poetry lock * update frameless * add tests back in * change ulimit * set timeZone * support pyspark 3.2-3.4 * add env var for tests --------- Co-authored-by: Thomas Maschler <[email protected]> Co-authored-by: Grigory <[email protected]>
1 parent efc1bac commit 44f9bb4

File tree

12 files changed

+1633
-1302
lines changed

12 files changed

+1633
-1302
lines changed

.github/actions/init-python-env/action.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ inputs:
99
poetry_version:
1010
description: 'Version of Poetry to configure'
1111
default: '1.3.2'
12+
spark_version:
13+
description: 'Version of Spark to configure'
14+
default: '3.4.0'
1215

1316
runs:
1417
using: "composite"
@@ -36,5 +39,7 @@ runs:
3639

3740
- name: Install Poetry project dependencies
3841
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
42+
env:
43+
SPARK_VERSION: ${{ inputs.spark_version }}
3944
shell: bash
4045
run: make init-python

.github/workflows/ci.yml

Lines changed: 80 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ jobs:
1515
build-scala:
1616
runs-on: ubuntu-20.04
1717

18+
strategy:
19+
matrix:
20+
spark_version:
21+
- "3.2.4"
22+
- "3.3.2"
23+
- "3.4.0"
24+
1825
steps:
1926
- name: Checkout Repository
2027
uses: actions/checkout@v3
@@ -25,22 +32,30 @@ jobs:
2532
uses: ./.github/actions/init-scala-env
2633

2734
- name: Compile Scala Project
35+
env:
36+
SPARK_VERSION: ${{ matrix.spark_version }}
2837
run: make compile-scala
2938

3039
- name: Test Scala Project
3140
# python/* branches are not supposed to change scala code, trust them
3241
if: ${{ !startsWith(github.event.inputs.from_branch, 'python/') }}
33-
run: make test-scala
42+
env:
43+
SPARK_VERSION: ${{ matrix.spark_version }}
44+
run:
45+
ulimit -c unlimited
46+
make test-scala
3447

3548
- name: Build Spark Assembly
49+
env:
50+
SPARK_VERSION: ${{ matrix.spark_version }}
3651
shell: bash
3752
run: make build-scala
3853

3954
- name: Cache Spark Assembly
4055
uses: actions/cache@v3
4156
with:
4257
path: ./dist/*
43-
key: dist-${{ github.sha }}
58+
key: dist-${{ matrix.spark_version }}-${{ github.sha }}
4459

4560
build-python:
4661
# scala/* branches are not supposed to change python code, trust them
@@ -50,7 +65,13 @@ jobs:
5065

5166
strategy:
5267
matrix:
53-
python: [ "3.8" ]
68+
python:
69+
- "3.8"
70+
- "3.9"
71+
spark_version:
72+
- "3.2.4"
73+
- "3.3.2"
74+
- "3.4.0"
5475

5576
steps:
5677
- name: Checkout Repository
@@ -61,6 +82,7 @@ jobs:
6182
- uses: ./.github/actions/init-python-env
6283
with:
6384
python_version: ${{ matrix.python }}
85+
spark_version: ${{ matrix.spark_version }}
6486

6587
- name: Static checks
6688
shell: bash
@@ -69,18 +91,27 @@ jobs:
6991
- uses: actions/cache@v3
7092
with:
7193
path: ./dist/*
72-
key: dist-${{ github.sha }}
94+
key: dist-${{ matrix.spark_version }}-${{ github.sha }}
7395

7496
- name: Run tests
97+
env:
98+
SPARK_VERSION: ${{ matrix.spark_version }}
7599
shell: bash
76100
run: make test-python-quick
77101

78-
publish:
79-
name: Publish Artifacts
102+
publish-scala:
103+
name: Publish Scala Artifacts
80104
needs: [ build-scala, build-python ]
81105
runs-on: ubuntu-20.04
82106
if: (github.event_name != 'pull_request') && startsWith(github.ref, 'refs/tags/v')
83107

108+
strategy:
109+
matrix:
110+
spark_version:
111+
- "3.2.4"
112+
- "3.3.2"
113+
- "3.4.0"
114+
84115
steps:
85116
- name: Checkout Repository
86117
uses: actions/checkout@v3
@@ -94,17 +125,58 @@ jobs:
94125
shell: bash
95126
env:
96127
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
128+
SPARK_VERSION: ${{ matrix.spark_version }}
97129
run: make publish-scala
98130

131+
- name: Build Spark Assembly
132+
env:
133+
SPARK_VERSION: ${{ matrix.spark_version }}
134+
shell: bash
135+
run: make build-scala
136+
137+
- name: Cache Spark Assembly
138+
uses: actions/cache@v3
139+
with:
140+
path: ./dist/*
141+
key: dist-${{ matrix.spark_version }}-${{ github.ref }}
142+
143+
144+
publish-python:
145+
name: Publish Scala Artifacts
146+
needs: [ publish-scala ]
147+
runs-on: ubuntu-20.04
148+
if: (github.event_name != 'pull_request') && startsWith(github.ref, 'refs/tags/v')
149+
150+
strategy:
151+
matrix:
152+
python:
153+
- "3.8"
154+
- "3.9"
155+
spark_version:
156+
- "3.2.4"
157+
- "3.3.2"
158+
- "3.4.0"
159+
160+
steps:
161+
- name: Checkout Repository
162+
uses: actions/checkout@v3
163+
with:
164+
fetch-depth: 0
165+
99166
- uses: ./.github/actions/init-python-env
100167
with:
101-
python_version: "3.8"
168+
python_version: ${{ matrix.python }}
169+
spark_version: ${{ matrix.spark_version }}
170+
171+
- uses: actions/cache@v3
172+
with:
173+
path: ./dist/*
174+
key: dist-${{ matrix.spark_version }}-${{ github.ref }}
102175

103176
- name: Build Python whl
104177
shell: bash
105178
run: make build-python
106179

107-
108180
# TODO: Where does this go, do we need it?
109181
# - name: upload artefacts
110182
# uses: ./.github/actions/upload_artefacts

Makefile

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
SHELL := /usr/bin/env bash
1+
SHELL := env SPARK_VERSION=$(SPARK_VERSION) /usr/bin/env bash
2+
SPARK_VERSION ?= 3.4.0
23

34
.PHONY: init test lint build docs notebooks help
45

6+
DIST_DIR = ./dist
7+
58
help:
69
@echo "init - Setup the repository"
710
@echo "clean - clean all compiled python files, build artifacts and virtual envs. Run \`make init\` anew afterwards."
@@ -18,27 +21,32 @@ test: test-scala test-python
1821
###############
1922

2023
compile-scala:
21-
sbt -v -batch compile test:compile it:compile
24+
sbt -v -batch compile test:compile it:compile -DrfSparkVersion=${SPARK_VERSION}
2225

2326
test-scala: test-core-scala test-datasource-scala test-experimental-scala
2427

2528
test-core-scala:
26-
sbt -batch core/test
29+
sbt -batch core/test -DrfSparkVersion=${SPARK_VERSION}
2730

2831
test-datasource-scala:
29-
sbt -batch datasource/test
32+
sbt -batch datasource/test -DrfSparkVersion=${SPARK_VERSION}
3033

3134
test-experimental-scala:
32-
sbt -batch experimental/test
35+
sbt -batch experimental/test -DrfSparkVersion=${SPARK_VERSION}
36+
37+
build-scala: clean-build-scala
38+
sbt "pyrasterframes/assembly" -DrfSparkVersion=${SPARK_VERSION}
3339

34-
build-scala:
35-
sbt "pyrasterframes/assembly"
40+
clean-build-scala:
41+
if [ -d "$(DIST_DIR)" ]; then \
42+
find ./dist -name 'pyrasterframes-assembly-${SPARK_VERSION}*.jar' -exec rm -fr {} +; \
43+
fi
3644

3745
clean-scala:
38-
sbt clean
46+
sbt clean -DrfSparkVersion=${SPARK_VERSION}
3947

4048
publish-scala:
41-
sbt publish
49+
sbt publish -DrfSparkVersion=${SPARK_VERSION}
4250

4351
################
4452
# PYTHON
@@ -49,9 +57,11 @@ init-python:
4957
./.venv/bin/python -m pip install --upgrade pip
5058
poetry self add "poetry-dynamic-versioning[plugin]"
5159
poetry install
60+
poetry add pyspark@${SPARK_VERSION}
5261
poetry run pre-commit install
5362

5463
test-python: build-scala
64+
poetry add pyspark@${SPARK_VERSION}
5565
poetry run pytest -vv python/tests --cov=python/pyrasterframes --cov=python/geomesa_pyspark --cov-report=term-missing
5666

5767
test-python-quick:
@@ -72,8 +82,10 @@ notebooks-python: clean-notebooks-python
7282
clean-python: clean-build-python clean-test-python clean-venv-python clean-docs-python clean-notebooks-python
7383

7484
clean-build-python:
75-
find ./dist -name 'pyrasterframes*.whl' -exec rm -fr {} +
76-
find ./dist -name 'pyrasterframes*.tar.gz' -exec rm -fr {} +
85+
if [ -d "$(DIST_DIR)" ]; then \
86+
find ./dist -name 'pyrasterframes*.whl' -exec rm -fr {} +; \
87+
find ./dist -name 'pyrasterframes*.tar.gz' -exec rm -fr {} +; \
88+
fi
7789

7890
clean-test-python:
7991
rm -f .coverage

build.sbt

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ ThisBuild / dynverSonatypeSnapshots := true
2727
ThisBuild / publishMavenStyle := true
2828
ThisBuild / Test / publishArtifact := false
2929

30+
3031
addCommandAlias("makeSite", "docs/makeSite")
3132
addCommandAlias("previewSite", "docs/previewSite")
3233
addCommandAlias("ghpagesPushSite", "docs/ghpagesPushSite")
@@ -38,13 +39,15 @@ lazy val IntegrationTest = config("it") extend Test
3839
lazy val root = project
3940
.withId("RasterFrames")
4041
.aggregate(core, datasource)
41-
.settings(publish / skip := true)
42+
.settings(
43+
publish / skip := true)
4244

4345
lazy val `rf-notebook` = project
4446
.dependsOn(pyrasterframes)
4547
.disablePlugins(CiReleasePlugin)
4648
.enablePlugins(RFAssemblyPlugin, DockerPlugin)
47-
.settings(publish / skip := true)
49+
.settings(
50+
publish / skip := true)
4851

4952
lazy val core = project
5053
.enablePlugins(BuildInfoPlugin)
@@ -79,7 +82,7 @@ lazy val core = project
7982
ExclusionRule(organization = "com.github.mpilquist")
8083
),
8184
scaffeine,
82-
sparktestingbase excludeAll ExclusionRule("org.scala-lang.modules", "scala-xml_2.12"),
85+
sparktestingbase().value % Test excludeAll ExclusionRule("org.scala-lang.modules", "scala-xml_2.12"),
8386
`scala-logging`
8487
),
8588
libraryDependencies ++= {

datasource/src/test/scala/org/locationtech/rasterframes/datasource/slippy/SlippyDataSourceSpec.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ class SlippyDataSourceSpec extends TestEnvironment with TestData with BeforeAndA
3434

3535
def tileFilesCount(dir: File): Long = {
3636
val r = countFiles(dir, ".png")
37-
println(dir, r)
3837
r
3938
}
4039

0 commit comments

Comments (0)