spark-4.0.0-preview2
pan3793 committed Sep 18, 2024
1 parent 860aeae commit fd0cfc1
Showing 8 changed files with 26 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/master.yml
@@ -77,7 +77,7 @@ jobs:
comment: 'verify-on-spark-3.4-binary'
- java: 17
spark: '3.5'
- spark-archive: '-Pscala-2.13 -Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-4.0.0-preview1 -Dspark.archive.name=spark-4.0.0-preview1-bin-hadoop3.tgz'
+ spark-archive: '-Pscala-2.13 -Dspark.archive.mirror=https://dist.apache.org/repos/dist/dev/spark/v4.0.0-preview2-rc1-bin -Dspark.archive.name=spark-4.0.0-preview2-bin-hadoop3.tgz'
exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PaimonTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
comment: 'verify-on-spark-4.0-binary'
env:
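The `-Dspark.archive.mirror` and `-Dspark.archive.name` properties above are combined into the URL of the tarball this CI job downloads. A minimal sketch of that composition (an illustrative assumption; the actual wiring lives in Kyuubi's Maven build, not in this snippet):

```
// Illustrative only: how the two -D properties plausibly combine.
val mirror = "https://dist.apache.org/repos/dist/dev/spark/v4.0.0-preview2-rc1-bin"
val name   = "spark-4.0.0-preview2-bin-hadoop3.tgz"
val url    = s"$mirror/$name"
// => .../v4.0.0-preview2-rc1-bin/spark-4.0.0-preview2-bin-hadoop3.tgz
```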
14 changes: 6 additions & 8 deletions HOW-TO.md
@@ -27,7 +27,7 @@ git clone https://github.com/pan3793/kyuubi.git -b kyuubi-next kyuubi-next
### Requirements

- `JAVA_HOME` points to Java 17
- - `SPARK_HOME` points to `/path/of/spark-4.0.0-preview1-bin-hadoop3`
+ - `SPARK_HOME` points to `/path/of/spark-4.0.0-preview2-bin-hadoop3`

### Run

@@ -52,17 +52,15 @@ The gRPC service listens 10999 by default.

### Connect to Kyuubi Connect

- Spark Connect Scala client(use the latest `spark-shell` as example)
+ Spark Connect Scala client (Requires: Java 17, Spark 4.0.0-preview2)
```
- git clone git@github.com:apache/spark.git
- cd spark
- build/sbt package -Phive
- SPARK_PREPEND_CLASSES=1 bin/spark-shell --remote sc://H27212-MAC-01.local:10999 --user_id chengpan --user_name chengpan
+ cd /path/of/spark-4.0.0-preview2-bin-hadoop3
+ bin/spark-shell --remote sc://H27212-MAC-01.local:10999 --user_id chengpan --user_name chengpan
```
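
Once the shell connects, a quick smoke test confirms the gRPC round trip to Kyuubi Connect. A minimal sketch to run inside the connected `spark-shell` (any trivial query works; these statements are illustrative, not part of the commit):

```
spark.range(5).selectExpr("id", "id * 2 AS doubled").show()
spark.sql("SHOW DATABASES").show()
```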

PySpark Connect client (Requires: Python >=3.9)
```
- pip install pyspark-connect==4.0.0.dev1
+ pip install pyspark-connect==4.0.0.dev2
pyspark --remote sc://H27212-MAC-01.local:10999 --user_id chengpan --user_name chengpan
```

@@ -72,7 +70,7 @@ Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
- /___/ .__/\_,_/_/ /_/\_\ version 4.0.0-SNAPSHOT
+ /___/ .__/\_,_/_/ /_/\_\ version 4.0.0-preview2
/_/
Type in expressions to have them evaluated.
@@ -63,7 +63,8 @@ trait DropNamespaceSuiteBase extends DDLCommandTestUtils {
sql(s"DROP NAMESPACE $catalogName.unknown")
}.getMessage
assert(message.contains(s"'unknown' not found") ||
message.contains(s"The schema `unknown` cannot be found"))
message.contains(s"The schema `unknown` cannot be found") ||
message.contains("SCHEMA_NOT_FOUND"))
}

test("drop non-empty namespace with a non-cascading mode") {
@@ -263,7 +263,7 @@ object SparkSQLEngine extends Logging {
// "Cannot mutate ReadOnlySQLConf" exception when task calling HiveResult.getBinaryFormatter.
// Here we follow the HiveResult.getBinaryFormatter behavior to set it to UTF8 if configuration
// is absent to reserve the legacy behavior for compatibility.
- _sparkConf.setIfMissing("spark.sql.binaryOutputStyle", "UTF8")
+ _sparkConf.setIfMissing("spark.sql.binaryOutputStyle", "UTF-8")
_sparkConf.setIfMissing("spark.master", "local")
_sparkConf.set(
"spark.redaction.regex",
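The hunk above relies on `SparkConf.setIfMissing`, which applies the `UTF-8` default only when the user has not already set the key, so explicit configuration always wins. A minimal sketch of that semantics (the `HEX` value is purely illustrative):

```
import org.apache.spark.SparkConf

val conf = new SparkConf(loadDefaults = false)
conf.setIfMissing("spark.sql.binaryOutputStyle", "UTF-8")
assert(conf.get("spark.sql.binaryOutputStyle") == "UTF-8") // default applied

val userConf = new SparkConf(loadDefaults = false)
  .set("spark.sql.binaryOutputStyle", "HEX") // user's explicit choice
userConf.setIfMissing("spark.sql.binaryOutputStyle", "UTF-8") // no-op here
assert(userConf.get("spark.sql.binaryOutputStyle") == "HEX")
```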
@@ -102,7 +102,7 @@ object SparkCatalogUtils extends Logging {
private def getGlobalTempViewManager(
spark: SparkSession,
schemaPattern: String): Seq[String] = {
- val database = spark.sharedState.globalTempViewManager.database
+ val database = spark.conf.get("spark.sql.globalTempDatabase")
Option(database).filter(_.matches(schemaPattern)).toSeq
}

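The hunk above swaps an internal `sharedState` access for the public `spark.sql.globalTempDatabase` conf key, whose default value is `global_temp`. A hedged sketch of the same lookup from user code (assumes a live `SparkSession` named `spark`, e.g. in `spark-shell`; the view name is made up):

```
val db = spark.conf.get("spark.sql.globalTempDatabase") // "global_temp" by default
spark.range(3).createOrReplaceGlobalTempView("demo")
spark.table(s"$db.demo").show() // resolves through the global temp database
```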
@@ -514,7 +514,6 @@ class SparkOperationSuite extends WithSparkSQLEngine with HiveMetadataTests with
assert(status.getStatusCode === TStatusCode.ERROR_STATUS)
if (SPARK_ENGINE_RUNTIME_VERSION >= "3.4") {
assert(errorMessage.contains("[SCHEMA_NOT_FOUND]"))
assert(errorMessage.contains(s"The schema `$dbName` cannot be found."))
} else {
assert(errorMessage.contains(s"Database '$dbName' not found"))
}
@@ -55,7 +55,6 @@ class KyuubiGrpcSession(
}

sessionConf.set(FRONTEND_PROTOCOLS, Seq("GRPC"))
sessionConf.set("spark.jars.packages", "org.apache.spark:spark-connect_2.13:4.0.0-preview1")

lazy val engine: EngineRef = new EngineRef(
sessionConf,
17 changes: 15 additions & 2 deletions pom.xml
@@ -197,7 +197,7 @@
DO NOT forget to change the following properties when change the minor version of Spark:
`delta.version`, `delta.artifact`, `maven.plugin.scalatest.exclude.tags`
-->
<spark.version>4.0.0-preview1</spark.version>
<spark.version>4.0.0-preview2</spark.version>
<spark.binary.version>4.0</spark.binary.version>
<spark.archive.scala.suffix>-scala${scala.binary.version}</spark.archive.scala.suffix>
<spark.archive.name>spark-${spark.version}-bin-hadoop3.tgz</spark.archive.name>
@@ -1261,6 +1261,18 @@
<name>Maven Repository</name>
<url>https://repo.maven.apache.org/maven2</url>
</repository>

<repository>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
<id>staging</id>
<name>Staging Repo</name>
<url>https://repository.apache.org/content/repositories/orgapachespark-1468/</url>
</repository>
</repositories>

<pluginRepositories>
@@ -2021,7 +2033,7 @@
<module>extensions/spark/kyuubi-spark-connector-hive</module>
</modules>
<properties>
<spark.version>4.0.0-preview1</spark.version>
<spark.version>4.0.0-preview2</spark.version>
<spark.binary.version>4.0</spark.binary.version>
<antlr4.version>4.13.1</antlr4.version>
<delta.version>4.0.0rc1</delta.version>
@@ -2033,6 +2045,7 @@
<!-- TODO: update once Paimon support Spark 4.0 -->
<paimon.artifact>paimon-spark-3.5</paimon.artifact>
<maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PaimonTest,org.apache.kyuubi.tags.HudiTest</maven.plugin.scalatest.exclude.tags>
<spark.archive.mirror>https://dist.apache.org/repos/dist/dev/spark/v4.0.0-preview2-rc1-bin</spark.archive.mirror>
<spark.archive.name>spark-${spark.version}-bin-hadoop3.tgz</spark.archive.name>
</properties>
</profile>
