Skip to content

Commit

Permalink
Create initial structure for Delta Kernel development
Browse files Browse the repository at this point in the history
See #1783 for details.

This PR just sets up the `kernel/` subdirectory and sbt for future development.

#1786 updates the github actions and is based off of this PR

Closes #1785
  • Loading branch information
allisonport-db committed May 30, 2023
1 parent 458bcae commit b221b69
Show file tree
Hide file tree
Showing 9 changed files with 1,045 additions and 0 deletions.
3 changes: 3 additions & 0 deletions kernel/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Delta Kernel

[TODO] - For now refer to [delta-io/delta#1783](https://github.com/delta-io/delta/issues/1783) for further information.
110 changes: 110 additions & 0 deletions kernel/build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* Copyright (2021) The Delta Lake Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

val scala212 = "2.12.15"
scalaVersion := scala212

lazy val commonSettings = Seq(
organization := "io.delta",
scalaVersion := scala212,
fork := true,
scalacOptions ++= Seq("-target:jvm-1.8", "-Ywarn-unused:imports"),
javacOptions ++= Seq("-source", "1.8"),
// -target cannot be passed as a parameter to javadoc. See https://github.com/sbt/sbt/issues/355
Compile / compile / javacOptions ++= Seq("-target", "1.8", "-Xlint:unchecked"),
// Configurations to speed up tests and reduce memory footprint
Test / javaOptions += "-Xmx1024m",
)

// TODO javastyle checkstyle tests
// TODO unidoc/javadoc settings

lazy val kernelApi = (project in file("kernel-api"))
.settings(
name := "delta-kernel-api",
commonSettings,
scalaStyleSettings,
releaseSettings,
libraryDependencies ++= Seq()
)

val hadoopVersion = "3.3.1"
val deltaStorageVersion = "2.2.0"
val scalaTestVersion = "3.2.15"
val deltaSparkVersion = deltaStorageVersion
val sparkVersion = "3.3.2"

lazy val kernelDefault = (project in file("kernel-default"))
.dependsOn(kernelApi)
.settings(
name := "delta-kernel-default",
commonSettings,
scalaStyleSettings,
releaseSettings,
libraryDependencies ++= Seq(
"org.apache.hadoop" % "hadoop-client-api" % hadoopVersion, // Configuration, Path
"io.delta" % "delta-storage" % deltaStorageVersion, // LogStore
"com.fasterxml.jackson.core" % "jackson-databind" % "2.13.5", // ObjectMapper
"org.apache.parquet" % "parquet-hadoop" % "1.12.3",

"org.scalatest" %% "scalatest" % scalaTestVersion % "test",
"io.delta" %% "delta-core" % deltaSparkVersion % "test",
"org.apache.spark" %% "spark-sql" % sparkVersion % "test", // SparkSession
"org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests",
"org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests",
"junit" % "junit" % "4.11" % "test",
"com.novocode" % "junit-interface" % "0.11" % "test"
)
)

/*
***********************
* ScalaStyle settings *
***********************
*/
ThisBuild / scalastyleConfig := baseDirectory.value / "scalastyle-config.xml"

// Not used since scala is test-only
lazy val compileScalastyle = taskKey[Unit]("compileScalastyle")
lazy val testScalastyle = taskKey[Unit]("testScalastyle")

lazy val scalaStyleSettings = Seq(
compileScalastyle := (Compile / scalastyle).toTask("").value,
Compile / compile := ((Compile / compile) dependsOn compileScalastyle).value,
testScalastyle := (Test / scalastyle).toTask("").value,
Test / test := ((Test / test) dependsOn testScalastyle).value
)

/*
********************
* Release settings *
********************
*/

// Don't release the root project
publishArtifact := false
publish / skip := true

lazy val releaseSettings = Seq(
// Java only release settings
crossPaths := false, // drop off Scala suffix from artifact names
autoScalaLibrary := false, // exclude scala-library from dependencies in generated pom.xml

// Other release settings
publishArtifact := true,
Test / publishArtifact := false
)
183 changes: 183 additions & 0 deletions kernel/build/sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# This file contains code from the Apache Spark project (original license above).
# It contains modifications, which are licensed as follows:
#

#
# Copyright (2021) The Delta Lake Project Authors.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so
# that we can run Hive to generate the golden answer. This is not required for normal development
# or testing.
if [ -n "$HIVE_HOME" ]; then
for i in "$HIVE_HOME"/lib/*
do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i"
done
export HADOOP_CLASSPATH
fi

realpath () {
(
TARGET_FILE="$1"

cd "$(dirname "$TARGET_FILE")"
TARGET_FILE="$(basename "$TARGET_FILE")"

COUNT=0
while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ]
do
TARGET_FILE="$(readlink "$TARGET_FILE")"
cd $(dirname "$TARGET_FILE")
TARGET_FILE="$(basename $TARGET_FILE)"
COUNT=$(($COUNT + 1))
done

echo "$(pwd -P)/"$TARGET_FILE""
)
}

if [[ "$JENKINS_URL" != "" ]]; then
# Make Jenkins use Google Mirror first as Maven Central may ban us
SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories"
export SBT_OPTS="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG"
fi

. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash


declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
declare -r sbt_opts_file=".sbtopts"
declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"

usage() {
cat <<EOM
Usage: $script_name [options]
-h | -help print this message
-v | -verbose this runner is chattier
-d | -debug set sbt log level to debug
-no-colors disable ANSI color codes
-sbt-create start sbt even if current directory contains no sbt project
-sbt-dir <path> path to global settings/plugins directory (default: ~/.sbt)
-sbt-boot <path> path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
-ivy <path> path to local Ivy repository (default: ~/.ivy2)
-mem <integer> set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem))
-no-share use all local caches; no sharing
-no-global uses global caches, but does not use global ~/.sbt directory.
-jvm-debug <port> Turn on JVM debugging, open at the given port.
-batch Disable interactive mode
# sbt version (default: from project/build.properties if present, else latest release)
-sbt-version <version> use the specified version of sbt
-sbt-jar <path> use the specified jar as the sbt launcher
-sbt-rc use an RC version of sbt
-sbt-snapshot use a snapshot version of sbt
# java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
-java-home <path> alternate JAVA_HOME
# jvm options and output control
JAVA_OPTS environment variable, if unset uses "$java_opts"
SBT_OPTS environment variable, if unset uses "$default_sbt_opts"
.sbtopts if this file exists in the current directory, it is
prepended to the runner args
/etc/sbt/sbtopts if this file exists, it is prepended to the runner args
-Dkey=val pass -Dkey=val directly to the java runtime
-J-X pass option -X directly to the java runtime
(-J is stripped)
-S-X add -X to sbt's scalacOptions (-S is stripped)
-PmavenProfiles Enable a maven profile for the build.
In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.
EOM
}

process_my_args () {
while [[ $# -gt 0 ]]; do
case "$1" in
-no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
-no-share) addJava "$noshare_opts" && shift ;;
-no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
-sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
-sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;
-debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
-batch) exec </dev/null && shift ;;

-sbt-create) sbt_create=true && shift ;;

*) addResidual "$1" && shift ;;
esac
done

# Now, ensure sbt version is used.
[[ "${sbt_version}XXX" != "XXX" ]] && addJava "-Dsbt.version=$sbt_version"
}

loadConfigFile() {
cat "$1" | sed '/^\#/d'
}

# if sbtopts files exist, prepend their contents to $@ so it can be processed by this runner
[[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
[[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"

exit_status=127
saved_stty=""

restoreSttySettings() {
stty $saved_stty
saved_stty=""
}

onExit() {
if [[ "$saved_stty" != "" ]]; then
restoreSttySettings
fi
exit $exit_status
}

saveSttySettings() {
saved_stty=$(stty -g 2>/dev/null)
if [[ ! $? ]]; then
saved_stty=""
fi
}

saveSttySettings
trap onExit INT

run "$@"

exit_status=$?
onExit
12 changes: 12 additions & 0 deletions kernel/build/sbt-config/repositories
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[repositories]
local
local-preloaded-ivy: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext]
local-preloaded: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}
gcs-maven-central-mirror: https://maven-central.storage-download.googleapis.com/repos/central/data/
maven-central
typesafe-ivy-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly
sbt-ivy-snapshots: https://repo.scala-sbt.org/scalasbt/ivy-snapshots/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly
sbt-plugin-releases: https://repo.scala-sbt.org/scalasbt/sbt-plugin-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
bintray-typesafe-sbt-plugin-releases: https://dl.bintray.com/typesafe/sbt-plugins/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
bintray-spark-packages: https://dl.bintray.com/spark-packages/maven/
typesafe-releases: https://repo.typesafe.com/typesafe/releases/
Loading

0 comments on commit b221b69

Please sign in to comment.