forked from NVIDIA/spark-rapids
Add shim layers for GpuWindowInPandas. (NVIDIA#1124)
Databricks Spark requires different output columns from those of Apache Spark. Also add the Python daemon module, since Databricks changes the API. Signed-off-by: Firestarman <[email protected]>
1 parent cc0bfa3, commit 9148176
Showing 9 changed files with 348 additions and 41 deletions.
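For context, the following is a minimal PySpark sketch (not part of this diff) of the kind of query that Spark plans as a WindowInPandasExec node, which the plugin can then replace with the GpuWindowInPandasExec shims added here. The data, column, and function names are illustrative only.

from pyspark.sql import SparkSession, Window
from pyspark.sql.functions import pandas_udf, PandasUDFType

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 1.0), (1, 2.0), (2, 3.0)], ("id", "v"))

# A grouped-aggregate Pandas UDF applied over a window spec is what produces
# the WindowInPandasExec physical plan node.
@pandas_udf("double", PandasUDFType.GROUPED_AGG)
def mean_v(v):
    return v.mean()

w = (Window.partitionBy("id")
     .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing))
df.withColumn("mean_v", mean_v("v").over(w)).show()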
@@ -0,0 +1,168 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import signal
import select
import socket
import sys
import traceback
import time
import gc
from errno import EINTR, EAGAIN
from socket import AF_INET, SOCK_STREAM, SOMAXCONN
from signal import SIGHUP, SIGTERM, SIGCHLD, SIG_DFL, SIG_IGN

from pyspark.serializers import read_int, write_int, UTF8Deserializer
from pyspark.daemon import worker

from rapids.worker import initialize_gpu_mem

utf8_deserializer = UTF8Deserializer()


def manager():
    # Create a new process group to corral our children
    os.setpgid(0, 0)

    # Create a listening socket on the AF_INET loopback interface
    listen_sock = socket.socket(AF_INET, SOCK_STREAM)
    listen_sock.bind(('127.0.0.1', 0))
    listen_sock.listen(max(1024, SOMAXCONN))
    listen_host, listen_port = listen_sock.getsockname()

    # re-open stdin/stdout in binary mode
    stdin_bin = os.fdopen(sys.stdin.fileno(), 'rb', 4)
    stdout_bin = os.fdopen(sys.stdout.fileno(), 'wb', 4)
    write_int(listen_port, stdout_bin)
    stdout_bin.flush()

    def shutdown(code):
        signal.signal(SIGTERM, SIG_DFL)
        # Send SIGHUP to notify workers of shutdown
        os.kill(0, SIGHUP)
        sys.exit(code)

    def handle_sigterm(*args):
        shutdown(1)
    signal.signal(SIGTERM, handle_sigterm)  # Gracefully exit on SIGTERM
    signal.signal(SIGHUP, SIG_IGN)  # Don't die on SIGHUP
    signal.signal(SIGCHLD, SIG_IGN)

    reuse = os.environ.get("SPARK_REUSE_WORKER")

    # Initialization complete
    try:
        while True:
            try:
                ready_fds = select.select([0, listen_sock], [], [], 1)[0]
            except select.error as ex:
                if ex[0] == EINTR:
                    continue
                else:
                    raise

            if 0 in ready_fds:
                try:
                    worker_pid = read_int(stdin_bin)
                except EOFError:
                    # Spark told us to exit by closing stdin
                    shutdown(0)
                try:
                    os.kill(worker_pid, signal.SIGKILL)
                except OSError:
                    pass  # process already died

            if listen_sock in ready_fds:
                try:
                    sock, _ = listen_sock.accept()
                except OSError as e:
                    if e.errno == EINTR:
                        continue
                    raise

                # Launch a worker process
                try:
                    pid = os.fork()
                except OSError as e:
                    if e.errno in (EAGAIN, EINTR):
                        time.sleep(1)
                        pid = os.fork()  # error here will shutdown daemon
                    else:
                        outfile = sock.makefile(mode='wb')
                        write_int(e.errno, outfile)  # Signal that the fork failed
                        outfile.flush()
                        outfile.close()
                        sock.close()
                        continue

                if pid == 0:
                    # in child process
                    listen_sock.close()

                    # The standard input should be closed in the child process so that
                    # Python native function executions stay intact.
                    #
                    # Note that if we just close the standard input (file descriptor 0),
                    # file descriptor 0 will be reallocated later when other file
                    # descriptors happen to be opened.
                    #
                    # Therefore, redirect it to '/dev/null' by duplicating a file
                    # descriptor for '/dev/null' onto the standard input (0).
                    # See SPARK-26175.
                    devnull = open(os.devnull, 'r')
                    os.dup2(devnull.fileno(), 0)
                    devnull.close()

                    try:
                        # GPU context setup
                        initialize_gpu_mem()

                        infile = sock.makefile(mode="rb")
                        executor_username = utf8_deserializer.loads(infile)
                        # Acknowledge that the fork was successful
                        outfile = sock.makefile(mode="wb")
                        write_int(os.getpid(), outfile)
                        outfile.flush()
                        outfile.close()
                        authenticated = False
                        while True:
                            code = worker(sock, authenticated, executor_username)
                            if code == 0:
                                authenticated = True
                            if not reuse or code:
                                # wait for closing
                                try:
                                    while sock.recv(1024):
                                        pass
                                except Exception:
                                    pass
                                break
                            gc.collect()
                    except:
                        traceback.print_exc()
                        os._exit(1)
                    else:
                        os._exit(0)
                else:
                    sock.close()

    finally:
        shutdown(1)


if __name__ == '__main__':
    manager()
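As a point of reference (also not part of this diff), Apache Spark selects the Python daemon implementation through the spark.python.daemon.module configuration, so a daemon module like the one above is typically wired in along the lines below. The module name used here is hypothetical, since the file path is not shown in this view, and the way the plugin actually registers it on Databricks may differ.

from pyspark.sql import SparkSession

# Hypothetical module name for illustration; the real value comes from the
# plugin's own packaging and setup.
spark = (SparkSession.builder
         .config("spark.python.daemon.module", "rapids.daemon_databricks")
         .getOrCreate())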
...park300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/GpuWindowInPandasExec.scala
42 changes: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
/*
 * Copyright (c) 2020, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.nvidia.spark.rapids.shims.spark300

import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, NamedExpression, SortOrder}
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.rapids.execution.python.GpuWindowInPandasExecBase
import org.apache.spark.sql.vectorized.ColumnarBatch

/*
 * This GpuWindowInPandasExec aims at accelerating the data transfer between
 * JVM and Python, and scheduling GPU resources for its Python processes.
 */
case class GpuWindowInPandasExec(
    windowExpression: Seq[Expression],
    partitionSpec: Seq[Expression],
    orderSpec: Seq[SortOrder],
    child: SparkPlan) extends GpuWindowInPandasExecBase {

  override final def pythonModuleKey: String = "spark"

  // Apache Spark expects the input columns before the result columns.
  override def output: Seq[Attribute] = child.output ++ windowExpression
    .map(_.asInstanceOf[NamedExpression].toAttribute)

  // Return the joined batch directly, per Apache Spark's expectation.
  override def projectResult(joinedBatch: ColumnarBatch): ColumnarBatch = joinedBatch
}
...300db/src/main/scala/com/nvidia/spark/rapids/shims/spark300db/GpuWindowInPandasExec.scala
68 changes: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
/*
 * Copyright (c) 2020, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.nvidia.spark.rapids.shims.spark300db

import com.nvidia.spark.rapids.{GpuBindReferences, GpuBoundReference, GpuProjectExec, GpuWindowExpression}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, NamedExpression, SortOrder}
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.rapids.execution.python.GpuWindowInPandasExecBase
import org.apache.spark.sql.vectorized.{ColumnVector, ColumnarBatch}

/*
 * This GpuWindowInPandasExec aims at accelerating the data transfer between
 * JVM and Python, and scheduling GPU resources for its Python processes.
 */
case class GpuWindowInPandasExec(
    projectList: Seq[Expression],
    partitionSpec: Seq[Expression],
    orderSpec: Seq[SortOrder],
    child: SparkPlan) extends GpuWindowInPandasExecBase {

  override final def pythonModuleKey: String = "databricks"

  // On Databricks, the projectList contains not only the window expressions but may also
  // contain the input attributes, so the window expressions need to be extracted from it.
  override def windowExpression: Seq[Expression] = projectList.filter { expr =>
    expr.find(node => node.isInstanceOf[GpuWindowExpression]).isDefined
  }

  // On Databricks, the projectList is expected to be the final output, and it is
  // nondeterministic: it may contain all, some, or none of the input attributes.
  // So the joined batch has to be projected with this projectList, while the schema
  // can be returned directly.
  override def output: Seq[Attribute] = projectList
    .map(_.asInstanceOf[NamedExpression].toAttribute)

  override def projectResult(joinedBatch: ColumnarBatch): ColumnarBatch = {
    // Project the data
    withResource(joinedBatch) { joinBatch =>
      GpuProjectExec.project(joinBatch, outReferences)
    }
  }

  private val outReferences = {
    val references = windowExpression.zipWithIndex.map { case (e, i) =>
      // Results of the window expressions will be on the right side of the child's output
      GpuBoundReference(child.output.size + i, e.dataType, e.nullable)
    }
    val unboundToRefMap = windowExpression.zip(references).toMap
    // Bind the project list for the GPU
    GpuBindReferences.bindGpuReferences(
      projectList.map(_.transform(unboundToRefMap)), child.output)
  }

}