forked from qubole/spark-acid
Commit
Merged in SPAR-4319 (pull request qubole#25)

fix: dev: SPAR-4319: Added a blobstore commit marker while doing insert overwrite. Also set the Hive conf when reading from Spark.

Approved-by: Amogh Margoor <[email protected]>
Sourabh Goyal committed on Jun 12, 2020
1 parent d07a726 · commit e76547a
Showing 6 changed files with 263 additions and 30 deletions.
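For context on the change itself: the new test suite below expects that, after an INSERT OVERWRITE commits, each freshly written base_ directory contains a marker file named by AcidUtils.BlobstoreCommitMarker.BLOBSTORE_COMMIT_MARKER. The writer side lives in one of the changed files not shown here, so the following is only a minimal sketch of what such a writer could look like, assuming the marker is an empty file dropped into the base directory after the data files land (the object and method names are hypothetical):

import com.qubole.shaded.hadoop.hive.ql.io.AcidUtils
import org.apache.hadoop.fs.{FileSystem, Path}

object CommitMarkerSketch {
  // Hypothetical helper: after an insert overwrite commits its base_ directory,
  // write an (assumed empty) marker file so blobstore readers can tell that the
  // directory is fully committed.
  def writeMarker(fs: FileSystem, baseDir: Path): Unit = {
    val marker = new Path(baseDir, AcidUtils.BlobstoreCommitMarker.BLOBSTORE_COMMIT_MARKER)
    fs.create(marker, true).close() // true = overwrite if it already exists
  }
}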
src/it/scala/com/qubole/spark/hiveacid/BlobstoreCommitMarkerSuite.scala (161 additions, 0 deletions)
@@ -0,0 +1,161 @@
package com.qubole.spark.hiveacid

import com.qubole.shaded.hadoop.hive.ql.io.AcidUtils
import org.apache.hadoop.fs.{FileSystem, Path, PathFilter}
import org.apache.log4j.{Level, LogManager, Logger}
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}

import scala.collection.mutable.ArrayBuffer
import scala.util.control.NonFatal

class BlobstoreCommitMarkerSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll {

  val log: Logger = LogManager.getLogger(this.getClass)
  log.setLevel(Level.INFO)

  var helper: TestHelper = _
  val isDebug = true

  val DEFAULT_DBNAME = "BlobstoreCommitMarkerDB"
  val cols: Map[String, String] = Map(
    ("intCol", "int"),
    ("doubleCol", "double"),
    ("floatCol", "float"),
    ("booleanCol", "boolean")
  )

  override def beforeAll(): Unit = {
    try {
      helper = new TestHelper
      if (isDebug) {
        log.setLevel(Level.DEBUG)
      }
      helper.init(isDebug)

      // Recreate the test database from scratch.
      helper.hiveExecute("DROP DATABASE IF EXISTS " + DEFAULT_DBNAME + " CASCADE")
      helper.hiveExecute("CREATE DATABASE " + DEFAULT_DBNAME)
    } catch {
      case NonFatal(e) => log.error("beforeAll failed", e)
    }
  }

  override protected def afterAll(): Unit = {
    helper.hiveExecute("DROP DATABASE IF EXISTS " + DEFAULT_DBNAME + " CASCADE")
    helper.destroy()
  }

test("Check for blobstore marker in insert overwrite into full acid partitioned table") { | ||
val partitionedTableName = "partitionedTbl" | ||
val partitionedTable = new Table(DEFAULT_DBNAME, partitionedTableName, cols, Table.orcPartitionedFullACIDTable, | ||
isPartitioned = true) | ||
|
||
def code(): Unit = { | ||
helper.recreate(partitionedTable) | ||
helper.sparkSQL(partitionedTable.insertOverwriteSparkTableKeyRange(1,1)) | ||
val metadata = helper.getTableMetadata(partitionedTable) | ||
val basePath = metadata.rootPath | ||
val fs = FileSystem.get(basePath.toUri, helper.spark.sessionState.newHadoopConf()) | ||
for (i <- metadata.getRawPartitions()) { | ||
val partitionPath = new Path(basePath, i.getName) | ||
log.info(s"partition path to check for blobstore marker commit: ${partitionPath}") | ||
val iter = fs.listFiles(partitionPath, true) | ||
val pathStatuses = new ArrayBuffer[Path] | ||
while (iter.hasNext) { | ||
pathStatuses.append(iter.next.getPath) | ||
} | ||
val filteredPaths = fs.listStatus(pathStatuses.toArray, new PathFilter { | ||
override def accept(path: Path): Boolean = { | ||
log.info(s"path to filter is: ${path}") | ||
val filter = (path.getParent.getName.startsWith("base_") && | ||
path.getName == AcidUtils.BlobstoreCommitMarker.BLOBSTORE_COMMIT_MARKER) | ||
filter | ||
} | ||
}) | ||
val filteredPathString = filteredPaths.mkString(",") | ||
log.info(s"filteredPaths: ${filteredPathString}") | ||
|
||
assert(filteredPaths.size > 0, s"blobstore commit marker was not written in " + | ||
s"${filteredPathString}") | ||
} | ||
} | ||
helper.myRun(s"check for blob store commit marker " + | ||
s" when insert overwrite into partitioned table: ${partitionedTable}", code) | ||
|
||
} | ||
|
||
test("check for blobstore marker for insert overwrite for non partitioned table") { | ||
val nonPartitionedTableName = "nonPartitionedTbl" | ||
val nonPartitionedTable = new Table(DEFAULT_DBNAME, nonPartitionedTableName, cols, Table.orcFullACIDTable, | ||
isPartitioned = false) | ||
|
||
def code(): Unit = { | ||
helper.recreate(nonPartitionedTable) | ||
helper.sparkSQL(nonPartitionedTable.insertOverwriteSparkTableKeyRange(1,1)) | ||
val metadata = helper.getTableMetadata(nonPartitionedTable) | ||
val basePath = metadata.rootPath | ||
val fs = FileSystem.get(basePath.toUri, helper.spark.sessionState.newHadoopConf()) | ||
|
||
log.info(s"partition path to check for blobstore marker commit: ${basePath}") | ||
val iter = fs.listFiles(basePath, true) | ||
val pathStatuses = new ArrayBuffer[Path] | ||
while (iter.hasNext) { | ||
pathStatuses.append(iter.next.getPath) | ||
} | ||
val filteredPaths = fs.listStatus(pathStatuses.toArray, new PathFilter { | ||
override def accept(path: Path): Boolean = { | ||
log.info(s"path to filter is: ${path}") | ||
val filter = (path.getParent.getName.startsWith("base_") && | ||
path.getName == AcidUtils.BlobstoreCommitMarker.BLOBSTORE_COMMIT_MARKER) | ||
filter | ||
} | ||
}) | ||
val filteredPathString = filteredPaths.mkString(",") | ||
log.info(s"filteredPaths: ${filteredPathString}") | ||
|
||
assert(filteredPaths.size > 0, s"blobstore commit marker was not written in ${filteredPathString}") | ||
} | ||
helper.myRun(s"check for blob store commit marker " + | ||
s" when insert overwrite into non partitioned table: ${nonPartitionedTableName}", code) | ||
} | ||
|
||
|
||
test("Check blobstore marker not present in insert into full acid partitioned table") { | ||
val partitionedTableName = "partitionedTbl" | ||
val partitionedTable = new Table(DEFAULT_DBNAME, partitionedTableName, cols, Table.orcPartitionedFullACIDTable, | ||
isPartitioned = true) | ||
|
||
def code(): Unit = { | ||
helper.recreate(partitionedTable) | ||
helper.sparkSQL(partitionedTable.insertIntoSparkTableKeyRange(1,1)) | ||
val metadata = helper.getTableMetadata(partitionedTable) | ||
val basePath = metadata.rootPath | ||
val fs = FileSystem.get(basePath.toUri, helper.spark.sessionState.newHadoopConf()) | ||
for (i <- metadata.getRawPartitions()) { | ||
val partitionPath = new Path(basePath, i.getName) | ||
log.info(s"partition path to check for blobstore marker commit: ${partitionPath}") | ||
val iter = fs.listFiles(partitionPath, true) | ||
val pathStatuses = new ArrayBuffer[Path] | ||
while (iter.hasNext) { | ||
pathStatuses.append(iter.next.getPath) | ||
} | ||
val filteredPaths = fs.listStatus(pathStatuses.toArray, new PathFilter { | ||
override def accept(path: Path): Boolean = { | ||
log.info(s"path to filter is: ${path}") | ||
val filter = (path.getParent.getName.startsWith("delta_") && | ||
path.getName == AcidUtils.BlobstoreCommitMarker.BLOBSTORE_COMMIT_MARKER) | ||
filter | ||
} | ||
}) | ||
val filteredPathString = filteredPaths.mkString(",") | ||
log.info(s"filteredPaths: ${filteredPathString}") | ||
|
||
assert(filteredPaths.size == 0, s"blobstore commit marker should not be written in " + | ||
s"${filteredPathString}") | ||
} | ||
} | ||
helper.myRun(s"check that blob store commit marker is not present" + | ||
s" when insert into partitioned table: ${partitionedTable}", code) | ||
|
||
} | ||
} |
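The three tests repeat the same recursive-listing-and-filter sequence. A hypothetical helper along these lines (name and placement are illustrative, not part of this commit) could collapse that duplication into one parameterized call, with "base_" for the insert overwrite cases and "delta_" for plain insert:

  // Hypothetical consolidation of the listing/filter logic used above.
  def listMarkerFiles(fs: FileSystem, root: Path, dirPrefix: String): Seq[Path] = {
    val found = new ArrayBuffer[Path]
    val iter = fs.listFiles(root, true) // recursive listing
    while (iter.hasNext) {
      val p = iter.next.getPath
      if (p.getParent.getName.startsWith(dirPrefix) &&
          p.getName == AcidUtils.BlobstoreCommitMarker.BLOBSTORE_COMMIT_MARKER) {
        found.append(p)
      }
    }
    found
  }

The first test's assertion would then reduce to assert(listMarkerFiles(fs, partitionPath, "base_").nonEmpty, ...).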
(Diffs for the remaining five changed files are not shown.)