Skip to content

Commit

Permalink
mapOncePerWorker will no longer ignore failed execution
Browse files Browse the repository at this point in the history
behaviour is verified by 1 test case

re-enable some commented tests

minor refactoring to integration tests

remove the useless SnapshotRunner.scala

showcase is now moved into parent to use its scala toolchain; all submodules & gradle config are simplified

fix savePages, now guarded by 3 unit tests

remove PartialFunctionWrapper.scala as the serialization problem is gone
  • Loading branch information
tribbloid committed Oct 23, 2023
1 parent 67995e6 commit 698b295
Show file tree
Hide file tree
Showing 99 changed files with 885 additions and 558 deletions.
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[submodule "spookystuff-showcase"]
path = spookystuff-showcase
url = https://github.com/tribbloid/spookystuff-showcase
[submodule "parent/showcase"]
path = parent/showcase
url = https://github.com/tribbloid/spookystuff-showcase
35 changes: 11 additions & 24 deletions build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@

import com.github.benmanes.gradle.versions.updates.DependencyUpdatesTask
import org.gradle.api.specs.Spec

val vs = versions()

buildscript {
Expand All @@ -11,6 +15,12 @@ buildscript {
// }
}

// Restrict which configurations the `dependencyUpdates` task considers:
// the Spec accepts every configuration except those whose name starts
// with "incrementalScalaAnalysis".
tasks.named<DependencyUpdatesTask>("dependencyUpdates") {
    filterConfigurations = Spec<Configuration> { candidate ->
        !candidate.name.startsWith("incrementalScalaAnalysis")
    }
}

plugins {
// base
`java-library`
Expand All @@ -26,7 +36,7 @@ plugins {
id("io.github.gradle-nexus.publish-plugin") version "1.3.0"

// TODO: upgrades past 0.44.0 were blocked by https://github.com/ben-manes/gradle-versions-plugin/issues/727 - re-check that issue whenever this version is bumped
id("com.github.ben-manes.versions") version "0.44.0"
id("com.github.ben-manes.versions") version "0.49.0"
id("project-report")

id("com.github.johnrengelman.shadow") version "8.1.1"
Expand Down Expand Up @@ -70,8 +80,6 @@ allprojects {
mavenCentral()
mavenLocal()
// jcenter()
// maven("https://dl.bintray.com/kotlin/kotlin-dev")
// maven("https://scala-ci.typesafe.com/artifactory/scala-integration/") // scala SNAPSHOT
}

idea {
Expand Down Expand Up @@ -111,18 +119,6 @@ subprojects {

// apply(plugin = "ru.tinkoff.gradle.jarjar")

// resolving version conflicts
// TODO: remove, already defined in `constraints` as below
// configurations.all {
// resolutionStrategy.dependencySubstitution {
// substitute(
// module("com.chuusai:shapeless_${vs.scalaBinaryV}")
// ).apply {
// using(module("com.chuusai:shapeless_${vs.scalaBinaryV}:${vs.shapelessV}"))
// }
// }
// }

// https://stackoverflow.com/questions/23261075/compiling-scala-before-alongside-java-with-gradle

task("dependencyTree") {
Expand All @@ -132,15 +128,6 @@ subprojects {

dependencies {

// see https://github.com/gradle/gradle/issues/13067
fun both(notation: Any) {
implementation(notation)
testFixturesImplementation(notation)
}

// both("${vs.scala.group}:scala-compiler:${vs.scala.v}")
both("${vs.scala.group}:scala-library:${vs.scala.v}")
both("${vs.scala.group}:scala-reflect:${vs.scala.v}")
}

tasks {
Expand Down
Empty file modified dev/CI-apache-local.sh
100644 → 100755
Empty file.
2 changes: 2 additions & 0 deletions dev/CI-apache-stable.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

# TODO: this file is merely kept for backward compatibility



CRDIR="$(cd "`dirname "$0"`"; pwd)"

"$CRDIR"/CI/main.sh apache-stable "${@}"
4 changes: 4 additions & 0 deletions dev/CI/pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

# Print the Spark-related environment variables ahead of the [COMPILING] step,
# so the values used by this run show up in the pipeline output.
# Unset variables are printed as an empty value after the '='.
echo "[ENV]"
echo "SPARK_HOME=${SPARK_HOME}"
echo "SPARK_SCALA_VERSION=${SPARK_SCALA_VERSION}"

echo "[COMPILING]" && \
"${FWDIR}"/make-all.sh "${@}" && \
echo "[RUNNING TESTS]" && \
Expand Down
6 changes: 5 additions & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,9 @@ noUav

sparkVersion=3.5.0

#splainVersion=1.1.0-SNAPSHOT
#splainVersion=1.0.3

org.gradle.parallel=true
org.gradle.caching=true
#org.gradle.caching=true
#org.gradle.daemon=true
16 changes: 15 additions & 1 deletion parent/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ plugins {
subprojects {

apply(plugin = "scala")
apply(plugin = "io.github.cosmicsilence.scalafix")

configurations.all {

Expand Down Expand Up @@ -148,6 +147,17 @@ subprojects {
}
}

// Apply the scalafix Gradle plugin to this project and configure its extension.
apply(plugin = "io.github.cosmicsilence.scalafix")
scalafix {
// Optional settings, currently disabled.
// NOTE(review): the bracketed lists below are Groovy syntax; in this Kotlin script
// they would need to be rewritten (e.g. with listOf(...)) before being enabled.
// configFile = file("config/myscalafix.conf")
// includes = ["/com/**/*.scala"]
// excludes = ["**/generated/**"]
// ignoreSourceSets = ["scoverage"]

// Enable the extension's SemanticDB auto-configuration and pin the SemanticDB version to 4.8.11.
semanticdb.autoConfigure.set(true)
semanticdb.version.set("4.8.11")
}

dependencies {

constraints {
Expand Down Expand Up @@ -178,6 +188,10 @@ subprojects {
testFixturesImplementation(notation)
}

// both("${vs.scala.group}:scala-compiler:${vs.scala.v}")
both("${vs.scala.group}:scala-library:${vs.scala.v}")
both("${vs.scala.group}:scala-reflect:${vs.scala.v}")

both("org.apache.spark:spark-sql_${vs.scala.binaryV}:${vs.sparkV}")
both("org.apache.spark:spark-mllib_${vs.scala.binaryV}:${vs.sparkV}")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ object Const extends CommonConst {

val defaultJoinField: Field = Field("A", isWeak = true)

val mimeDetector: DefaultDetector = new DefaultDetector()
val tikaDetector: DefaultDetector = new DefaultDetector()

val defaultDocumentFilter: DocFilterImpl.MustHaveTitle.type = DocFilterImpl.MustHaveTitle
val defaultImageFilter: DocFilterImpl.AcceptStatusCode2XX.type = DocFilterImpl.AcceptStatusCode2XX
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import com.tribbloids.spookystuff.execution.ExplorePlan.ExeID
import com.tribbloids.spookystuff.execution.{ExploreRunner, NodeKey}
import com.tribbloids.spookystuff.row.{DataRow, RowReducer}
import com.tribbloids.spookystuff.utils.Caching
import com.tribbloids.spookystuff.utils.Caching.{ConcurrentCache, ConcurrentMap, ConcurrentSet}
import com.tribbloids.spookystuff.utils.Caching.{ConcurrentMap, ConcurrentSet}

/**
* Singleton, always in the JVM and shared by all executors on the same machine. This is a makeshift implementation,
Expand All @@ -14,7 +14,8 @@ object ExploreRunnerCache {

// (NodeKey, ExecutionID) -> Squashed Rows
// exeID is used to segment Squashed Rows from different jobs
val committedVisited: ConcurrentCache[(NodeKey, ExeID), Iterable[DataRow]] = Caching.ConcurrentCache()
val committedVisited: Caching.ConcurrentCache[(NodeKey, ExeID), Iterable[DataRow]] =
Caching.ConcurrentCache()

val onGoings: ConcurrentMap[ExeID, ConcurrentSet[ExploreRunner]] =
Caching.ConcurrentMap() // executionID -> running ExploreStateView
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@ import com.tribbloids.spookystuff.SpookyContext
import com.tribbloids.spookystuff.actions.{Trace, TraceView}
import com.tribbloids.spookystuff.doc.DocOption
import com.tribbloids.spookystuff.utils.Caching
import com.tribbloids.spookystuff.utils.Caching.ConcurrentCache

/**
* Backed by a WeakHashMap, the web cache temporarily stores all trace -> Seq[DocOption] entries until the next GC. Always enabled.
*/
object InMemoryDocCache extends AbstractDocCache {

val internal: ConcurrentCache[Trace, Seq[DocOption]] = Caching.ConcurrentCache()
val internal: Caching.ConcurrentCache[Trace, Seq[DocOption]] = Caching.ConcurrentCache()

def cacheable(v: Seq[DocOption]): Boolean = {
v.exists(v => v.cacheLevel.isInstanceOf[DocCacheLevel.InMemory])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,14 @@ case class SpookyConf(
earliestTimeFromDuration
}
}

/**
 * Mutates this configuration in place: the default join sampler becomes `Samplers.FirstN(1)` and the default explore
 * range becomes `0 to 2`. Judging by the name, this is presumably meant for quick previews of a job - confirm with
 * callers.
 *
 * @return
 *   this same (mutated) instance, so that calls can be chained
 */
def previewMode: this.type = {

  val sampler: Samplers.FirstN = Samplers.FirstN(1)

  // NOTE(review): this assignment used to appear twice in a row with the same value; the redundant copy was removed.
  // If the second line was meant to set a different sampler field, add that assignment here.
  this.defaultJoinSampler = sampler
  this.defaultExploreRange = 0 to 2

  this
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ case class Doc(
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, uri.substring(slash + 1))
val stream = TikaInputStream.get(raw, metadata)
try {
val mediaType = Const.mimeDetector.detect(stream, metadata)
val mediaType = Const.tikaDetector.detect(stream, metadata)
// val mimeType = mediaType.getBaseType.toString
// val charset = new CharsetDetector().getString(content, null)
// ContentType.create(mimeType, charset)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,27 @@ import org.apache.tika.sax.ToXMLContentHandler
import org.jsoup.Jsoup
import org.jsoup.nodes.Element

import scala.collection.JavaConverters

/**
* Created by peng on 11/30/14.
*/
object HtmlElement {

def apply(html: String, uri: String): HtmlElement = new HtmlElement(null, html, None, uri)

/**
 * Builds the chain of tag names leading to an element.
 *
 * @param e
 *   the element whose ancestry is described
 * @return
 *   the tag names of `e.parents()` in reversed order, followed by the tag name of `e` itself
 */
def breadcrumb(e: Element): Seq[String] = {

  // Only the Scala 2.13 converters are imported: additionally importing the deprecated
  // `scala.collection.JavaConverters._` would put two competing `.asScala` extensions in scope.
  import scala.jdk.CollectionConverters._

  // presumably `parents()` lists the closest ancestor first, so reversing yields outermost-first - confirm
  val ancestorTags = e.parents().asScala.toSeq.map(_.tagName()).reverse

  ancestorTags :+ e.tagName()
}

def apply(html: String, uri: String): HtmlElement = new HtmlElement(null, html, None, uri)

def fromBytes(content: Array[Byte], charSet: String, mimeType: String, uri: String): HtmlElement = {

val handler = new ToXMLContentHandler()

val metadata = new Metadata()
val stream = TikaInputStream.get(content, metadata)
val html =
val html: CSSQuery =
try {
metadata.set(HttpHeaders.CONTENT_ENCODING, charSet)
metadata.set(HttpHeaders.CONTENT_TYPE, mimeType)
Expand All @@ -52,7 +50,7 @@ class HtmlElement private (
override val uri: String
) extends Unstructured {

import JavaConverters._
import scala.jdk.CollectionConverters._

// constructor for HtmlElement returned by .children()
private def this(_parsed: Element) = this(
Expand Down
Loading

0 comments on commit 698b295

Please sign in to comment.