You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
The last part of the installation needs its own "mkdir lib" step. On Ubuntu, at least, Maven is part of the standard system, but sbt is not, and I was off on a wild goose chase for a while trying to find a good way to install it. The first two methods I tried were defunct; I finally settled for the script https://raw.githubusercontent.com/paulp/sbt-extras/master/sbt . No doubt that script may disappear or become undesirable with time, but it would be a courtesy to users of this module to point to a currently working installation method.
Also, the Spark script invocation needs to be corrected to use the example's actual file name, "ml-forest-example.scala". I am not really a Spark user; I just downloaded it for this purpose and got the following errors:
scala> :load "../redis/spark-redis-ml/scripts/ml-forest-example.scala"
Loading ../redis/spark-redis-ml/scripts/ml-forest-example.scala...
import scala.collection.mutable
import scala.language.reflectiveCalls
import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.ml.feature.{StringIndexer, VectorIndexer}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForestRegressor}
import org.apache.spark.ml.tree.{CategoricalSplit, ContinuousSplit, Split}
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.{SparkSession, _}
import redis.clients.jedis.Protocol.Command
import redis.clients.jedis.{Jedis, _}
import com.redislabs.client.redisml.MLClient
import com.redislabs.provider.redis.ml.Forest
loadData: (spark: org.apache.spark.sql.SparkSession, path: String, format: String, expectedNumFeatures: Option[Int])org.apache.spark.sql.DataFrame
loadDatasets: (input: String, dataFormat: String, testInput: String, algo: String, fracTest: Double)(org.apache.spark.sql.DataFrame, org.apache.spark.sql.DataFrame)
defined class Params
params: Params = Params(file:///root/spark/data/mllib/sample_libsvm_data.txt,,libsvm,classification,5,32,1,0.0,10,auto,0.2,false,None,10)
algo: String = classification
RandomForestExample with parameters:
Params(file:///root/spark/data/mllib/sample_libsvm_data.txt,,libsvm,classification,5,32,1,0.0,10,auto,0.2,false,None,10)
org.apache.spark.sql.AnalysisException: Path does not exist: file:/root/spark/data/mllib/sample_libsvm_data.txt;
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$14.apply(DataSource.scala:382)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$14.apply(DataSource.scala:370)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.immutable.List.flatMap(List.scala:344)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:370)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:152)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:135)
at loadData(:54)
at loadDatasets(:54)
... 76 elided
stages: scala.collection.mutable.ArrayBuffer[org.apache.spark.ml.PipelineStage] = ArrayBuffer()
labelColName: String = indexedLabel
res4: Any = ArrayBuffer(strIdx_348bb105c92b)
featuresIndexer: org.apache.spark.ml.feature.VectorIndexer = vecIdx_8176e0e50d19
res5: stages.type = ArrayBuffer(strIdx_348bb105c92b, vecIdx_8176e0e50d19)
dt: org.apache.spark.ml.classification.RandomForestClassifier = rfc_4bee75d8596f
res6: stages.type = ArrayBuffer(strIdx_348bb105c92b, vecIdx_8176e0e50d19, rfc_4bee75d8596f)
pipeline: org.apache.spark.ml.Pipeline = pipeline_b4782508197c
startTime: Long = 22579135088679
:45: error: not found: value training
val pipelineModel = pipeline.fit(training)
The text was updated successfully, but these errors were encountered:
The last part of the installation needs its own "mkdir lib" step. On Ubuntu, at least, Maven is part of the standard system, but sbt is not, and I was off on a wild goose chase for a while trying to find a good way to install it. The first two methods I tried were defunct; I finally settled for the script https://raw.githubusercontent.com/paulp/sbt-extras/master/sbt . No doubt that script may disappear or become undesirable with time, but it would be a courtesy to users of this module to point to a currently working installation method.
Also, the Spark script invocation needs to be corrected to use the example's actual file name, "ml-forest-example.scala". I am not really a Spark user; I just downloaded it for this purpose and got the following errors:
scala> :load "../redis/spark-redis-ml/scripts/ml-forest-example.scala"
Loading ../redis/spark-redis-ml/scripts/ml-forest-example.scala...
import scala.collection.mutable
import scala.language.reflectiveCalls
import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.ml.feature.{StringIndexer, VectorIndexer}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForestRegressor}
import org.apache.spark.ml.tree.{CategoricalSplit, ContinuousSplit, Split}
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.{SparkSession, _}
import redis.clients.jedis.Protocol.Command
import redis.clients.jedis.{Jedis, _}
import com.redislabs.client.redisml.MLClient
import com.redislabs.provider.redis.ml.Forest
loadData: (spark: org.apache.spark.sql.SparkSession, path: String, format: String, expectedNumFeatures: Option[Int])org.apache.spark.sql.DataFrame
loadDatasets: (input: String, dataFormat: String, testInput: String, algo: String, fracTest: Double)(org.apache.spark.sql.DataFrame, org.apache.spark.sql.DataFrame)
defined class Params
params: Params = Params(file:///root/spark/data/mllib/sample_libsvm_data.txt,,libsvm,classification,5,32,1,0.0,10,auto,0.2,false,None,10)
algo: String = classification
RandomForestExample with parameters:
Params(file:///root/spark/data/mllib/sample_libsvm_data.txt,,libsvm,classification,5,32,1,0.0,10,auto,0.2,false,None,10)
org.apache.spark.sql.AnalysisException: Path does not exist: file:/root/spark/data/mllib/sample_libsvm_data.txt;
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$14.apply(DataSource.scala:382)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$14.apply(DataSource.scala:370)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.immutable.List.flatMap(List.scala:344)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:370)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:152)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:135)
at loadData(:54)
at loadDatasets(:54)
... 76 elided
stages: scala.collection.mutable.ArrayBuffer[org.apache.spark.ml.PipelineStage] = ArrayBuffer()
labelColName: String = indexedLabel
res4: Any = ArrayBuffer(strIdx_348bb105c92b)
featuresIndexer: org.apache.spark.ml.feature.VectorIndexer = vecIdx_8176e0e50d19
res5: stages.type = ArrayBuffer(strIdx_348bb105c92b, vecIdx_8176e0e50d19)
dt: org.apache.spark.ml.classification.RandomForestClassifier = rfc_4bee75d8596f
res6: stages.type = ArrayBuffer(strIdx_348bb105c92b, vecIdx_8176e0e50d19, rfc_4bee75d8596f)
pipeline: org.apache.spark.ml.Pipeline = pipeline_b4782508197c
startTime: Long = 22579135088679
:45: error: not found: value training
val pipelineModel = pipeline.fit(training)
The text was updated successfully, but these errors were encountered: