Transformer

Spark ML is built around two core abstractions: a Transformer converts one DataFrame into another (for example, by appending a derived column), while an Estimator is fit on a DataFrame to produce a model, which is itself a Transformer. The examples below show both, run from the Spark shell (sc is the shell's pre-built SparkContext).
import org.apache.spark.ml.feature.{VectorAssembler, Tokenizer}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions.{rand, randn}
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.mllib.linalg.Vectors

// Transformer examples
val sqlContext = new SQLContext(sc)

val df = sqlContext.createDataFrame(Seq(
  (0, "Are new to Scala and want to work with other Scala developers"),
  (1, "Are experienced with Scala and want to mentor others"),
  (2, "Would like to offer a workshop or lightning talk.")
)).toDF("label", "sentence")

// Example of Tokenizer: splits each sentence into a sequence of lowercase words
val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
val newDF = tokenizer.transform(df)
newDF.show()

// Example of VectorAssembler: combines several numeric columns into a single feature vector
val randomDF = sqlContext.range(0, 10).select("id")
  .withColumn("uniform1", rand(10L))
  .withColumn("uniform2", rand(11L))
  .withColumn("normal", randn(10L))

val assembler = new VectorAssembler()
  .setInputCols(Array("uniform1", "uniform2", "normal"))
  .setOutputCol("features")

val newRandomDF = assembler.transform(randomDF)
newRandomDF.show()
newRandomDF.select("id", "features").show()

// Estimator example
val trainingDF = sqlContext.createDataFrame(Seq(
  (1.0, Vectors.dense(2.0, -1.0, 5.0)),
  (0.0, Vectors.dense(7.0, 9.0, 0.0)),
  (0.0, Vectors.dense(3.0, 4.0, 1.0)),
  (1.0, Vectors.dense(5.0, -3.0, 2.0))
)).toDF("label", "features")

// Define an estimator; setRegParam sets the regularization parameter,
// which penalizes large coefficients to avoid over-fitting
val lr = new LogisticRegression()
lr.setMaxIter(10).setRegParam(0.01)

val model = lr.fit(trainingDF)

// The output has three new columns: rawPrediction (raw margin scores),
// probability (the class probabilities behind each prediction),
// and prediction (the predicted label)
model.transform(trainingDF).show()

// Using a ParamMap to override parameters at fit time
val paramMap = ParamMap(lr.maxIter -> 20, lr.regParam -> 0.02)
val model2 = lr.fit(trainingDF, paramMap)
model2.transform(trainingDF).show()
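Transformers and estimators are usually chained together in a Pipeline: fitting the pipeline runs each transformer stage in order and then fits the estimator, returning a PipelineModel, which is itself a transformer. Below is a minimal sketch of that idea, reusing the same sqlContext as above; the trainDF sentences and labels are made up purely for illustration, and HashingTF is used here to turn the token lists into the feature vectors that LogisticRegression expects.

import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{Tokenizer, HashingTF}
import org.apache.spark.ml.classification.LogisticRegression

// Hypothetical binary-labeled training data (labels must be doubles)
val trainDF = sqlContext.createDataFrame(Seq(
  (1.0, "spark is fast and scalable"),
  (0.0, "the weather is nice today"),
  (1.0, "mllib makes machine learning easy"),
  (0.0, "lunch was very good")
)).toDF("label", "sentence")

// Tokenizer and HashingTF are transformers; LogisticRegression is an estimator
val tok = new Tokenizer().setInputCol("sentence").setOutputCol("words")
val tf = new HashingTF().setInputCol("words").setOutputCol("features")
val logReg = new LogisticRegression().setMaxIter(10).setRegParam(0.01)

// The Pipeline itself is an estimator: fit() returns a PipelineModel (a transformer)
val pipeline = new Pipeline().setStages(Array(tok, tf, logReg))
val pipelineModel = pipeline.fit(trainDF)
pipelineModel.transform(trainDF).select("sentence", "prediction").show()

The advantage of the pipeline is that the whole sequence of feature transformations and the fitted model travel together as one object, so the exact same steps are applied at prediction time as at training time.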