Normalizer
import org.apache.spark.ml.feature._
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions.{rand, randn}
import org.apache.spark.{SparkContext, SparkConf}

val sqlContext = new SQLContext(sc)

// Build a DataFrame of random columns and assemble them into a single "features" vector
val randomDF = sqlContext.range(0, 10).select("id")
  .withColumn("uniform1", rand(10L))
  .withColumn("uniform2", rand(11L))
  .withColumn("normal", randn(10L))

val assembler = new VectorAssembler()
  .setInputCols(Array("uniform1", "uniform2", "normal"))
  .setOutputCol("features")
val newRandomDF = assembler.transform(randomDF)
newRandomDF.show()
newRandomDF.select("id", "features").show()

// Example of Normalizer: a Transformer that rescales each row's vector to unit p-norm
val normalizer = new Normalizer()
  .setInputCol("features")
  .setOutputCol("scaledFeat")
  .setP(1.0)
normalizer.transform(newRandomDF.select("id", "features")).show()

// Example of StandardScaler
// Since StandardScaler is an Estimator, we call fit() with the DataFrame as the argument,
// then transform() with the fitted model
val standardScaler = new StandardScaler()
  .setInputCol("features")
  .setOutputCol("scaledFeat")
  .setWithStd(true)
  .setWithMean(true)
val standardScalerFit = standardScaler.fit(newRandomDF.select("id", "features"))
standardScalerFit.transform(newRandomDF.select("id", "features")).show()

// Example of MinMaxScaler: also an Estimator; rescales each feature into the range [1, 3]
val minMaxScaler = new MinMaxScaler()
  .setInputCol("features")
  .setOutputCol("scaledFeat")
  .setMin(1)
  .setMax(3)
val minMaxScalerFit = minMaxScaler.fit(newRandomDF.select("id", "features"))
minMaxScalerFit.transform(newRandomDF.select("id", "features")).show()
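For reference, Normalizer with setP(1.0) divides every component of a row's vector by that row's L1 norm, so the absolute values of the result sum to 1. A minimal plain-Scala sketch of the per-row computation, using a made-up vector rather than the random data above:

// Hypothetical row vector, not taken from the DataFrame above
val v = Array(0.8, 0.2, -0.5)
val l1Norm = v.map(math.abs).sum   // ||v||_1 = 0.8 + 0.2 + 0.5 = 1.5
val unitL1 = v.map(_ / l1Norm)     // (0.533..., 0.133..., -0.333...); abs values now sum to 1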
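The StandardScaler step, by contrast, works column-wise: with setWithMean(true) and setWithStd(true) each feature is centered on its mean and divided by its standard deviation (Spark uses the corrected sample standard deviation). A hand-rolled sketch on a made-up column:

// Hypothetical values for one feature column
val xs = Array(1.0, 2.0, 3.0)
val mean = xs.sum / xs.length                   // 2.0
val std = math.sqrt(                            // sample standard deviation: 1.0
  xs.map(x => (x - mean) * (x - mean)).sum / (xs.length - 1))
val standardized = xs.map(x => (x - mean) / std) // (-1.0, 0.0, 1.0)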
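Finally, MinMaxScaler rescales each feature linearly from its observed range into the target range, here [1, 3] from setMin(1).setMax(3). A sketch of the per-feature formula on made-up values:

// Hypothetical values for one feature column; observed range is [2.0, 8.0]
val col = Array(2.0, 5.0, 8.0)
val (eMin, eMax) = (col.min, col.max)
val (newMin, newMax) = (1.0, 3.0)               // target range from setMin/setMax
val rescaled = col.map(x =>
  (x - eMin) / (eMax - eMin) * (newMax - newMin) + newMin) // (1.0, 2.0, 3.0)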