KMeans演算法比較簡單,不詳細介紹了,直接上代碼。 運行結果: ...
KMeans演算法比較簡單,不詳細介紹了,直接上代碼。
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.clustering._

/**
  * Created by Administrator on 2017/7/11.
  *
  * Small Spark MLlib driver: reads whitespace-separated numeric rows from HDFS,
  * trains a KMeans model on them, then prints the cluster centers and the
  * clustering cost reported by the model.
  */
object Kmenas {

  /**
    * Program entry point.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    // Set up the execution environment.
    val conf = new SparkConf()
      .setAppName("KMeans Test")
      .setMaster("spark://master:7077")
      .setJars(Seq("E:\\Intellij\\Projects\\MachineLearning\\MachineLearning.jar"))
    val sc = new SparkContext(conf)

    // Always release the SparkContext, even when reading or training fails.
    try {
      Logger.getRootLogger.setLevel(Level.WARN)

      // Read the sample data and parse each line into a dense vector.
      // Lines are trimmed and blank ones skipped so that stray whitespace
      // does not surface as a NumberFormatException from toDouble.
      val data = sc.textFile("hdfs://master:9000/ml/data/kmeans_data.txt")
      val parsedData = data
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(line => Vectors.dense(line.split("\\s+").map(_.toDouble)))
        .cache() // cached because the RDD is used for both training and cost evaluation

      // Build the KMeans clustering model and train it.
      val initMode = "k-means||"
      val numClusters = 2
      val numIterations = 500
      val model = new KMeans()
        .setInitializationMode(initMode)
        .setK(numClusters)
        .setMaxIterations(numIterations)
        .run(parsedData)

      // Print every coordinate of each center, tab-separated. For 2-D data
      // this is the same output as printing components 0 and 1 explicitly,
      // but it also works for any other dimensionality.
      val centers = model.clusterCenters
      println("Centers:")
      centers.foreach(center => println(center.toArray.mkString("\t")))

      // Error calculation: cost of the trained model on the training data.
      val cost = model.computeCost(parsedData)
      println(s"Errors = $cost")
    } finally {
      sc.stop()
    }
  }
}
運行結果: