话不多说。直接上代码咯。欢迎交流。
/**
* Created by whuscalaman on 1/7/16.
*/
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
/**
 * Trains a linear SVM with SGD on MLlib's bundled sample data and prints
 * the training error (fraction of misclassified training points).
 *
 * Input format: one point per line, "<label> <f1> <f2> ...", space-separated.
 */
object svmpredict {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[1]").setAppName("svmpredict")
    val sc = new SparkContext(conf)

    val data = sc.textFile("file:///root/spark-1.5.2-bin-hadoop2.6/data/mllib/sample_svm_data.txt")
    val parsedData = data.map { line =>
      val parts = line.split(" ")
      // BUG FIX: the original re-split parts(1) on " ", which can never yield
      // more than one token, so every point had a single feature. All tokens
      // after the label are the feature vector.
      LabeledPoint(parts(0).toDouble, Vectors.dense(parts.tail.map(_.toDouble)))
    }.cache() // reused below for both training and evaluation

    val numIterations = 20
    val model = SVMWithSGD.train(parsedData, numIterations)

    // Evaluate on the training set itself (no held-out split in this demo).
    val labelAndPreds = parsedData.map { point =>
      (point.label, model.predict(point.features))
    }
    val trainErr = labelAndPreds.filter { case (label, pred) => label != pred }.count.toDouble / parsedData.count
    println("Training Error = " + trainErr)

    sc.stop() // release local Spark resources
  }
}
我的代码都是在IDEA中写的，以本地化运行模式运行通过的，我的Spark版本是spark-1.5.2。