import org.apache.spark.{SparkConf, SparkContext}

object GradeAnalysis {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Grade Analysis").setMaster("local")
    val sc = new SparkContext(conf)

    // Read the grade files
    val hadoopGrades = sc.textFile("grade/Hadoop.txt")
    val sparkGrades = sc.textFile("grade/Spark.txt")

    // Parse each line into a (student, score) pair
    val hadoopScores = hadoopGrades.map(line => {
      val parts = line.split(" ")
      (parts(0), parts(1).toInt)
    })
    val sparkScores = sparkGrades.map(line => {
      val parts = line.split(" ")
      (parts(0), parts(1).toInt)
    })

    // Merge the two RDDs
    val allScores = hadoopScores.union(sparkScores)

    // Compute each student's total score and course count
    val studentScores = allScores.map {
      case (student, score) => (student, (score, 1))
    }
    val totalScores = studentScores.reduceByKey {
      case ((score1, count1), (score2, count2)) => (score1 + score2, count1 + count2)
    }

    // Compute the average score
    val averageScores = totalScores.map {
      case (student, (totalScore, count)) => (student, totalScore.toDouble / count)
    }

    // Print each student's average score
    println("Average Scores:")
    averageScores.collect().foreach {
      case (student, avgScore) => println(s"$student: $avgScore")
    }

    sc.stop()
  }
}
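For context, the parsing above assumes each input line holds a student name and an integer score separated by a single space. The original paste does not show the input files, so the file contents and names below are illustrative assumptions, not part of the source:

// Hypothetical contents of grade/Hadoop.txt:
//   Alice 85
//   Bob 78
//
// Hypothetical contents of grade/Spark.txt:
//   Alice 92
//   Bob 66
//
// Expected output for that sample input:
//   Average Scores:
//   Alice: 88.5
//   Bob: 72.0

Since setMaster("local") is hardcoded, the job runs in a single local JVM rather than on a cluster; removing that call and passing the master via spark-submit would be the usual way to make the same code cluster-ready.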