Advertisement
z7z7z7

Untitled

Jun 25th, 2024
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 1.38 KB | None | 0 0
  1. import org.apache.spark.{SparkConf, SparkContext}
  2.  
  /**
   * Spark job that reads two grade files and prints each student's average
   * score across both of them.
   *
   * Each input line is expected to hold a student name followed by an integer
   * score, separated by whitespace. Lines that do not fit that shape (blank
   * lines, a missing score, a non-numeric score) are skipped instead of
   * aborting the whole job.
   */
  object GradeAnalysis {

    /** Default location of the Hadoop course grades. */
    private val DefaultHadoopPath = "grade/Hadoop.txt"

    /** Default location of the Spark course grades. */
    private val DefaultSparkPath = "grade/Spark.txt"

    /**
     * Parses one line of a grade file.
     *
     * @param line raw text line, e.g. `"Tom 90"`
     * @return `Some((student, score))` when the line has a non-empty name
     *         followed by an integer score; `None` otherwise. Fields after the
     *         score are ignored.
     */
    private def parseScore(line: String): Option[(String, Int)] =
      // Trim and split on whitespace runs so tabs, repeated spaces and
      // leading/trailing blanks do not produce empty fields.
      line.trim.split("\\s+") match {
        case Array(student, score, _*) if student.nonEmpty =>
          scala.util.Try(score.toInt).toOption.map(value => (student, value))
        case _ =>
          None
      }

    /**
     * Loads a grade file as `(student, score)` pairs, dropping unparsable lines.
     *
     * @param sc   active Spark context
     * @param path path of the grade file to read
     */
    private def loadScores(sc: SparkContext, path: String): org.apache.spark.rdd.RDD[(String, Int)] =
      sc.textFile(path).flatMap(line => parseScore(line).toList)

    /**
     * Entry point.
     *
     * @param args optional overrides: `args(0)` is the Hadoop grade file and
     *             `args(1)` the Spark grade file; each falls back to its
     *             default path when absent.
     */
    def main(args: Array[String]): Unit = {
      val hadoopPath = args.lift(0).getOrElse(DefaultHadoopPath)
      val sparkPath = args.lift(1).getOrElse(DefaultSparkPath)

      // Only fall back to a local master when none was supplied externally,
      // so the job can still be launched on a cluster via spark-submit.
      val conf = new SparkConf()
        .setAppName("Grade Analysis")
        .setIfMissing("spark.master", "local")
      val sc = new SparkContext(conf)

      try {
        // Read and parse both grade files, then merge them into one RDD.
        val allScores = loadScores(sc, hadoopPath).union(loadScores(sc, sparkPath))

        // Accumulate (sum of scores, number of scores) per student.
        val totalScores = allScores
          .mapValues(score => (score, 1))
          .reduceByKey { case ((sumA, countA), (sumB, countB)) =>
            (sumA + sumB, countA + countB)
          }

        // Average = sum / count, computed in floating point.
        val averageScores = totalScores.mapValues { case (total, count) =>
          total.toDouble / count
        }

        // Print every student's average score.
        println("Average Scores:")
        averageScores.collect().foreach { case (student, avgScore) =>
          println(s"$student: $avgScore")
        }
      } finally {
        // Always release the context, even if the job fails part-way.
        sc.stop()
      }
    }
  }
  49.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement