Spark RDD 轉 DataFrame 的兩種方式

Spark RDD 轉 DataFrame:映射的方式

package com.gofun.sparkSql

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

/**
  * Create by IntelliJ IDEA.
  * Author gofun
  * 2017/10/10 20:18
  *
  * Converts an RDD to a DataFrame using reflection: every input line is mapped to a
  * `Student` case class instance and `SQLContext.createDataFrame` derives the schema
  * from that case class.
  */
object RDD2DataFrameReflection {
  // Quiet down Spark's own logging before any job is started.
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // NOTE(review): "org.apache.eclipse.jetty.server" does not look like a real Jetty
  // package name (Jetty normally lives under "org.eclipse.jetty") -- confirm the
  // intended logger name; the string is left unchanged here.
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /** Entry point; runs the reflection-based conversion once. Arguments are ignored. */
  def main(args: Array[String]): Unit = {
    rdd2DataFrame()
  }

  /**
    * RDD to DataFrame conversion, reflection style.
    *
    * Reads tab-separated lines (`id<TAB>name`) from HDFS, turns each line into a
    * `Student`, registers the resulting DataFrame as the temp table `studentTable`
    * and prints every row returned by `select * from studentTable`.
    *
    * The DataFrame is built through an explicit `createDataFrame` call, so no
    * implicit conversions have to be imported by hand.
    *
    * The SparkContext is stopped in a `finally` block so that it is released even
    * when reading or parsing the input fails.
    *
    * A line with fewer than two fields, or a non-numeric id, makes the job fail
    * (the exception thrown inside the `map` is propagated by Spark).
    */
  def rdd2DataFrame(): Unit = {
    val conf = new SparkConf().setAppName("rdd2DataFrame").setMaster("local[2]")
    conf.set("hbase.zookeeper.quorum", "192.168.157.200:2181")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)
      //    System.setProperty("hadoop.home.dir", "E:\\hadoop-2.5.0-cdh5.3.6")
      val lines = sc.textFile("hdfs://192.xxx.xxx.200:8020/test/student.txt")
      val students = lines
        .map(_.split("\t"))
        .map(fields => Student(fields(0).trim.toInt, fields(1).trim))

      // The schema (id: Int, name: String) is inferred from the Student case class.
      val studentDF = sqlContext.createDataFrame(students)
      studentDF.registerTempTable("studentTable")

      val df = sqlContext.sql("select * from studentTable")
      // collect() brings the rows to the driver so they are printed locally.
      df.rdd.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}

/**
  * One student record; its fields supply the column names and types when an
  * RDD of `Student` is turned into a DataFrame via reflection.
  *
  * @param id   numeric identifier of the student
  * @param name name of the student
  */
case class Student(
  id: Int,
  name: String
)

Spark RDD 轉 DataFrame:構造元數據的方式

package com.gofun.sparkSql

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, RowFactory, SQLContext}
import org.apache.spark.sql.types._

/**
  * Create by IntelliJ IDEA.
  * Author gofun
  * 2017/10/10 20:19
  *
  * Converts an RDD to a DataFrame by constructing the schema (metadata)
  * programmatically instead of deriving it from a case class.
  *
  * The object intentionally does NOT extend `App`: with Scala 2's `DelayedInit`,
  * the statements in the body of an `App` are only executed by `App.main`, so
  * overriding `main` would silently skip the logger configuration below.
  */
object RDD2DataFrameProgrammatically {
  // Quiet down Spark's own logging before any job is started.
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // NOTE(review): "org.apache.eclipse.jetty.server" does not look like a real Jetty
  // package name (Jetty normally lives under "org.eclipse.jetty") -- confirm the
  // intended logger name; the string is left unchanged here.
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /** Entry point; runs [[rdd2DataFrame]] once. Arguments are ignored. */
  def main(args: Array[String]): Unit = {
    rdd2DataFrame()
  }

  /** Creates the local two-thread SparkContext shared by both examples. */
  private def newSparkContext(): SparkContext = {
    val conf = new SparkConf().setAppName("RDD2DataFrameProgrammatically").setMaster("local[2]")
    new SparkContext(conf)
  }

  /**
    * Loads the data and converts the RDD to a DataFrame by constructing the
    * metadata with the Java-friendly factories (`RowFactory`, `DataTypes`).
    *
    * Reads space-separated lines (`id name age`) from HDFS, registers the
    * DataFrame as temp table `student` and prints the result of
    * `select name from student`.
    *
    * The SparkContext is stopped in a `finally` block so it is released even
    * when the job fails. A line with fewer than three fields or a non-numeric
    * id/age makes the job fail.
    */
  def rdd2DataFrame(): Unit = {
    val sc = newSparkContext()
    try {
      val sqlContext = new SQLContext(sc)
      val lines = sc.textFile("hdfs://192.xxx.xx.200:8020/test/student.txt")
      val studentRdd = lines.map(_.split(" ")).map { fields =>
        // fields(1) is already a String, so no String.valueOf conversion is needed.
        RowFactory.create(Integer.valueOf(fields(0)), fields(1), Integer.valueOf(fields(2)))
      }

      // Column order must match the order of the values placed into each Row.
      val structType = DataTypes.createStructType(Array(
        DataTypes.createStructField("id", DataTypes.IntegerType, true),
        DataTypes.createStructField("name", DataTypes.StringType, true),
        DataTypes.createStructField("age", DataTypes.IntegerType, true)
      ))

      val studentDF = sqlContext.createDataFrame(studentRdd, structType)
      studentDF.registerTempTable("student")

      val df = sqlContext.sql("select name from student")
      // collect() brings the rows to the driver so they are printed locally.
      df.rdd.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }

  /**
    * Same conversion as [[rdd2DataFrame]], written with the Scala-style API
    * (`Row(...)`, `StructType`/`StructField` case classes).
    *
    * Reads space-separated lines (`id name age`) from HDFS, passing `1` as the
    * second argument of `textFile`, registers the DataFrame as temp table
    * `student` and prints the result of `select name,age from student`.
    *
    * The SparkContext is stopped in a `finally` block so it is released whether
    * or not the job succeeds.
    */
  def rdd2DataFrame2(): Unit = {
    val sc = newSparkContext()
    try {
      val sqlContext = new SQLContext(sc)
      val lines = sc.textFile("hdfs://192.168.64.200:8020/test/student.txt", 1)
      val studentRDD = lines.map { line =>
        // Split once per line instead of once per extracted field.
        val fields = line.split(" ")
        Row(fields(0).toInt, fields(1), fields(2).toInt)
      }

      val structType = StructType(Array(
        StructField("id", IntegerType, true),
        StructField("name", StringType, true),
        StructField("age", IntegerType, true)
      ))

      val studentDF = sqlContext.createDataFrame(studentRDD, structType)
      studentDF.registerTempTable("student")

      val df = sqlContext.sql("select name,age from student")
      // foreach runs inside the Spark tasks; with the local[2] master used here
      // the output still appears on this process's console.
      df.rdd.foreach(println)
    } finally {
      sc.stop()
    }
  }

}
©著作權歸作者所有,轉載或內容合作請聯繫作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳並發布,文章內容僅代表作者本人觀點,簡書係信息發布平臺,僅提供信息存儲服務。

相關閱讀 更多精彩內容

友情鏈接 更多精彩內容