How to Convert a Spark DataFrame to Map in Scala
Hi Friends,
In this post, I am going to explain how we can convert a Spark DataFrame to a Map in Scala.
Input DataFrame:

+---+-------+--------------+
|id |name   |address       |
+---+-------+--------------+
|1  |Anamika|Uttar Pradesh |
|2  |Ananya |Delhi         |
|3  |Ambika |Madhya Pradesh|
|4  |Abhinav|Rajasthan     |
|5  |Avinash|Noida         |
+---+-------+--------------+

Output Map (one Map per row):

Map(id -> 1, name -> Anamika, address -> Uttar Pradesh)
Map(id -> 2, name -> Ananya, address -> Delhi)
Map(id -> 3, name -> Ambika, address -> Madhya Pradesh)
Map(id -> 4, name -> Abhinav, address -> Rajasthan)
Map(id -> 5, name -> Avinash, address -> Noida)
Below is the complete code with all the steps explained, along with its output.
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

object ConvertDFToMap {
  def main(args: Array[String]): Unit = {
    // Creating the SparkSession
    val conf = new SparkConf()
      .setAppName("DataFrame-to-Map")
      .set("spark.default.parallelism", "1")
      .setIfMissing("spark.master", "local[*]")
    val sparkSession = SparkSession.builder().config(conf).getOrCreate()

    // Creating a Seq of raw data to build a test DataFrame
    val rawData = Seq(
      Row(1, "Anamika", "Uttar Pradesh"),
      Row(2, "Ananya", "Delhi"),
      Row(3, "Ambika", "Madhya Pradesh"),
      Row(4, "Abhinav", "Rajasthan"),
      Row(5, "Avinash", "Noida")
    )

    // Creating the schema for the DataFrame built from rawData
    val schema = List(
      StructField("id", IntegerType, nullable = true),
      StructField("name", StringType, nullable = true),
      StructField("address", StringType, nullable = true)
    )

    // Creating the DataFrame from the above rawData and schema
    val rawDF = sparkSession.createDataFrame(
      sparkSession.sparkContext.parallelize(rawData),
      StructType(schema))

    // Printing the DataFrame
    rawDF.show(false)

    // Collecting the rows to the driver so we can convert them to Maps
    val collectedRows = rawDF.collect()

    // Converting each Row to a Map keyed by column name.
    // Note: getValuesMap needs an explicit type parameter ([Any]); without it
    // Scala infers Nothing, which fails at runtime. String.valueOf is used
    // instead of toString so that null values don't throw a NullPointerException.
    val convertDFToMap = collectedRows.map { row =>
      row.getValuesMap[Any](rawDF.columns).map { case (k, v) => k -> String.valueOf(v) }
    }

    // Printing the converted Map data
    convertDFToMap.foreach(println)

    sparkSession.stop()
  }
}
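If you need a single lookup Map instead of one Map per row (say, id -> name), you can build it from the same collected rows. Here is a minimal sketch assuming the rawDF defined above; the choice of "id" as key and "name" as value is just for illustration:

// Build one combined Map keyed by id instead of one Map per row.
// Assumes rawDF from the example above; the column choices are illustrative.
val idToName: Map[Int, String] = rawDF.collect()
  .map(row => row.getAs[Int]("id") -> row.getAs[String]("name"))
  .toMap

println(idToName)
// Expected: Map(1 -> Anamika, 2 -> Ananya, 3 -> Ambika, 4 -> Abhinav, 5 -> Avinash)

Since toMap keeps only one value per key, this variant works best when the key column is unique.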
Running the program prints the DataFrame via rawDF.show(false) and then one Map per row, matching the input and output shown at the top of this post.
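One caveat: collect() pulls the entire DataFrame into the driver's memory, so this approach suits only small DataFrames. For larger ones, here is a minimal sketch of the same conversion using toLocalIterator(), which streams rows to the driver one partition at a time (this variant is my own addition, not part of the example above; the import shown is for Scala 2.11/2.12):

import scala.collection.JavaConverters._

// Stream rows to the driver instead of collecting them all at once.
// The per-row conversion logic is the same as in the main example.
rawDF.toLocalIterator().asScala.foreach { row =>
  val rowAsMap = row.getValuesMap[Any](rawDF.columns).map { case (k, v) => k -> String.valueOf(v) }
  println(rowAsMap)
}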
I hope this post was helpful. Please do like, comment, and share.
Thank You!