package tanuneko

import java.io.StringWriter
import com.opencsv.CSVWriter
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, FileSystem}
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.JavaConverters._

/**
 * Writes the contents of an RDD of [[CSVWrite.Neko]] records to HDFS as CSV.
 *
 * Created by neko32 on 2016/01/11.
 */
object CSVWrite {

  /** One record; serialized as a CSV row in (age, name) column order. */
  case class Neko(name: String, age: Int)

  /**
   * Builds a small RDD of Neko records, renders each partition as one CSV
   * string with opencsv, and saves the result under `outFile` on HDFS,
   * replacing any output left by a previous run.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("WriteJson")
    val sc = new SparkContext(conf)
    try {
      val hadoopConf = new Configuration
      val fs = FileSystem.get(hadoopConf)
      val outFile = "/user/neko32/spark/study4/csvwrite"

      val nekoz = sc.parallelize(List(Neko("Tora", 8), Neko("Mikeyo", 4)))
      // Each record becomes Array(age, name) — the row shape CSVWriter.writeAll expects.
      val stred = nekoz.map(n => List(n.age.toString, n.name).toArray)

      // Serialize per partition so one CSVWriter handles all rows of a partition
      // and each partition yields a single CSV string element.
      val nekoz2 = stred.mapPartitions { rows =>
        val writer = new StringWriter
        val csvWriter = new CSVWriter(writer)
        try {
          csvWriter.writeAll(rows.toList.asJava)
        } finally {
          // Close before reading the buffer: guarantees all rows are flushed
          // into the StringWriter (the original never closed the writer).
          csvWriter.close()
        }
        Iterator(writer.toString)
      }.cache

      nekoz2 foreach println

      // saveAsTextFile fails if the target already exists, so delete stale output first.
      if (fs.exists(new Path(outFile))) {
        fs.delete(new Path(outFile), true)
      }
      nekoz2.saveAsTextFile(outFile)
    } finally {
      // Always release the local Spark runtime, even if the job throws
      // (the original leaked the SparkContext).
      sc.stop()
    }
  }
}
火曜日, 1月 12, 2016
Spark - CSVファイルの書き込み
以下はRDDの内容(Neko case class)をCSVファイルとしてHDFSに書き込む例.
登録:
コメントの投稿 (Atom)
0 件のコメント:
コメントを投稿