HDFS上のCSV風テキストを読み込み、最終的に「(主キー) -> (非キー項目)」というタプルのペアに変換し、キーごとに集約する例。
// Read the comma-separated input file from HDFS.
// Each row is expected to hold 4 columns: trade id, version, instrument and trader name.
val myrdd = sc.textFile("/user/neko32/spark/study1/contract.txt")
// Split every row into its fields.
// The -1 limit keeps trailing empty fields (e.g. "1,2,FX," still yields 4 fields);
// a plain split(",") silently drops them and the row would come up short.
val lines = myrdd.map(_.split(",", -1).toList)
// Turn each field list into a 4-tuple.
// Rows with fewer than 4 fields (blank lines, truncated records) are skipped rather
// than failing the whole job with IndexOutOfBoundsException; extra fields are ignored.
val myIn = lines.collect { case c :: v :: i :: n :: _ => (c, v, i, n) }
// Re-shape each tuple into a key-value pair: (trade id, version) -> (instrument, trader name).
val byKey = myIn.map { case (c, v, i, n) => (c, v) -> (i, n) }
// Merge all values sharing the same key by concatenating the strings component-wise.
// NOTE(review): string concatenation is not commutative, so the order of the
// concatenated pieces may depend on how Spark combines partitions — confirm this is acceptable.
val reducedByKey = byKey.reduceByKey { (a, b) => (a._1 + b._1, a._2 + b._2) }
0 件のコメント:
コメントを投稿