木曜日, 12月 31, 2015

Scala - Sequenceマッチ

Scalaのmatch式は極めて強力で,リスト要素へのパターンマッチもその便利な機能の一つだろう.以下はMyCatへのコンストラクタパターンとリスト要素へのシーケンスパターンを組み合わせた例.もしパターンマッチ内でジェネリック型も含めた型パターンマッチ等をしていて,かつそのScalaコードから生成されたクラスをJavaから利用する場合は,Javaの型消去の特性に気を付けるように.


/**
  * Demo of constructor patterns combined with a sequence pattern over a list of cats.
  */
object Matcher {

  /** Prints one line per cat, then one line describing how the whole list starts. */
  def main(args: Array[String]): Unit = {
    val cats = genCats()

    // Per-element check: constructor pattern with a guard on the age field.
    cats.foreach {
      case MyCat(age, _) if age > 5 => println("Adult neko.")
      case _ => println("..")
    }

    // Whole-list check: the first element must be an 8 year old cat; the rest is arbitrary.
    val summary = cats match {
      case l @ List(MyCat(8, _), _*) => s"okay, starting with 8 year old cat - $l"
      case _ => ".."
    }
    println(summary)
  }

  /** Builds the fixed two-cat sample list used by `main`. */
  def genCats(): List[MyCat] = List(MyCat(8, "Tora"), MyCat(4, "Mikeyo"))

}

/** A cat described by its age and its name. */
case class MyCat(age: Int, name: String)


水曜日, 12月 30, 2015

Scala - ワードカウント

SparkではreduceByKeyで簡単にワードカウントができるが,scalaではfoldLeftで実装出来る.

    // Records are separated by '#', the words inside a record by ','.
    val x = """kiji,saru,saru#saru,inu,kiji,saru#inu,kiji,kiji#kiji#inu,inu#saru"""
    // Flatten every record into its words and pair each word with an initial count of 1.
    val v = for {
      record <- x.split("#")
      word <- record.split(",")
    } yield (word, 1)
    // Fold the (word, 1) pairs into a word -> total map.
    val z = v.foldLeft(Map[String,Int]()) { case (totals, (word, n)) =>
      totals.get(word) match {
        case Some(soFar) => totals.updated(word, n + soFar)
        case None => totals + (word -> n)
      }
    }

火曜日, 12月 29, 2015

scala - ディレクトリのファイル操作

妻のパソコンの,とあるフォルダにある画像ファイルのタイムスタンプが何故かおかしくなってしまった.ファイルの数が膨大ということもあり,scalaでさっとコードを書いて修正した.

import java.io.File
import java.time.{LocalDateTime, ZoneId}
import java.util.Date

import org.apache.commons.io.FileUtils

/**
  * Created by neko32 on 2015/12/28.
  *
  * Copies every regular file of `preProcessDir` whose last-modified year equals `targetYear`
  * into `postProcessDir`, then moves the last-modified time of each copy forward by
  * `yearsToAdd` years. The source files themselves are never modified.
  *
  * @param preProcessDir  directory holding the files with the wrong timestamps
  * @param postProcessDir directory receiving the corrected copies; its existing entries are
  *                       deleted before processing starts
  * @param targetYear     last-modified year (system default time zone) selecting the files to fix
  * @param yearsToAdd     number of years added to the timestamp of each copy
  */
class FileDateChanger(preProcessDir:String, postProcessDir:String, targetYear:Int = 2007, yearsToAdd:Int = 8) {

  import scala.language.implicitConversions

  /** Converts a legacy `Date` to a `LocalDateTime` in the system default time zone. */
  implicit def dateToLDate(d:Date):LocalDateTime = {
    d.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime()
  }

  /**
    * Empties the output directory, then copies and re-stamps every matching file,
    * printing progress to standard output.
    */
  def process(): Unit = {
    clean()
    // Directories are skipped: FileUtils.copyFile only accepts regular files.
    val files = entriesOf(preProcessDir).filter { f =>
      f.isFile && dateToLDate(new Date(f.lastModified())).getYear() == targetYear
    }
    println(s"Total number of files - ${files.length}")
    files.zipWithIndex.foreach { case (f, idx) =>
      val original = new Date(f.lastModified())
      // File(parent, child) joins with the platform separator instead of a hard-coded "\\".
      val copy = new File(postProcessDir, f.getName())
      println(s"processing .. ${f.getName()}[${original}]")
      FileUtils.copyFile(f, copy)
      val shifted = dateToLDate(original)
        .plusYears(yearsToAdd)
        .atZone(ZoneId.systemDefault())
        .toInstant()
        .toEpochMilli
      // setLastModified reports failure through its return value rather than an exception.
      if (!copy.setLastModified(shifted)) println(s"WARN: could not update timestamp of ${copy.getPath()}")
      println(s"to ${new Date(copy.lastModified())}")
      val done = idx + 1
      if (done % 10 == 0) println(s"processed ${done}")
    }
    println(s"done. # of processed files [${files.length}]")
  }

  /** Deletes the direct entries of the output directory (non-empty sub-directories are only reported). */
  private def clean(): Unit = {
    println(s"cleaning ${postProcessDir}")
    entriesOf(postProcessDir).foreach { f =>
      if (!f.delete()) println(s"WARN: could not delete ${f.getPath()}")
    }
    println("cleaning done.")
  }

  /** Lists a directory; `listFiles()` returns null for a missing or unreadable directory, mapped here to an empty array. */
  private def entriesOf(dir:String):Array[File] =
    Option(new File(dir).listFiles()).getOrElse(Array.empty[File])
}

/** Entry point that runs `FileDateChanger` against two fixed Windows directories. */
object Runner {
  def main(args: Array[String]): Unit = {
    val changer = new FileDateChanger("C:\\tmp\\pre_process", "C:\\tmp\\post_process")
    changer.process()
  }
}

日曜日, 12月 27, 2015

Scala - 乱数生成プロバイダを用いるstream


    // Seeded from the wall clock, so every run yields a different sequence.
    val r = new Random(System.currentTimeMillis())
    // Infinite lazy stream: the head is drawn immediately, each further element on demand.
    def rand(max:Int):Stream[Int] = r.nextInt(max) #:: rand(max)
    // Print ten values drawn with an upper bound of 100.
    rand(100).take(10).foreach(println)
    // assign index starting from 1. zipWithIndex()'s index starts from 0
    rand(50).take(5).zip(Stream.from(1)).foreach(println)

金曜日, 12月 25, 2015

Scala - Enumeration

ScalaでEnumerationを継承してenumを定義する例.

/** Enumeration with three weekday values, declared in the order MON, TUE, WED. */
object Weekdays extends Enumeration {
  /** Alias that lets callers name the value type as `Weekdays.WEEKDAY`. */
  type WEEKDAY = Value
  val MON = Value
  val TUE = Value
  val WED = Value
}

/** Small driver showing how enumeration values are compared. */
object MyMain {

  /** Returns true only when `w` is `Weekdays.MON`. */
  def isMonday(w: Weekdays.Value): Boolean = w match {
    case Weekdays.MON => true
    case _ => false
  }

  /** Prints the Monday check for MON and then for WED. */
  def main(args: Array[String]): Unit = {
    val samples = List(Weekdays.MON, Weekdays.WED)
    samples.foreach(day => println(isMonday(day)))
  }
}

木曜日, 12月 24, 2015

Scala - akka アクタ

Akka actorのサンプルメモ..

import akka.actor._

/**
  * Created by neko32 on 2015/12/23.
  */

/** Closed set of messages exchanged by the actors in this sample. */
sealed trait Message
/** Asks for `op` to be applied to the operands `a` and `b`. */
case class Request(a: Int, b: Int, op: Operator) extends Message
/** Answer to a request: the operands, the operator and the computed `result`. */
case class Response(a: Int, b: Int, op: Operator, result: Int) extends Message
/** Starts a run described by the textual command `cmd`. */
case class ExecStart(cmd: String) extends Message
/** Termination message carrying an arbitrary payload. */
case class ExecEnd(a: Any) extends Message

/** A binary integer operation; every concrete operator carries its own two operands. */
sealed abstract class Operator(a: Int, b: Int) {
  /** Applies the operation to the operands held by the instance. */
  def calc(): Int
}

/** Addition; renders as " + ". */
case class Plus(a: Int, b: Int) extends Operator(a, b) {
  override def calc(): Int = a + b
  override def toString(): String = " + "
}

/** Subtraction; renders as " - ". */
case class Minus(a: Int, b: Int) extends Operator(a, b) {
  override def calc(): Int = a - b
  override def toString(): String = " - "
}

/** Multiplication; renders as " * ". */
case class Multiply(a: Int, b: Int) extends Operator(a, b) {
  override def calc(): Int = a * b
  override def toString(): String = " * "
}

/** Integer division; renders as " / ". Construction fails when the divisor is zero. */
case class Divide(a: Int, b: Int) extends Operator(a, b) {
  require(b != 0, "zero divide not allowed")
  override def calc(): Int = a / b
  override def toString(): String = " / "
}

/**
  * Actor that answers each `Request` with a `Response` sent back to the sender.
  *
  * The operator's runtime type selects the arithmetic; the result is computed from the
  * operands carried by the request itself. The actor stops on `ExecEnd`.
  */
class CalcWorker extends Actor {
  override def receive = {
    // Typed patterns replace the former isInstanceOf guards.
    case Request(a, b, op: Plus) =>
      println("sending..")
      sender ! Response(a, b, op, a + b)
    case Request(a, b, op: Minus) => sender ! Response(a, b, op, a - b)
    case Request(a, b, op: Multiply) => sender ! Response(a, b, op, a * b)
    case Request(a, b, op: Divide) => sender ! Response(a, b, op, a / b)
    // A bare `ExecEnd` pattern is the companion object; also accept real ExecEnd(...) instances.
    case ExecEnd | ExecEnd(_) => context.stop(self)
  }
}

/**
  * Base behaviour for actors that submit calculations: logs every `Response`,
  * delegates `ExecStart` commands to `run`, stops on `ExecEnd` and logs anything else.
  */
trait Requester extends Actor with ActorLogging {
  override def receive = {
    case Response(a, b, op, rez) =>
      log.info(s"got result from calc agent .. ${a}${op}${b} = ${rez}")
    case ExecStart(cmd) => run(cmd)
    // A bare `ExecEnd` pattern is the companion object; also accept real ExecEnd(...) instances.
    case ExecEnd | ExecEnd(_) => context.stop(self)
    case _ => log.info("....")
  }

  /** Executes the textual command received through `ExecStart`. */
  def run(cmd:String): Unit
}

/**
  * Requester that parses commands of the form `a,b,operator` and forwards them to `calcAgent`.
  *
  * @param calcAgent actor that receives the resulting `Request`
  */
class CalcRequester(calcAgent:ActorRef) extends Requester {

  /**
    * Builds the operator named by `vals(2)` over the integer operands `vals(0)` and `vals(1)`.
    *
    * @throws IllegalArgumentException when the operator name is not plus, minus, mult or div
    */
  def toOp(vals:Array[String]):Operator = {
    // Resolve the operator first so an unknown name is reported before any number parsing.
    val build: (Int, Int) => Operator = vals(2) match {
      case "plus" => Plus(_, _)
      case "minus" => Minus(_, _)
      case "mult" => Multiply(_, _)
      case "div" => Divide(_, _)
      case other =>
        throw new IllegalArgumentException(s"unknown operator [${other}]; expected plus, minus, mult or div")
    }
    build(vals(0).toInt, vals(1).toInt)
  }

  /**
    * Parses `cmd` (case-insensitive, surrounding blanks ignored) and sends the request.
    *
    * @throws IllegalArgumentException when `cmd` does not consist of exactly three comma-separated fields
    */
  override def run(cmd:String): Unit = {
    // parse a,b
    // limit -1 keeps trailing empty fields, so "3,8," yields three fields instead of two.
    val vals = cmd.toLowerCase().split(",", -1).map(_.trim)
    require(vals.length == 3, s"expected <a>,<b>,<operator> but got [${cmd}]")
    println("parsed input - " + vals(0) + "," + vals(1) + "," + vals(2))
    calcAgent ! Request(vals(0).toInt, vals(1).toInt, toOp(vals))
  }
}

/** Wires one worker and one requester together, submits a single command and shuts down. */
object MyApp {
  def main(args: Array[String]): Unit = {
    val system = ActorSystem("CalcSys")
    val worker = system.actorOf(Props[CalcWorker], "CalcAgent")
    val requester = system.actorOf(Props(new CalcRequester(worker)), "CalcReq")

    requester ! ExecStart("3,8,plus")
    // Fixed pause before the actors are told to stop.
    Thread.sleep(3000)

    requester ! ExecEnd
    worker ! ExecEnd
    system.terminate()
  }
}


水曜日, 12月 23, 2015

Scala - Mixin, traitそしてselfによる型間依存関係の表現

Scalaのmixinとtraitそしてselfアノテーションを利用することにより,Javaと比較してより明快に型間の依存関係を宣言出来る.Javaではミックスインが出来ない為,クラス間の依存関係はあるクラスのprivate or protectedフィールドとして表現されていることがご想像いただけるかと思う.

/** A cat record: its name, its coat pattern and its age. */
case class Neko(name: String, pattern: String, age: Int)

/**
  * Contract of a cat registry. The self-type demands that any concrete class
  * mixing this trait in also provides `DefNekoCheck`.
  */
trait CatManage {
  self: DefNekoCheck =>

  /** The registered cats. */
  var nekoz: List[Neko]

  /** Prepares the registry for use. */
  def init(): Unit

  /** Finds the cat registered under `name`. */
  def lookupCat(name: String): Neko
}

/** Default cat check. */
trait DefNekoCheck {
  /** Returns true when `neko` is a non-null reference. */
  def ?(neko: Neko): Boolean = Option(neko).isDefined
}

/** Cat check based on the name "NA". */
trait NekoCheck extends DefNekoCheck {
  /**
    * Returns true when the cat's name is exactly "NA".
    *
    * NOTE(review): the base check answers true for any non-null cat, whereas this override
    * answers true only for the "NA" name - confirm that the inverted polarity is intended.
    */
  override def ?(neko: Neko): Boolean = "NA" == neko.name
}

/** List-backed cat registry pre-populated with four sample cats by `init()`. */
trait NekozManage extends CatManage with NekoCheck {

  override var nekoz: List[Neko] = Nil

  /** Prepends the sample cats one by one, so the cat added last ends up at the head. */
  override def init(): Unit = {
    val samples = List(
      Neko("Mikeyo", "Mike", 3),
      Neko("Tora", "Chatora", 8),
      Neko("Powder", "Mike", 8),
      Neko("Machiko", "Mike", 10)
    )
    samples.foreach { cat => nekoz = cat :: nekoz }
  }

  /** Returns the first cat with the given name, or the placeholder `Neko("NA", "NA", -1)` when there is none. */
  override def lookupCat(name: String): Neko =
    nekoz.find(cat => cat.name == name) match {
      case Some(hit) => hit
      case None => Neko("NA", "NA", -1)
    }
}

/** Something that can be started with a lazily evaluated list of strings. */
trait Act {
  /** Starts the action; `f` is passed by name, so it is evaluated only when the implementation uses it. */
  def start(f: => List[String]): Unit
}

/**
  * Application skeleton: initialises the cat registry and prints the lookup result for
  * every name of the execution plan.
  *
  * `Act` is a parent (not just part of the self-type) because `start` can only be declared
  * with `override` when the overridden member is inherited. The registry is still supplied
  * through the self-type, so a concrete class must mix in a `CatManage` implementation.
  */
trait App extends Act {
  self: CatManage =>

  /** Evaluates `execPlan` once and prints each cat found, or "<name> not found.." for a miss. */
  override def start(execPlan: => List[String]): Unit = {
    init()
    execPlan.foreach { name =>
      lookupCat(name) match {
        // Cats named "NA" are reported as missing.
        case x if x.name == "NA" => println(s"${name} not found..")
        case x => println(x)
      }
    }
  }
}

日曜日, 12月 20, 2015

Scala - visitorパターンのようなもの

ScalaではVisitorパターンを割合簡潔に書くことが出来る.以下の例はあるデータベース上で接続・検索を実行するモジュールで,この例では通常のRDB, HiveそしてインメモリDBをサポートしているとする.また,(乱暴ではあるが簡略のために)RDBとHiveserver2はコネクトとクエリを同一の手順で行い,インメモリDBはコネクトを必要としないと仮に想定している.


package tanuneko

/**
  * Created by neko32 on 2015/12/20.
  */
/** Closed family of supported database kinds, each built from its connection settings. */
sealed abstract class DB(host: String, userId: String, passwd: String)

/** A regular RDB addressed by host. */
case class RegularRDB(host: String, userId: String, passwd: String)
  extends DB(host, userId, passwd)

/** An in-memory DB; it has no host, so `null` is handed to the base class. */
case class InMemoryDB(userId: String, passwd: String)
  extends DB(null, userId, passwd)

/** A Hive DB addressed by host. */
case class HiveDB(host: String, userId: String, passwd: String)
  extends DB(host, userId, passwd)

/** Result of a query, carried as a plain string. */
case class ResultSet(data: String)

/** Operations that can be performed against a database. */
trait DBAction {
  /** Opens the connection; does nothing unless overridden. */
  def connect(): Unit = ()

  /** Runs the given query text and returns its result. */
  def runQuery(str: String): ResultSet
}

/**
  * Action for databases reached through a host: prints what it would do instead of
  * talking to a real server.
  *
  * @param host   host the messages refer to
  * @param userId user named in the connect message
  * @param passwd password; not used by the printed messages
  */
class RegularDBAction(host:String, userId:String, passwd:String) extends DBAction {
  /** Prints the connect message. */
  override def connect(): Unit = {
    println(s"connecting to ${host} with user[${userId}]")
  }

  /** Prints the query message and returns a canned result. */
  override def runQuery(sql: String): ResultSet = {
    // The closing bracket after the SQL text was missing in the original message.
    println(s"send query[${sql}] to Database[${host}]")
    ResultSet("returned data.")
  }
}

/** Action for the in-memory database; it keeps the inherited no-op `connect()`. */
class InMemoryDBAction(userId: String, passwd: String) extends DBAction {
  /** Prints the query message and returns a canned result. */
  override def runQuery(sql: String): ResultSet = {
    println(s"[inmem] running query[${sql}]")
    ResultSet("returned data.")
  }
}

/** Runs the same query against a regular RDB, the in-memory DB and Hive, in that order. */
object Runner {
  def main(args: Array[String]): Unit = {
    val userId = "neko"
    val passwd = "miPasswd150X"
    val host = "myhost.org:16500"
    val sql = "select * from mydb.mytbl"

    // Shared path for the host-based databases: connect first, then query.
    def viaHost(h: String, u: String, p: String): Unit = {
      val act = new RegularDBAction(h, u, p)
      act.connect()
      println(s"result - ${act.runQuery(sql)}")
    }

    // Dispatch on the concrete database kind; the in-memory DB needs no connect step.
    def runQueryAfterConnect(db: DB): Unit = db match {
      case RegularRDB(h, u, p) => viaHost(h, u, p)
      case InMemoryDB(u, p) =>
        val act = new InMemoryDBAction(u, p)
        println(s"[inmem] result - ${act.runQuery(sql)}")
      case HiveDB(h, u, p) => viaHost(h, u, p)
    }

    val targets = List[DB](
      RegularRDB(host, userId, passwd),
      InMemoryDB(userId, passwd),
      HiveDB(host, userId, passwd)
    )
    targets.foreach(db => runQueryAfterConnect(db))
  }
}

月曜日, 12月 14, 2015

scala - tailrecアノテーション

scalaでは@tailrecアノテーションを付けることで,関数が末尾再帰として最適化されることをコンパイラに検証させられる(最適化できない場合はコンパイルエラーになる).
そのためには関数が再帰呼び出しで終わっている必要がある.
  /**
    * Tail-recursive factorial using an explicit accumulator.
    *
    * @param i    non-negative number whose factorial is wanted
    * @param accm running product; callers normally start with 1 (the default)
    * @return `accm * i!`, i.e. `i!` when called with the default accumulator
    * @throws IllegalArgumentException when `i` is negative
    */
  @tailrec
  def fact(i:BigInt, accm:BigInt = BigInt(1)):BigInt = {
    require(i >= 0, "factorial is undefined for negative numbers")
    // Stop at 0 as well as 1: testing only `i == 1` never terminates for fact(0, ...).
    if (i <= 1) accm else fact(i - 1, i * accm)
  }

火曜日, 12月 01, 2015

Javaの関数プログラミングで偏差

暇つぶしに統計の基礎の動画を見ていたら,サンプルのコードを書きたくなったので,メモとして..
この例ではカンマ区切りの名前と点数 の行からなるデータを読み込んで,平均,分散,偏差,標準偏差そして偏差値を計算する.

package org.tanuneko;

import com.google.common.base.Function;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import com.google.common.io.LineProcessor;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import java.util.stream.Stream;

public class STDDev {

    public static void main(String args[]) throws Exception {
        new STDDev();
    }

    public STDDev() throws IOException {
        File scoreFile = new File(STDDev.class.getClassLoader().getResource("score.txt").getPath());
        ImmutableList<Record> rec = Files.asCharSource(scoreFile, Charset.defaultCharset())
                .readLines(new LineProcessor<ImmutableList<Record>>() {
                    List<Record> results = Lists.newArrayList();
                    @Override
                    public boolean processLine(String line) throws IOException {
                        List<String> elems = Splitter.on(",").splitToList(line);
                        Preconditions.checkState(elems.size() == 2);
                        results.add(new Record(elems.get(0), Integer.parseInt(elems.get(1))));
                        return true;
                    }

                    @Override
                    public ImmutableList<Record> getResult() {
                        return ImmutableList.copyOf(results);
                    }
                });

        int totalScore = rec.stream().mapToInt(x->x.getScore()).sum();
        double avg = totalScore / rec.size();
        rec.stream().forEach(x->x.setDev(x.getScore() - avg));
        double v = calcVariation(rec);
        double d = calcStddev(rec);

        System.out.println("total:" + totalScore + ", avg:" + avg);
        System.out.println("variation:" + v);
        System.out.println("stddev:" + d);
        System.out.println(checkDev(rec));

        rec.stream().forEach(x -> {
            x.setScoreDev(50 + 10 * (x.getDev()/d));
        });

        System.out.println(rec);
    }

    private double calcVariation(ImmutableList<Record> rec) {
        return rec.stream().mapToDouble(x->x.getDev())
                .reduce(0, (x,y)-> x + Math.pow(y, 2)) / rec.size();
    }

    private double calcStddev(ImmutableList<Record> rec) {
        return Math.sqrt(calcVariation(rec));
    }

    private boolean checkDev(ImmutableList<Record> rec) {
        double d = rec.stream().mapToDouble(x->x.getDev())
                .sum();
        System.out.println(d);
        return d == 0;
    }
}

class Record {
    private String name;
    private int score;
    private double dev;
    private double scoreDev;
    public Record(String name, int score) {
        this.name = name;
        this.score = score;
        dev = scoreDev = 0d;
    }

    public String getName() {
        return name;
    }

    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this)
                .add("name", name)
                .add("score", score)
                .add("dev", dev)
                .add("score dev", scoreDev)
                .toString();
    }

    public int getScore() {
        return score;
    }

    public double getDev() {
        return dev;
    }

    public void setDev(double dev) {
        this.dev = dev;
    }

    public double getScoreDev() {
        return scoreDev;
    }

    public void setScoreDev(double scoreDev) {
        this.scoreDev = scoreDev;
    }
}