Motivation

Would go and/or scala be useful for statistical computing?

Go is relatively new, is a compiled language that is supposedly fast and easy to write. Go supports concurrency, which is a powerful concept and makes it easy to run things in parallel.

Scala is object-oriented and functional. Can we write fast code that runs anywhere that there’s a Java runtime in the functional style of R? There’s at least one vocal proponent of scala for statistical computing.

The Task

The task is to integrate the standard normal probability density function using uniform rejection sampling. The idea is to repeatedly take samples uniformly from the rectangle \((-5, 5) \times (0, 0.5)\) and count the ones that fall below the normal density as successes. The proportion of successes times the area of the rectangle will give us the area under the normal density:

\[ \int_{-\infty}^\infty f(x)\, dx \approx 5 \frac{1}{B} \sum_{i = 1}^B 1[U_{yi} < f(U_{xi})], \]

where \(f(x) = \frac{exp(x^2/2)}{\sqrt{2\pi}}\), \(U_y \sim Unif(0, 0.5)\), \(U_x \sim Unif(-5, 5)\), and \(B\) is a fixed, large number.

The Code

R Vectorized

intpdf <- function(N){

  candx <- runif(N, -5, 5)
  candy <- runif(N, 0, .5)

  5.0 * mean(candy < dnorm(candx)) 

}

Go

package main

import (
    "fmt"
    "math"
    "math/rand"
    "os"
    "strconv"
    "time"
)

func Dnorm(x float64) float64 {

    return math.Exp(-x*x/2) / math.Sqrt(2*math.Pi)

}

func main() {

    rand.Seed(time.Now().UTC().UnixNano())

    N := 0
    if len(os.Args) == 1 {

        N = 5000

    } else {

        Na, err := strconv.Atoi(os.Args[1])
        if err != nil {
            panic(err)
        }

        N = Na

    }

    res := float64(0.0)
    for i := 0; i < N; i++ {

        func(i int) {
            candx := rand.Float64()*10 - 5
            candy := rand.Float64() * .5

            if candy <= Dnorm(candx) {
                res = res + 1
            }
        }(i)
    }

    // the rectangle is 10 by 0.5 = 5.0

    fmt.Fprintf(os.Stdout, "%f8\n", 5.0*res/float64(N))

}

Go with Concurrency

Like I said, go is trivial to parallelize. Spot the differences between this and the previous code (2 lines)!

package main

import (
    "fmt"
    "math"
    "math/rand"
    "os"
    "strconv"
    "time"
    "runtime"
)

func Dnorm(x float64) float64 {

    return math.Exp(-x*x/2) / math.Sqrt(2*math.Pi)

}

func main() {

  runtime.GOMAXPROCS(runtime.NumCPU())

    rand.Seed(time.Now().UTC().UnixNano())

    N := 0
    if len(os.Args) == 1 {

        N = 5000

    } else {

        Na, err := strconv.Atoi(os.Args[1])
        if err != nil {
            panic(err)
        }

        N = Na

    }

    res := float64(0.0)
    for i := 0; i < N; i++ {

        go func(i int) {
            candx := rand.Float64()*10 - 5
            candy := rand.Float64() * .5

            if candy <= Dnorm(candx) {
                res = res + 1
            }
        }(i)
    }

    // the rectangle is 10 by 0.5 = 5.0

    fmt.Fprintf(os.Stdout, "%f8\n", 5.0*res/float64(N))

}

Scala

import math._

object IntPDF {

    def dnorm(x: Double) = {

        exp(-x * x / 2)/sqrt(2 * 3.14159)

    }

    def main(args: Array[String]) {

        var N = args(0).toInt
        var Res = 0

        for(i <- 0 to N) {

            var candx = random * 10 - 5
            var candy = random * .5
            if(candy < dnorm(candx)) {
                Res = Res + 1
            }

        }

        var out = 5.0 * Res / N
        println(out.toString)

    }
}

Rcpp

#include <Rcpp.h>
#include <stdlib.h>
#include <math.h>
using namespace Rcpp;

// [[Rcpp::export]]
float cintpdf(int n) {

  double res = 0;
  float cx = 0;
  float cy = 0;
  float dens = 0;

  srand (time(NULL));

  double nd = (double) n;

  for(int i = 0; i < n; i++){

    cx = (float) rand()/RAND_MAX * 10 - 5;
    cy = (float) rand()/RAND_MAX * .5;

    dens = (float) exp(-cx * cx / 2.0)/sqrt(2.0 * 3.14159);

    if(cy < dens){
      res = res + 1/nd;
    }

  }

  float out =  (float) 5.0 * res;
  return out;

}

Timing Results

Let’s define some R functions that call the compiled versions of these and confirm that they give the same answer (up to Monte Carlo error).

library(microbenchmark)

intpdf <- function(N){

  candx <- runif(N, -5, 5)
  candy <- runif(N, 0, .5)

  5.0 * sum(candy < dnorm(candx)) / N

}

# load cintpdf

Rcpp::sourceCpp("intpdf.cpp")

# go

gintpdf <- function(N){

  retstr <- system2("/Users/sachsmc/go/bin/intpdf",
                    args = format(N, scientific = FALSE), stdout = TRUE)
  as.numeric(retstr)

}

# go concurrent

gcintpdf <- function(N){

  retstr <- system2("/Users/sachsmc/go/bin/intpdf-concurrent",
                    args = format(N, scientific = FALSE), stdout = TRUE)
  as.numeric(retstr)

}

# scala

sintpdf <- function(N){

  retstr <- system2("/Users/sachsmc/scala-2.11.6/bin/scala",
                    args = c("-classpath", "/Users/sachsmc/scala/intpdf/", "IntPDF",
                             format(N, scientific = FALSE)), stdout = TRUE)
  as.numeric(retstr)

}
N <- 1e5

intpdf(N)
## [1] 1.0082
cintpdf(N)
## [1] 0.99765
gintpdf(N)
## [1] 1.001501
gcintpdf(N)
## [1] 0.8722508
sintpdf(N)
## [1] 1.0086
timeit <- function(N, ...){
  
  knitr::kable(summary(microbenchmark(intpdf(N),
               cintpdf(N),
               gintpdf(N),
               gcintpdf(N),
               sintpdf(N), ...)), digits = 0)

  
}

Now for some timings:

timeit(1e5)
expr min lq mean median uq max neval cld
intpdf(N) 7 9 10 9 10 61 100 ab
cintpdf(N) 2 3 3 3 3 4 100 a
gintpdf(N) 16 18 20 19 21 38 100 b
gcintpdf(N) 56 62 74 65 68 770 100 c
sintpdf(N) 278 312 340 325 347 664 100 d
timeit(1e6, times = 10L)
expr min lq mean median uq max neval cld
intpdf(N) 84 92 131 132 143 256 10 b
cintpdf(N) 26 27 28 28 28 31 10 a
gintpdf(N) 101 113 120 117 128 163 10 b
gcintpdf(N) 498 507 527 517 522 607 10 d
sintpdf(N) 372 405 461 428 556 583 10 c
timeit(1e7, times = 5L)
expr min lq mean median uq max neval cld
intpdf(N) 927 957 1021 970 974 1275 5 b
cintpdf(N) 269 273 274 274 276 277 5 a
gintpdf(N) 1033 1081 1090 1087 1109 1138 5 b
gcintpdf(N) 4854 4892 4927 4912 4981 4995 5 d
sintpdf(N) 1182 1205 1287 1226 1336 1488 5 c

Conclusions