Comparison with depmixS4

Simple data set where depmix errors for nstates=2

x <- 1:4
if(requireNamespace("depmixS4")){
  model <- depmixS4::depmix(x ~ 1, nstates=2, ntimes=length(x))
  depmixS4::fit(model)
}
#> Loading required namespace: depmixS4
#> Error in fb(init = init, A = trDens, B = dens, ntimes = ntimes(object), : NA/NaN/Inf in foreign function call (arg 10)

Data set where nstates=5 errors

data("buggy.5states", package="plotHMM")
plot(buggy.5states$logratio)

if(requireNamespace("depmixS4")){
  model.spec <- depmixS4::depmix(
    logratio ~ 1, data=buggy.5states, nstates=5)
  set.seed(1)
  model.fit <- depmixS4::fit(model.spec)
}
#> Error in fb(init = init, A = trDens, B = dens, ntimes = ntimes(object), : NA/NaN/Inf in foreign function call (arg 10)

Fit several data sequences

if(
  require(data.table) && 
  requireNamespace("neuroblastoma") && 
  require(ggplot2) && 
  requireNamespace("depmixS4")
){
  data(neuroblastoma, package="neuroblastoma")
  nb.dt <- data.table(neuroblastoma$profiles)
  one.pro <- nb.dt[profile.id=="4" & chromosome%in%1:10]
  ntimes <- rle(as.integer(one.pro$chromosome))
  n.states <- 4
  model.spec <- depmixS4::depmix(
    logratio ~ 1, data=one.pro,
    nstates=n.states, ntimes=ntimes$lengths)
  set.seed(1)
  unconstrained.fit <- depmixS4::fit(model.spec)
  param.names <- c(mean="(Intercept)", sd="sd")
  par.vec <- depmixS4::getpars(unconstrained.fit)
  matrix(
    par.vec[names(par.vec) %in% param.names],
    ncol=length(param.names),
    byrow=TRUE,
    dimnames=list(state=1:n.states, parameter=names(param.names)))
  one.pro[, viterbi := factor(unconstrained.fit@posterior[,1]) ]
  ggplot()+
    geom_point(aes(
      position/1e6, logratio, color=viterbi),
      data=one.pro)+
    facet_grid(. ~ chromosome, scales="free", space="free")
}
#> Loading required package: data.table
#> Loading required namespace: neuroblastoma
#> Loading required package: ggplot2
#> converged at iteration 155 with logLik: 1332.267

How to constrain common variance parameter?

if(requireNamespace("depmixS4")){
  par.vec <- depmixS4::getpars(model.spec)
  equal.groups <- rep(1, length(par.vec))
  equal.groups[names(par.vec)=="sd"] <- 2
  if(FALSE){
    constrained.fit <- depmixS4::fit(model.spec, equal=equal.groups)
  }
}

Forward-backward algorithm speed comparison

if(requireNamespace("depmixS4")){
  one.chrom <- nb.dt[profile.id=="4" & chromosome=="2"]
  n.states <- 3
  model.spec <- depmixS4::depmix(
    logratio ~ 1,
    data=one.chrom,
    nstates=n.states)
  log.emission.mat <- log(model.spec@dens[,1,])
  log.transition.mat <- log(model.spec@trDens[1,,])
  log.init.vec <- log(model.spec@init[1,])
  microbenchmark::microbenchmark(depmixS4={
    result <- depmixS4::forwardbackward(model.spec)
  }, plotHMM={
    fwd.list <- plotHMM::forward_interface(
      log.emission.mat, log.transition.mat, log.init.vec)
    back.mat <- plotHMM::backward_interface(
      log.emission.mat, log.transition.mat)
    mult.mat <- plotHMM::multiply_interface(
      fwd.list$log_alpha, back.mat)
    pairwise.array <- plotHMM::pairwise_interface(
      log.emission.mat, log.transition.mat,
      fwd.list$log_alpha, back.mat)
  }, times=5)  
}
#> Unit: microseconds
#>      expr     min      lq      mean  median      uq       max neval
#>  depmixS4  86.000  91.511  110.3318 100.708 118.892   154.548     5
#>   plotHMM 150.461 154.378 8497.3434 157.344 157.554 41866.980     5

plotHMM is 2-3x slower than depmixS4. Possibly due to (1) overhead of several function calls rather than just one, and (2) log space computations are slower than scaling.

Viterbi algorithm speed comparison

if(requireNamespace("depmixS4")){
  microbenchmark::microbenchmark(depmixS4={
    depmixS4::viterbi(model.spec)
  }, plotHMM={
    plotHMM::viterbi_interface(
      log.emission.mat, log.transition.mat, log.init.vec)
  }, times=5)
}
#> Unit: microseconds
#>      expr      min       lq     mean   median       uq      max neval
#>  depmixS4 3048.195 3063.684 3115.681 3090.464 3143.873 3232.188     5
#>   plotHMM   15.719   23.754   42.918   30.016   67.286   77.815     5

plotHMM is about 100x faster than depmixS4, because of the overhead of loops in R (memory allocation in each iteration).