Title: | Asymptotic Timing |
---|---|
Description: | Computing and visualizing comparative asymptotic timings of different algorithms and code versions. Also includes functionality for comparing empirical timings with expected references such as linear or quadratic; see <https://en.wikipedia.org/wiki/Asymptotic_computational_complexity>. Also includes functionality for measuring asymptotic memory and other quantities. |
Authors: | Toby Hocking [aut, cre] |
Maintainer: | Toby Hocking <[email protected]> |
License: | GPL-3 |
Version: | 2024.11.19 |
Built: | 2024-11-19 15:40:26 UTC |
Source: | https://github.com/tdhock/atime |
Computation time and memory for several R expressions of several different data sizes.
atime( N, setup, expr.list=NULL, times=10, seconds.limit=0.01, verbose=FALSE, result=FALSE, N.env.parent=NULL, ...)
atime( N, setup, expr.list=NULL, times=10, seconds.limit=0.01, verbose=FALSE, result=FALSE, N.env.parent=NULL, ...)
N |
numeric vector of at least two data sizes; a default sequence of sizes is used if N is not specified. |
setup |
expression to evaluate for every data size, before timings. |
expr.list |
named list of expressions to time. |
times |
number of times to evaluate each timed expression. |
seconds.limit |
if the median timing of any expression exceeds this many seconds, then no timings for larger N are computed. |
verbose |
logical, print messages after every data size? |
result |
logical: save the result of evaluating each expression? Or a function to compute a result, given the value obtained after evaluating each expression. If each result is a data frame with one row, then the numeric column names will be saved as more units to analyze (in addition to kilobytes and seconds). |
N.env.parent |
environment to use as parent of environment created for each data size N, or NULL to use default parent env. |
... |
named expressions to time. |
Each iteration involves first evaluating the setup expression, and then evaluating each of the ... expressions several times. For convenience, expressions may be specified either via code (...) or data (expr.list arg).
list of class atime with elements unit.col.vec (character vector of column names to analyze), seconds.limit (numeric input param), measurements (data table of results).
Toby Dylan Hocking
atime_grid
for avoiding repetition when measuring
asymptotic properties of several similar expressions.
## Polynomial and exponential time string functions. atime_result_string <- atime::atime( seconds.limit=0.001, N=unique(as.integer(10^seq(0,3,l=100))), setup={ subject <- paste(rep("a", N), collapse="") pattern <- paste(rep(c("a?", "a"), each=N), collapse="") linear_size_replacement <- paste(rep("REPLACEMENT", N), collapse="") }, PCRE.match=regexpr(pattern, subject, perl=TRUE), TRE.match=regexpr(pattern, subject, perl=FALSE), constant.replacement=gsub("a","constant size replacement",subject), linear.replacement=gsub("a",linear_size_replacement,subject)) plot(atime_result_string)
## Polynomial and exponential time string functions. atime_result_string <- atime::atime( seconds.limit=0.001, N=unique(as.integer(10^seq(0,3,l=100))), setup={ subject <- paste(rep("a", N), collapse="") pattern <- paste(rep(c("a?", "a"), each=N), collapse="") linear_size_replacement <- paste(rep("REPLACEMENT", N), collapse="") }, PCRE.match=regexpr(pattern, subject, perl=TRUE), TRE.match=regexpr(pattern, subject, perl=FALSE), constant.replacement=gsub("a","constant size replacement",subject), linear.replacement=gsub("a",linear_size_replacement,subject)) plot(atime_result_string)
Create expressions for asymptotic timing by substituting values into expressions.
atime_grid( param.list = list(), ..., name.value.sep="=", expr.param.sep=" ", collapse = ",", symbol.params=character())
atime_grid( param.list = list(), ..., name.value.sep="=", expr.param.sep=" ", collapse = ",", symbol.params=character())
param.list |
Named list of items to replace in ... expressions, default empty list means nothing to replace. |
... |
Named expressions, each of which must contain every name in param.list. |
name.value.sep |
string: separator between names and values from param.list, default "=". |
expr.param.sep |
string: separator between expressions and parameters, default " " (a single space). |
collapse |
string: separator between parameters, default ",". |
symbol.params |
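character vector: these elements of param.list are converted to symbols before substitution into the expressions, default is an empty character vector (no conversion). |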
Named list of expressions which can be used as the expr.list argument of atime.
Toby Dylan Hocking
## Example 0: with no param.list, same as quote inside named list. atime::atime_grid(m=mean(data), s=sum(data)) list(m=quote(mean(data)), s=quote(sum(data))) ## Example 1: polynomial vs exponential time regex. (expr.list <- atime::atime_grid( list(PERL=c(TRUE, FALSE)), expr.param.sep="\n", regexpr=regexpr(pattern, subject, perl=PERL))) atime.list <- atime::atime( seconds.limit=0.001, N=unique(as.integer(10^seq(0,2,l=30))), setup={ subject <- paste(rep("a", N), collapse="") pattern <- paste(rep(c("a?", "a"), each=N), collapse="") }, expr.list=expr.list) plot(atime.list) ## Example 2: symbol.params arg. sub.param.list <- list(FUN=c("sub","gsub"), PERL=c(TRUE,FALSE)) ## with base R we can use as.symbol and substitute: sapply(sub.param.list$FUN,function(name)substitute(fun("a","",subject), list(fun=as.symbol(name)))) ## with atime_grid the analog is to use symbol.params argument: (sub.expr.list <- atime::atime_grid( sub.param.list, replace=FUN("a","",subject,perl=PERL), symbol.params="FUN")) sub.atime.list <- atime::atime( seconds.limit=0.001, setup={ subject <- paste(rep("a",N),collapse="") }, expr.list=sub.expr.list) plot(sub.atime.list) ## Customize ggplot, which shows expr.name column as direct labels. sub.atime.edited <- sub.atime.list library(data.table) sub.atime.edited$measurements <- data.table(sub.atime.list$measurements)[ , expr.name := paste0("PERL=",PERL)] if(require(ggplot2)){ plot(sub.atime.edited)+ facet_grid(unit ~ FUN, labeller=label_both) }
## Example 0: with no param.list, same as quote inside named list. atime::atime_grid(m=mean(data), s=sum(data)) list(m=quote(mean(data)), s=quote(sum(data))) ## Example 1: polynomial vs exponential time regex. (expr.list <- atime::atime_grid( list(PERL=c(TRUE, FALSE)), expr.param.sep="\n", regexpr=regexpr(pattern, subject, perl=PERL))) atime.list <- atime::atime( seconds.limit=0.001, N=unique(as.integer(10^seq(0,2,l=30))), setup={ subject <- paste(rep("a", N), collapse="") pattern <- paste(rep(c("a?", "a"), each=N), collapse="") }, expr.list=expr.list) plot(atime.list) ## Example 2: symbol.params arg. sub.param.list <- list(FUN=c("sub","gsub"), PERL=c(TRUE,FALSE)) ## with base R we can use as.symbol and substitute: sapply(sub.param.list$FUN,function(name)substitute(fun("a","",subject), list(fun=as.symbol(name)))) ## with atime_grid the analog is to use symbol.params argument: (sub.expr.list <- atime::atime_grid( sub.param.list, replace=FUN("a","",subject,perl=PERL), symbol.params="FUN")) sub.atime.list <- atime::atime( seconds.limit=0.001, setup={ subject <- paste(rep("a",N),collapse="") }, expr.list=sub.expr.list) plot(sub.atime.list) ## Customize ggplot, which shows expr.name column as direct labels. sub.atime.edited <- sub.atime.list library(data.table) sub.atime.edited$measurements <- data.table(sub.atime.list$measurements)[ , expr.name := paste0("PERL=",PERL)] if(require(ggplot2)){ plot(sub.atime.edited)+ facet_grid(unit ~ FUN, labeller=label_both) }
R package performance testing, by computing time/memory usage of several R expressions of several different data sizes, for several package versions (base, HEAD, CRAN, merge-base, other historical references specified by user).
atime_pkg_test_info returns an environment containing test code/calls (useful for running a single test), whereas atime_pkg runs all tests and saves results/plots to disk.
atime_pkg(pkg.path=".", tests.dir=NULL) atime_pkg_test_info(pkg.path=".", tests.dir=NULL)
atime_pkg(pkg.path=".", tests.dir=NULL) atime_pkg_test_info(pkg.path=".", tests.dir=NULL)
pkg.path |
path to git repository containing R package. |
tests.dir |
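path to directory which contains atime/tests.R (for example the inst or .ci directory of the package); default NULL means to use a default location under pkg.path. |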
There should be a tests.R code file which defines test.list, a list with names corresponding to different tests.
Each element should be a list with at least three named elements: N, setup, expr (and possibly others such as pkg.edit.fun and version_name="1234567890abcdef"), to be passed as named arguments to atime_versions, along with the following versions, which are passed using the sha.vec argument:
base: ref comes from the GITHUB_BASE_REF environment variable (default master);
HEAD: ref is the branch that you want to merge into base;
CRAN: is the current published version (sha value "");
merge-base: is the most recent common ancestor commit between base and HEAD.
For visualization, default colors are provided for versions with names: HEAD, base, CRAN, merge-base, Before, Regression, Slow, Fixed, Fast; other version names will be gray using the default colors.
If tests.R defines a variable named version.colors, then it should be a character vector of colors to be used instead of the default (names for versions, values for colors).
atime_pkg_test_info returns an environment in which the code of tests.R was evaluated, including a variable test.call which is a list of un-evaluated atime_versions calls, one for each test (use with eval to run a single test).
atime_pkg returns a named list of test results; names come from names of test.list, and values come from results of atime_versions. A side effect is that data/plot files are saved in the atime directory, including tests.RData (test results which can be read into R if you want to make your own alternative plots/analyses), tests_all_facet.png (plot summarizing all test results), tests_preview_facet.png (plot summarizing only the most significant results), and install_seconds.txt (total number of seconds used to install different package versions).
Toby Dylan Hocking
atime_test
for defining each test,
atime_test_list
for defining common arguments in each
element of the test list.
if(FALSE){ tdir <- tempfile() dir.create(tdir) git2r::clone("https://github.com/tdhock/binsegRcpp", tdir) repo <- git2r::repository(tdir) git2r::checkout(repo, "another-branch") result.list <- atime::atime_pkg(tdir) inst.atime <- file.path(tdir, "inst", "atime") dir(inst.atime) tests.RData <- file.path(inst.atime, "tests.RData") (objs <- load(tests.RData)) atime::atime_versions_remove("binsegRcpp") } ## https://github.com/tdhock/binsegRcpp/blob/atime-test-funs/.ci/atime/tests.R ## has another real example, see how to run it in tests/testthat/test-versions.R
if(FALSE){ tdir <- tempfile() dir.create(tdir) git2r::clone("https://github.com/tdhock/binsegRcpp", tdir) repo <- git2r::repository(tdir) git2r::checkout(repo, "another-branch") result.list <- atime::atime_pkg(tdir) inst.atime <- file.path(tdir, "inst", "atime") dir(inst.atime) tests.RData <- file.path(inst.atime, "tests.RData") (objs <- load(tests.RData)) atime::atime_versions_remove("binsegRcpp") } ## https://github.com/tdhock/binsegRcpp/blob/atime-test-funs/.ci/atime/tests.R ## has another real example, see how to run it in tests/testthat/test-versions.R
Use this to define an element of your test.list in atime/tests.R, prior to running atime_pkg.
atime_test( N, setup, expr, times, seconds.limit, verbose, pkg.edit.fun, result, ...)
atime_test( N, setup, expr, times, seconds.limit, verbose, pkg.edit.fun, result, ...)
N |
numeric vector of data sizes to vary. |
setup |
expression to evaluate for every data size, before timings.
In contrast to expr, no replacement of PKG: by PKG.SHA: is performed in setup. |
expr |
code with package double-colon prefix, for example PKG::fun(argA, argB), where PKG is the name of the package being tested. |
times |
number of times to evaluate each timed expression. |
seconds.limit |
if the median timing of any expression exceeds this many seconds, then no timings for larger N are computed. |
verbose |
logical, print messages after every data size? |
pkg.edit.fun |
function called to edit package before installation, should typically replace instances of PKG with PKG.SHA, default works with Rcpp packages. |
result |
logical, save results? (default FALSE) |
... |
named versions. |
List of expressions.
Toby Dylan Hocking
atime_test_list
for defining common arguments in each
element of the test list,
atime_pkg
for running tests.
atime::atime_test( N=c(1,10), setup=data.vec <- rnorm(N), expr=binsegRcpp::binseg("mean_norm",data.vec)) ## https://github.com/tdhock/binsegRcpp/blob/atime-test-funs/.ci/atime/tests.R ## has a real example, see how to run it in tests/testthat/test-versions.R
atime::atime_test( N=c(1,10), setup=data.vec <- rnorm(N), expr=binsegRcpp::binseg("mean_norm",data.vec)) ## https://github.com/tdhock/binsegRcpp/blob/atime-test-funs/.ci/atime/tests.R ## has a real example, see how to run it in tests/testthat/test-versions.R
Use this to define test.list in your atime/tests.R file, prior to running atime_pkg.
Arguments in ... should all be named; if a name is an argument of atime_versions, it will be copied to each test; otherwise it should be the name of a test.
atime_test_list( N, setup, expr, times, seconds.limit, verbose, pkg.edit.fun, result, tests = NULL, ...)
atime_test_list( N, setup, expr, times, seconds.limit, verbose, pkg.edit.fun, result, tests = NULL, ...)
... |
names for tests, values are lists of arguments to pass to atime_versions. |
tests |
list of tests, with names for tests, values are lists of arguments to pass to atime_versions. |
N |
integer vector of data sizes. |
setup |
expression that depends on N, to be evaluated before timings. |
expr |
expression to time. Not evaluated before copying to each test. |
times |
number of times to run expr. |
seconds.limit |
number of seconds after which we stop trying larger N. |
verbose |
logical: print output? |
pkg.edit.fun |
function for editing package prior to testing. |
result |
logical: save results? |
List representing performance tests, from ... and tests; each element is a list of arguments to pass to atime_versions.
Toby Dylan Hocking
atime_test
for defining each test,
atime_pkg
for running tests.
(test.list.named <- atime::atime_test_list( N=as.integer(10^seq(1,3,by=0.5)), setup={ set.seed(1) data.vec <- rnorm(N) }, mean_norm=atime::atime_test(expr=binsegRcpp::binseg("mean_norm",data.vec)), poisson=atime::atime_test(expr=binsegRcpp::binseg("poisson",data.vec)), NULL)) ## https://github.com/tdhock/binsegRcpp/blob/atime-test-funs/.ci/atime/tests.R ## has a real example, see how to run it in tests/testthat/test-versions.R
(test.list.named <- atime::atime_test_list( N=as.integer(10^seq(1,3,by=0.5)), setup={ set.seed(1) data.vec <- rnorm(N) }, mean_norm=atime::atime_test(expr=binsegRcpp::binseg("mean_norm",data.vec)), poisson=atime::atime_test(expr=binsegRcpp::binseg("poisson",data.vec)), NULL)) ## https://github.com/tdhock/binsegRcpp/blob/atime-test-funs/.ci/atime/tests.R ## has a real example, see how to run it in tests/testthat/test-versions.R
Computation time and memory for a single R expression evaluated using several different git versions.
atime_versions( pkg.path, N, setup, expr, sha.vec=NULL, times=10, seconds.limit=0.01, verbose=FALSE, pkg.edit.fun=pkg.edit.default, result=FALSE, N.env.parent=NULL, ...)
atime_versions( pkg.path, N, setup, expr, sha.vec=NULL, times=10, seconds.limit=0.01, verbose=FALSE, pkg.edit.fun=pkg.edit.default, result=FALSE, N.env.parent=NULL, ...)
pkg.path |
Path to git repo containing R package. |
N |
numeric vector of data sizes to vary. |
setup |
expression to evaluate for every data size, before timings.
In contrast to expr, no replacement of PKG: by PKG.SHA: is performed in setup. |
expr |
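code with package double-colon prefix, for example PKG::fun(argA, argB), where PKG is the name of the package specified by pkg.path; one expression is created for each version by replacing PKG: with PKG.SHA:. |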
sha.vec |
named character vector / list of versions. |
times |
number of times to evaluate each timed expression. |
seconds.limit |
if the median timing of any expression exceeds this many seconds, then no timings for larger N are computed. |
verbose |
logical, print messages after every data size? |
pkg.edit.fun |
function called to edit package before installation, should typically replace instances of PKG with PKG.SHA, default works with Rcpp packages. |
result |
logical, save results? (default FALSE) |
N.env.parent |
environment to use as parent of environment created for each data size N, or NULL to use default parent env. |
... |
named versions. |
For convenience, versions can be specified either as code (...), data (sha.vec), or both.
Each version should be either "" (to use the currently installed version of the package, or if missing, install the most recent version from CRAN) or a SHA1 hash, which is passed as the branch arg to git2r::checkout; version names are used to identify/interpret the output/plots.
list of class atime with elements seconds.limit (numeric input param), measurements (data table of results).
Toby Dylan Hocking
atime_versions_exprs converts expr into a list of expressions, one for each version, passed to atime as the expr.list argument.
if(FALSE){ tdir <- tempfile() dir.create(tdir) git2r::clone("https://github.com/tdhock/binsegRcpp", tdir) atime.list <- atime::atime_versions( pkg.path=tdir, N=2^seq(2, 20), setup={ max.segs <- as.integer(N/2) data.vec <- 1:N }, expr=binsegRcpp::binseg_normal(data.vec, max.segs), cv="908b77c411bc7f4fcbcf53759245e738ae724c3e", "rm unord map"="dcd0808f52b0b9858352106cc7852e36d7f5b15d", "mvl_construct"="5942af606641428315b0e63c7da331c4cd44c091") plot(atime.list) atime::atime_versions_remove("binsegRcpp") }
if(FALSE){ tdir <- tempfile() dir.create(tdir) git2r::clone("https://github.com/tdhock/binsegRcpp", tdir) atime.list <- atime::atime_versions( pkg.path=tdir, N=2^seq(2, 20), setup={ max.segs <- as.integer(N/2) data.vec <- 1:N }, expr=binsegRcpp::binseg_normal(data.vec, max.segs), cv="908b77c411bc7f4fcbcf53759245e738ae724c3e", "rm unord map"="dcd0808f52b0b9858352106cc7852e36d7f5b15d", "mvl_construct"="5942af606641428315b0e63c7da331c4cd44c091") plot(atime.list) atime::atime_versions_remove("binsegRcpp") }
Install different git commit versions as different packages, then create a list of expressions, one for each version. For most use cases atime_versions is simpler, but atime_versions_exprs is more flexible for the case of comparing different versions of one expression to another expression.
atime_versions_exprs( pkg.path, expr, sha.vec=NULL, verbose=FALSE, pkg.edit.fun=pkg.edit.default, ...)
atime_versions_exprs( pkg.path, expr, sha.vec=NULL, verbose=FALSE, pkg.edit.fun=pkg.edit.default, ...)
pkg.path |
Path to git repo containing R package. |
expr |
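code with package double-colon prefix, for example PKG::fun(argA, argB), where PKG is the name of the package specified by pkg.path; one expression is created for each version by replacing PKG: with PKG.SHA:. |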
sha.vec |
named character vector / list of versions. |
verbose |
logical, print messages after every data size? |
pkg.edit.fun |
function called to edit package before installation, should typically replace instances of PKG with PKG.SHA, default works with Rcpp packages. |
... |
named versions. |
For convenience, versions can be specified either as code (...), data (sha.vec), or both.
Each version should be either "" (to install the most recent version from CRAN) or a SHA1 hash, which is passed as the branch arg to git2r::checkout; version names are used to identify/interpret the output/plots.
Each version is installed as a separate package (to whatever R library is first on .libPaths()), using the package name PKG.SHA.
A list of expressions, one for each version, created by replacing PKG: in expr with PKG.SHA:, suitable for timing as in atime(name1=Package.SHA1::fun(argA, argB), name2=Package.SHA2::fun(argA,argB)).
Toby Dylan Hocking
if(FALSE){ if(requireNamespace("changepoint")){ tdir <- tempfile() dir.create(tdir) git2r::clone("https://github.com/tdhock/binsegRcpp", tdir) expr.list <- atime::atime_versions_exprs( pkg.path=tdir, expr=binsegRcpp::binseg_normal(data.vec, max.segs), cv="908b77c411bc7f4fcbcf53759245e738ae724c3e", "rm unord map"="dcd0808f52b0b9858352106cc7852e36d7f5b15d", "mvl_construct"="5942af606641428315b0e63c7da331c4cd44c091") atime.list <- atime::atime( N=2^seq(2, 20), setup={ max.segs <- as.integer(N/2) data.vec <- 1:N }, expr.list=expr.list, changepoint=changepoint::cpt.mean( data.vec, penalty="Manual", pen.value=0, method="BinSeg", Q=max.segs-1)) plot(atime.list) } atime::atime_versions_remove("binsegRcpp") }
if(FALSE){ if(requireNamespace("changepoint")){ tdir <- tempfile() dir.create(tdir) git2r::clone("https://github.com/tdhock/binsegRcpp", tdir) expr.list <- atime::atime_versions_exprs( pkg.path=tdir, expr=binsegRcpp::binseg_normal(data.vec, max.segs), cv="908b77c411bc7f4fcbcf53759245e738ae724c3e", "rm unord map"="dcd0808f52b0b9858352106cc7852e36d7f5b15d", "mvl_construct"="5942af606641428315b0e63c7da331c4cd44c091") atime.list <- atime::atime( N=2^seq(2, 20), setup={ max.segs <- as.integer(N/2) data.vec <- 1:N }, expr.list=expr.list, changepoint=changepoint::cpt.mean( data.vec, penalty="Manual", pen.value=0, method="BinSeg", Q=max.segs-1)) plot(atime.list) } atime::atime_versions_remove("binsegRcpp") }
atime_versions_exprs installs different git versions of a package, and this function removes them.
atime_versions_remove(Package)
atime_versions_remove(Package)
Package |
Name of package without SHA. |
The library searched is the first on .libPaths().
integer exit status code from unlink, non-zero if removal failed.
Toby Dylan Hocking
Find and replace for every file specified by glob.
glob_find_replace(glob, FIND, REPLACE)
glob_find_replace(glob, FIND, REPLACE)
glob |
character string: glob defining files. |
FIND |
character string: regex to find. |
REPLACE |
character string: regex to use for replacement. |
nothing.
Toby Dylan Hocking
## see vignette("data.table", package="atime")
## see vignette("data.table", package="atime")
Compute best asymptotic references, for all empirical measurements which are present (not missing) and increasing with data size.
references_best(L, fun.list=NULL)
references_best(L, fun.list=NULL)
L |
List output from atime. |
fun.list |
List of asymptotic complexity reference functions, default NULL means to use package default. |
list of class "references_best"
with elements
references
(data table of all references),
plot.references
(data table of references to show using plot
method, default is to show closest larger and smaller references),
measurements
(data table of measurements).
Toby Dylan Hocking
## Polynomial and exponential time string functions. atime_result_string <- atime::atime( seconds.limit=0.001, N=unique(as.integer(10^seq(0,4,l=100))), setup={ subject <- paste(rep("a", N), collapse="") pattern <- paste(rep(c("a?", "a"), each=N), collapse="") linear_size_replacement <- paste(rep("REPLACEMENT", N), collapse="") }, PCRE.match=regexpr(pattern, subject, perl=TRUE), TRE.match=regexpr(pattern, subject, perl=FALSE), constant.replacement=gsub("a","constant size replacement",subject), linear.replacement=gsub("a",linear_size_replacement,subject)) (refs_best_string <- atime::references_best(atime_result_string)) ## plot method shows each expr and unit in a separate panel. ## default is to show closest larger and smaller references. plot(refs_best_string) ## modifying plot.references changes violet references shown by plot. refs_best_string$plot.references <- refs_best_string$ref[c("N","N^2","N^3","2^N"),on="fun.name"] plot(refs_best_string) ## predict method computes N for given units (default seconds limit). (pred_string <- predict(refs_best_string)) plot(pred_string)
## Polynomial and exponential time string functions. atime_result_string <- atime::atime( seconds.limit=0.001, N=unique(as.integer(10^seq(0,4,l=100))), setup={ subject <- paste(rep("a", N), collapse="") pattern <- paste(rep(c("a?", "a"), each=N), collapse="") linear_size_replacement <- paste(rep("REPLACEMENT", N), collapse="") }, PCRE.match=regexpr(pattern, subject, perl=TRUE), TRE.match=regexpr(pattern, subject, perl=FALSE), constant.replacement=gsub("a","constant size replacement",subject), linear.replacement=gsub("a",linear_size_replacement,subject)) (refs_best_string <- atime::references_best(atime_result_string)) ## plot method shows each expr and unit in a separate panel. ## default is to show closest larger and smaller references. plot(refs_best_string) ## modifying plot.references changes violet references shown by plot. refs_best_string$plot.references <- refs_best_string$ref[c("N","N^2","N^3","2^N"),on="fun.name"] plot(refs_best_string) ## predict method computes N for given units (default seconds limit). (pred_string <- predict(refs_best_string)) plot(pred_string)