Conversation
tests currently fail
|
Oh this is juicy, turns out that tensorflow implements the same use of SD as numpy. Numpy uses N, not N-1 , when computing variance If we want to make greta "R equivalent", then we need to use N - 1. Here's a bit of proof about this, using the current implementation of it in this branch: # ugh, OK so tensorflow divides by N, not N - 1
regular_var <- function(x, n_minus_1 = TRUE){
total_ss <- sum((x - mean(x))^2)
if (n_minus_1){
return(total_ss / (length(x) - 1))
} else if (!n_minus_1){
return(total_ss / length(x))
}
}
regular_sd <- function(x, n_minus_1 = TRUE){
var <- regular_var(x, n_minus_1 = n_minus_1)
sqrt(var)
}
# this is what R does
sd(1:100)
#> [1] 29.01149
regular_sd(1:100, n_minus_1 = TRUE)
#> [1] 29.01149
# this is what numpy does
# https://numpy.org/doc/stable/reference/generated/numpy.var.html#numpy.var
# which is what tensorflow replicated
#
regular_sd(1:100, n_minus_1 = FALSE)
#> [1] 28.86607
library(greta)
#>
#> Attaching package: 'greta'
#> The following objects are masked from 'package:stats':
#>
#> binomial, cov2cor, poisson, sd
#> The following objects are masked from 'package:base':
#>
#> %*%, apply, backsolve, beta, chol2inv, colMeans, colSums, diag,
#> eigen, forwardsolve, gamma, identity, rowMeans, rowSums, sweep,
#> tapply
greta_sitrep()
#> ℹ checking if python available
#> ✓ python (version 3.7) available
#>
#> ℹ checking if TensorFlow available
#> ✓ TensorFlow (version 1.14.0) available
#>
#> ℹ checking if TensorFlow Probability available
#> ✓ TensorFlow Probability (version 0.7.0) available
#>
#> ℹ checking if greta conda environment available
#> ✓ greta conda environment available
#>
#> ℹ Initialising python and checking dependencies, this may take a moment.
#> ✓ Initialising python and checking dependencies ... done!
#>
#> ℹ greta is ready to use!
x <- as_data(1:100)
greta_sd <- as.numeric(calculate(val = sd(x)))
greta_sd
#> [1] 28.86607
greta_sd == regular_sd(1:100, n_minus_1 = FALSE)
#> [1] TRUECreated on 2022-03-18 by the reprex package (v2.0.1) Session infosessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.1.3 (2022-03-10)
#> os macOS Big Sur/Monterey 10.16
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_AU.UTF-8
#> ctype en_AU.UTF-8
#> tz Australia/Perth
#> date 2022-03-18
#> pandoc 2.17.1.1 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.1.0)
#> base64enc 0.1-3 2015-07-28 [1] CRAN (R 4.1.0)
#> callr 3.7.0 2021-04-20 [1] CRAN (R 4.1.0)
#> cli 3.2.0 2022-02-14 [1] CRAN (R 4.1.2)
#> coda 0.19-4 2020-09-30 [1] CRAN (R 4.1.0)
#> codetools 0.2-18 2020-11-04 [1] CRAN (R 4.1.3)
#> crayon 1.5.0 2022-02-14 [1] CRAN (R 4.1.2)
#> digest 0.6.29 2021-12-01 [1] CRAN (R 4.1.0)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.0)
#> evaluate 0.15 2022-02-18 [1] CRAN (R 4.1.2)
#> fansi 1.0.2 2022-01-14 [1] CRAN (R 4.1.2)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.0)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.1.0)
#> future 1.24.0 2022-02-19 [1] CRAN (R 4.1.2)
#> globals 0.14.0 2020-11-22 [1] CRAN (R 4.1.0)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.1.2)
#> greta * 0.4.1.9000 2022-03-18 [1] local
#> here 1.0.1 2020-12-13 [1] CRAN (R 4.1.0)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.1.0)
#> hms 1.1.1 2021-09-26 [1] CRAN (R 4.1.0)
#> htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.1.0)
#> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.1.2)
#> knitr 1.37 2021-12-16 [1] CRAN (R 4.1.0)
#> lattice 0.20-45 2021-09-22 [1] CRAN (R 4.1.3)
#> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.1.0)
#> listenv 0.8.0 2019-12-05 [1] CRAN (R 4.1.0)
#> magrittr 2.0.2 2022-01-26 [1] CRAN (R 4.1.2)
#> Matrix 1.4-0 2021-12-08 [1] CRAN (R 4.1.3)
#> parallelly 1.30.0 2021-12-17 [1] CRAN (R 4.1.0)
#> pillar 1.7.0 2022-02-01 [1] CRAN (R 4.1.2)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.0)
#> png 0.1-7 2013-12-03 [1] CRAN (R 4.1.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.1.0)
#> processx 3.5.2 2021-04-30 [1] CRAN (R 4.1.0)
#> progress 1.2.2 2019-05-16 [1] CRAN (R 4.1.0)
#> ps 1.6.0 2021-02-28 [1] CRAN (R 4.1.0)
#> purrr 0.3.4 2020-04-17 [1] CRAN (R 4.1.0)
#> R.cache 0.15.0 2021-04-30 [1] CRAN (R 4.1.0)
#> R.methodsS3 1.8.1 2020-08-26 [1] CRAN (R 4.1.0)
#> R.oo 1.24.0 2020-08-26 [1] CRAN (R 4.1.0)
#> R.utils 2.11.0 2021-09-26 [1] CRAN (R 4.1.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.0)
#> Rcpp 1.0.8.2 2022-03-11 [1] CRAN (R 4.1.2)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.1.0)
#> reticulate 1.24 2022-01-26 [1] CRAN (R 4.1.2)
#> rlang 1.0.2 2022-03-04 [1] CRAN (R 4.1.2)
#> rmarkdown 2.11 2021-09-14 [1] CRAN (R 4.1.0)
#> rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.1.0)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.1.0)
#> sessioninfo 1.2.2.9000 2022-03-01 [1] Github (r-lib/sessioninfo@d70760d)
#> stringi 1.7.6 2021-11-29 [1] CRAN (R 4.1.0)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.1.0)
#> styler 1.6.2 2021-09-23 [1] CRAN (R 4.1.0)
#> tensorflow 2.8.0 2022-02-09 [1] CRAN (R 4.1.2)
#> tfruns 1.5.0 2021-02-26 [1] CRAN (R 4.1.0)
#> tibble 3.1.6 2021-11-07 [1] CRAN (R 4.1.0)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.0)
#> vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.1.0)
#> whisker 0.4 2019-08-28 [1] CRAN (R 4.1.0)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.1.2)
#> xfun 0.30 2022-03-02 [1] CRAN (R 4.1.2)
#> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.1.2)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.1/Resources/library
#>
#> ─ Python configuration ───────────────────────────────────────────────────────
#> python: /Users/njtierney/Library/r-miniconda/envs/greta-env/bin/python
#> libpython: /Users/njtierney/Library/r-miniconda/envs/greta-env/lib/libpython3.7m.dylib
#> pythonhome: /Users/njtierney/Library/r-miniconda/envs/greta-env:/Users/njtierney/Library/r-miniconda/envs/greta-env
#> version: 3.7.12 | packaged by conda-forge | (default, Oct 26 2021, 05:59:23) [Clang 11.1.0 ]
#> numpy: /Users/njtierney/Library/r-miniconda/envs/greta-env/lib/python3.7/site-packages/numpy
#> numpy_version: 1.16.4
#> tensorflow: /Users/njtierney/Library/r-miniconda/envs/greta-env/lib/python3.7/site-packages/tensorflow
#>
#> NOTE: Python version was forced by use_python function
#>
#> ────────────────────────────────────────────────────────────────────────────── |
M1 workaround CRAN patch
Increment version number to 0.4.2.9000
|
One alternative would be to write out the following in tensorflow: regular_sd <- function(x, n_minus_1 = TRUE){
if (!n_minus_1) {
n_dim <- length(dim(x))
reduction_dims <- seq_len(n_dim - 1)
sd_result <- tf$math$reduce_std(x, axis = reduction_dims, keepdims = !drop)
} else if (n_minus_1){
# replace these parts with tf_sum and friends?
x_mean_sq <- tf_mean(x) * tf_mean(x)
total_ss <- tf_sum(x - tf_mean_sq)
var <- total_ss / (length(x) - 1)
sd_result <- sqrt(var)
# total_ss <- sum((x - mean(x))^2)
# var <- (total_ss / (length(x) - 1))
# sd_result <- sqrt(var)
}
return(sd_result)
}And provide an optional with |
…tandard deviation
|
This is how benchmark results would change (along with a 95% confidence interval in relative change) if dbadadf is merged into master:
|
tests currently fail
…tandard deviation
…reta into implement-sd-var-504
…dimension that is being returned
|
This is how benchmark results would change (along with a 95% confidence interval in relative change) if 1ab817d is merged into master:
|
|
This is how benchmark results would change (along with a 95% confidence interval in relative change) if 897c035 is merged into master:
|
|
Looks like {touchstone} is working. :D. Note that you can make confidence intervals arbitrary tight by running more iterations in each expression you benchmark, see the doc for |
resolves #504