From 31e477ae03a04a6d7c58e7b95b17974b690761fe Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Fri, 29 Mar 2024 19:30:42 -0700 Subject: [PATCH 01/14] version up --- CRAN-SUBMISSION | 3 --- DESCRIPTION | 4 ++-- NEWS.md | 4 ++++ cran-comments.md | 2 +- man/estimatr.Rd | 2 +- man/horvitz_thompson.Rd | 5 ++--- tests/testthat/helper-lm-robust-se.R | 1 + tests/testthat/test-lm-cluster.R | 2 +- tests/testthat/test-modelsummary.R | 12 +++++++----- tests/testthat/test-zzzbroom.R | 13 ------------- 10 files changed, 19 insertions(+), 29 deletions(-) delete mode 100644 CRAN-SUBMISSION diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION deleted file mode 100644 index 0080f95a..00000000 --- a/CRAN-SUBMISSION +++ /dev/null @@ -1,3 +0,0 @@ -Version: 1.0.2 -Date: 2024-01-17 06:28:29 UTC -SHA: 70b42698119d523bbacc9294c9238b7014715e7a diff --git a/DESCRIPTION b/DESCRIPTION index 99957251..aa06c275 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: estimatr Type: Package Title: Fast Estimators for Design-Based Inference -Version: 1.0.2 +Version: 1.0.4 Authors@R: c(person("Graeme", "Blair", email = "graeme.blair@gmail.com", role = c("aut", "cre")), person("Jasper", "Cooper", email = "jjc2247@columbia.edu", role = c("aut")), person("Alexander", "Coppock", email = "alex.coppock@yale.edu", role = c("aut")), @@ -23,7 +23,7 @@ Imports: rlang (>= 0.2.0) LinkingTo: Rcpp, RcppEigen Encoding: UTF-8 -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 LazyData: true Suggests: fabricatr (>= 0.10.0), diff --git a/NEWS.md b/NEWS.md index c91ef9c6..f7283b6d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# estimatr 1.0.4 + +* Test suite changes for M1 mac stay current on CRAN. + # estimatr 1.0.2 * Minor documentation changes to stay current on CRAN. diff --git a/cran-comments.md b/cran-comments.md index ca838632..0214e442 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,6 +1,6 @@ ## Submission -Small changes to update based on Kurt's 8/19/23 email and also address C++ warning in R CMD CHECK. +Fixes M1 Mac errors. There are no changes to worse in reverse depends. diff --git a/man/estimatr.Rd b/man/estimatr.Rd index 2c51407c..a7aedcdf 100644 --- a/man/estimatr.Rd +++ b/man/estimatr.Rd @@ -2,8 +2,8 @@ % Please edit documentation in R/estimatr.R \docType{package} \name{estimatr} -\alias{estimatr} \alias{estimatr-package} +\alias{estimatr} \title{estimatr} \description{ Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) . diff --git a/man/horvitz_thompson.Rd b/man/horvitz_thompson.Rd index abab3036..14cdb2f3 100644 --- a/man/horvitz_thompson.Rd +++ b/man/horvitz_thompson.Rd @@ -123,9 +123,8 @@ In short, the Horvitz-Thompson estimator essentially reweights each unit by the probability of it being in its observed condition. Pivotal to the estimation of treatment effects using this estimator are the marginal condition probabilities (i.e., the probability that any one unit is in -a particular treatment condition). Pivotal to the estimating the variance -variance whenever the design is more complicated than simple randomization, -are the +a particular treatment condition). Pivotal to estimating the variance +whenever the design is more complicated than simple randomization are the joint condition probabilities (i.e., the probabilities that any two units have a particular set of treatment conditions, either the same or different). The estimator we provide here considers the case with two diff --git a/tests/testthat/helper-lm-robust-se.R b/tests/testthat/helper-lm-robust-se.R index 4eb98d06..53e63071 100644 --- a/tests/testthat/helper-lm-robust-se.R +++ b/tests/testthat/helper-lm-robust-se.R @@ -164,3 +164,4 @@ BMlmSE <- function(model, clustervar=NULL, ell=NULL, IK=TRUE) { se = se, se.Stata = se.Stata ) } + diff --git a/tests/testthat/test-lm-cluster.R b/tests/testthat/test-lm-cluster.R index 2f1a432b..65109664 100644 --- a/tests/testthat/test-lm-cluster.R +++ b/tests/testthat/test-lm-cluster.R @@ -121,7 +121,7 @@ test_that("lm cluster se", { for (se_type in cr_se_types) { lmr_rd <- lm_robust(Y ~ X + Z + X2, data = dat, clusters = J, se_type = se_type) lmr_full <- lm_robust(Y ~ X + Z, data = dat, clusters = J, se_type = se_type) - expect_identical( + expect_equal( tidy(lmr_rd)[1:3, ], tidy(lmr_full) ) diff --git a/tests/testthat/test-modelsummary.R b/tests/testthat/test-modelsummary.R index 451f4574..ada05531 100644 --- a/tests/testthat/test-modelsummary.R +++ b/tests/testthat/test-modelsummary.R @@ -3,10 +3,12 @@ context("S3 - modelsummary works") test_that("modelsummary works with glance", { - # skip_if_not_installed("modelsummary") - skip("modelsummary non reproducible errors") + skip_if_not_installed("modelsummary") library(modelsummary) + + set.seed(5) + model1 <- lm_robust(mpg ~ am, mtcars) model2 <- lm_robust(mpg ~ am, mtcars, clusters = cyl) model3 <- lm_lin(mpg ~ am, ~ cyl, mtcars) @@ -14,9 +16,9 @@ test_that("modelsummary works with glance", { mso <- modelsummary(list(model1, model2, model3), output = "data.frame") expect_equal(colnames(mso), c("part", "term", "statistic", - "Model 1", "Model 2", "Model 3")) + "(1)", "(2)", "(3)")) - expect_equal(nrow(mso), 12L) + expect_equal(nrow(mso), 15L) expect_equal(ncol(mso), 6L) @@ -28,7 +30,7 @@ test_that("modelsummary works with glance", { gof_omit = c("N|[sS]tatistic|p.value|p{1}"), output = "data.frame") - expect_equal(nrow(mso), 6) + expect_equal(nrow(mso), 10) expect_equal(ncol(mso), 5) diff --git a/tests/testthat/test-zzzbroom.R b/tests/testthat/test-zzzbroom.R index 07fa9702..05431c30 100644 --- a/tests/testthat/test-zzzbroom.R +++ b/tests/testthat/test-zzzbroom.R @@ -2,19 +2,6 @@ # isn't a SUGGESTed package context("zzzbroom.R - .onAttach") - -test_that(".onLoad does not message if new version of 'broom' is loaded", { - skip_if_not_installed("broom", minimum_version = "0.5.0.9000") - library(broom) - expect_silent(.onLoad("estimatr", "estimatr")) - - expect_is( - tidy(lm_robust(mpg ~ hp, mtcars)), - "data.frame" - ) -}) - - test_that(".onLoad message if old version of 'broom' is installed", { skip_if_not_installed("broom") skip_if(packageVersion("broom") > "0.5.0") From a1734bab3bcafd1feb3ea1a9c691fe460f8f4593 Mon Sep 17 00:00:00 2001 From: Molly Offer-Westort Date: Mon, 6 Jan 2025 01:02:07 -0600 Subject: [PATCH 02/14] generate factorial model matrix for lm_lin in predict --- R/S3_predict.R | 69 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/R/S3_predict.R b/R/S3_predict.R index 2e2bbfb5..78fb3e04 100644 --- a/R/S3_predict.R +++ b/R/S3_predict.R @@ -74,30 +74,6 @@ predict.lm_robust <- function(object, X <- get_X(object, newdata, na.action) - # lm_lin scaling - if (!is.null(object$scaled_center)) { - demeaned_covars <- - scale( - X[ - , - names(object$scaled_center), - drop = FALSE - ], - center = object$scaled_center, - scale = FALSE - ) - - # Interacted with treatment - treat_name <- attr(object$terms, "term.labels")[1] - interacted_covars <- X[, treat_name] * demeaned_covars - - X <- cbind( - X[, attr(X, "assign") <= 1, drop = FALSE], - demeaned_covars, - interacted_covars - ) - } - # Get coefs coefs <- as.matrix(coef(object)) @@ -224,9 +200,54 @@ get_X <- function(object, newdata, na.action) { X <- model.matrix(rhs_terms, mf, contrasts.arg = object$contrasts) + # lm_lin scaling (moved down from predict.lm_robust) + if (!is.null(object$scaled_center)) { + # Covariates + demeaned_covars <- + scale( + X[ + , + names(object$scaled_center), + drop = FALSE + ], + center = object$scaled_center, + scale = FALSE + ) + + # Handle treatment variable reconstruction + treat_name <- attr(object$terms, "term.labels")[1] + treatment <- mf[, treat_name] + + if (any(!(treatment %in% c(0, 1)))) { + vals <- sort(unique(treatment)) + treatment <- model.matrix(~ factor(treatment, levels = vals) - 1) + colnames(treatment) <- paste0(treat_name, "_", vals) + # Drop out first group if there is an intercept + if (attr(rhs_terms, "intercept") == 1) treatment <- treatment[, -1, drop = FALSE] + } + + # Interactions with covariates + interaction_matrix <- do.call( + cbind, + lapply(seq_len(ncol(treatment)), function(i) { + treatment[, i] * demeaned_covars + }) + ) + + X <- cbind( + if (attr(rhs_terms, "intercept") == 1) { + matrix(1, nrow = nrow(X), ncol = 1, dimnames = list(NULL, "(Intercept)")) + }, + treatment, + if (attr(rhs_terms, "intercept") == 1 || ncol(treatment) == 1) demeaned_covars, + interaction_matrix + ) + } + return(X) } + add_fes <- function(preds, object, newdata) { # Add factors! From bedcc7e664683d49e59d2d3dfb55e889907acee5 Mon Sep 17 00:00:00 2001 From: Molly Offer-Westort Date: Mon, 6 Jan 2025 18:28:44 -0600 Subject: [PATCH 03/14] deal with intercepts --- R/S3_predict.R | 13 +++++-------- R/estimatr_lm_lin.R | 20 +++++--------------- 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/R/S3_predict.R b/R/S3_predict.R index 78fb3e04..b381df1e 100644 --- a/R/S3_predict.R +++ b/R/S3_predict.R @@ -217,14 +217,11 @@ get_X <- function(object, newdata, na.action) { # Handle treatment variable reconstruction treat_name <- attr(object$terms, "term.labels")[1] treatment <- mf[, treat_name] - - if (any(!(treatment %in% c(0, 1)))) { - vals <- sort(unique(treatment)) - treatment <- model.matrix(~ factor(treatment, levels = vals) - 1) - colnames(treatment) <- paste0(treat_name, "_", vals) - # Drop out first group if there is an intercept - if (attr(rhs_terms, "intercept") == 1) treatment <- treatment[, -1, drop = FALSE] - } + vals <- sort(unique(treatment)) + treatment <- model.matrix(~ factor(treatment, levels = vals) - 1) + colnames(treatment) <- paste0(treat_name, "_", vals) + # Drop out first group if there is an intercept + if (attr(rhs_terms, "intercept") == 1) treatment <- treatment[, -1, drop = FALSE] # Interactions with covariates interaction_matrix <- do.call( diff --git a/R/estimatr_lm_lin.R b/R/estimatr_lm_lin.R index 37a0eabe..5a757bc8 100644 --- a/R/estimatr_lm_lin.R +++ b/R/estimatr_lm_lin.R @@ -230,7 +230,7 @@ lm_lin <- function(formula, design_mat_treatment <- colnames(design_matrix)[treat_col] # Check case where treatment is not factor and is not binary - if (any(!(treatment %in% c(0, 1)))) { + if (any(!(treatment %in% c(0, 1))) | (!has_intercept&ncol(treatment) ==1) ) { # create dummies for non-factor treatment variable # Drop out first group if there is an intercept @@ -313,20 +313,10 @@ lm_lin <- function(formula, interacted_covars ) } else { - # If no intercept, but treatment is only one column, - # need to add base terms for covariates - if (n_treat_cols == 1) { - X <- cbind( - treatment, - demeaned_covars, - interacted_covars - ) - } else { - X <- cbind( - treatment, - interacted_covars - ) - } + X <- cbind( + treatment, + interacted_covars + ) } # ---------- From 4b53e99d41b00bd0fd28bfab47479f937b7bb4fe Mon Sep 17 00:00:00 2001 From: Molly Offer-Westort Date: Tue, 21 Jan 2025 12:40:04 -0600 Subject: [PATCH 04/14] Ensure treatment levels in newdata are subset of those for lin model fit --- R/S3_predict.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/S3_predict.R b/R/S3_predict.R index b381df1e..e50a1cac 100644 --- a/R/S3_predict.R +++ b/R/S3_predict.R @@ -218,7 +218,16 @@ get_X <- function(object, newdata, na.action) { treat_name <- attr(object$terms, "term.labels")[1] treatment <- mf[, treat_name] vals <- sort(unique(treatment)) - treatment <- model.matrix(~ factor(treatment, levels = vals) - 1) + + # Ensure treatment levels in newdata are subset of those for model fit + if (!all(as.character(vals) %in% object$xlevels[[1]])) { + stop( + "Levels of treatment variable in `newdata` must be a subset of those ", + "in the model fit." + ) + } + assign(treat_name, object$xlevels[[1]]) + treatment <- model.matrix(~ factor(treatment, levels = object$xlevels[[1]]) - 1) colnames(treatment) <- paste0(treat_name, "_", vals) # Drop out first group if there is an intercept if (attr(rhs_terms, "intercept") == 1) treatment <- treatment[, -1, drop = FALSE] From 05c99203d488ad0ce211fa88ad87a36811497736 Mon Sep 17 00:00:00 2001 From: Molly Offer-Westort Date: Tue, 21 Jan 2025 20:44:36 -0600 Subject: [PATCH 05/14] save treatment levels for lin model prediction --- R/S3_predict.R | 9 +++++---- R/estimatr_lm_lin.R | 6 ++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/R/S3_predict.R b/R/S3_predict.R index e50a1cac..1b7b9ef5 100644 --- a/R/S3_predict.R +++ b/R/S3_predict.R @@ -218,17 +218,18 @@ get_X <- function(object, newdata, na.action) { treat_name <- attr(object$terms, "term.labels")[1] treatment <- mf[, treat_name] vals <- sort(unique(treatment)) + old_vals <- object$treatment_levels # Ensure treatment levels in newdata are subset of those for model fit - if (!all(as.character(vals) %in% object$xlevels[[1]])) { + if (!all(as.character(vals) %in% as.character(old_vals))) { stop( "Levels of treatment variable in `newdata` must be a subset of those ", "in the model fit." ) } - assign(treat_name, object$xlevels[[1]]) - treatment <- model.matrix(~ factor(treatment, levels = object$xlevels[[1]]) - 1) - colnames(treatment) <- paste0(treat_name, "_", vals) + treatment <- model.matrix(~ factor(treatment, levels = old_vals) - 1) + + colnames(treatment) <- paste0(treat_name, "_", old_vals) # Drop out first group if there is an intercept if (attr(rhs_terms, "intercept") == 1) treatment <- treatment[, -1, drop = FALSE] diff --git a/R/estimatr_lm_lin.R b/R/estimatr_lm_lin.R index 5a757bc8..cad486d2 100644 --- a/R/estimatr_lm_lin.R +++ b/R/estimatr_lm_lin.R @@ -350,6 +350,12 @@ lm_lin <- function(formula, return_list[["scaled_center"]] <- center setNames(return_list[["scaled_center"]], original_covar_names) + # Store unique treatment values + if(attr(terms(model_data), "dataClasses")[attr(terms(model_data),"term.labels")[1]] == "factor"){ + return_list[["treatment_levels"]] <- model_data$xlevels[[1]] + } else { + return_list[["treatment_levels"]] <- sort(unique(design_matrix[, design_mat_treatment])) + } return_list[["call"]] <- match.call() From c1da381e7e0a2405c8e501674a6714fb4784d2ca Mon Sep 17 00:00:00 2001 From: Molly Offer-Westort Date: Tue, 21 Jan 2025 20:44:59 -0600 Subject: [PATCH 06/14] resolve issue with interaction ordering in prediction matrix --- R/S3_predict.R | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/R/S3_predict.R b/R/S3_predict.R index 1b7b9ef5..1db06e67 100644 --- a/R/S3_predict.R +++ b/R/S3_predict.R @@ -233,13 +233,16 @@ get_X <- function(object, newdata, na.action) { # Drop out first group if there is an intercept if (attr(rhs_terms, "intercept") == 1) treatment <- treatment[, -1, drop = FALSE] - # Interactions with covariates - interaction_matrix <- do.call( - cbind, - lapply(seq_len(ncol(treatment)), function(i) { - treatment[, i] * demeaned_covars - }) - ) + # Interactions matching original fitting logic + n_treat_cols <- ncol(treatment) + n_covars <- ncol(demeaned_covars) + + interaction_matrix <- matrix(0, nrow = nrow(X), ncol = n_covars * n_treat_cols) + + for (i in 1:n_covars) { + cols <- (i - 1) * n_treat_cols + (1:n_treat_cols) + interaction_matrix[, cols] <- treatment * demeaned_covars[, i] + } X <- cbind( if (attr(rhs_terms, "intercept") == 1) { From 59d612ba8f0223c68f4d1c8ae0cc0fb7b1681e11 Mon Sep 17 00:00:00 2001 From: Molly Offer-Westort Date: Tue, 21 Jan 2025 21:45:27 -0600 Subject: [PATCH 07/14] tests for lm lin with no intercept and for prediction --- tests/testthat/test-lm-lin.R | 11 ++++- tests/testthat/test-lm-robust_margins.R | 9 +++-- tests/testthat/test-s3-methods.R | 53 +++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-lm-lin.R b/tests/testthat/test-lm-lin.R index 555f6edd..6a97e33c 100644 --- a/tests/testthat/test-lm-lin.R +++ b/tests/testthat/test-lm-lin.R @@ -308,10 +308,19 @@ test_that("Test LM Lin", { lmlo <- lm_lin(Y ~ z + 0, ~X1, data = dat) expect_equal( lmlo$term, - c("z", "X1_c", "z:X1_c") + c("z0", "z1", "z0:X1_c", "z1:X1_c") + ) + + # works with a multi-value treatment with no intercept + dat$z <- factor(rbinom(nrow(dat), 2, 0.5)) + lmlo <- lm_lin(Y ~ z + 0, ~X1, data = dat) + expect_equal( + lmlo$term, + c("z0", "z1", "z2", "z0:X1_c", "z1:X1_c", "z2:X1_c") ) # behaves correctly with "odd" covariates (see issue #283) + dat$z <- rbinom(nrow(dat), 1, 0.5) lmlo <- lm_lin(Y ~ z, ~ is.na(X1), data = dat) expect_equal( lmlo$term, diff --git a/tests/testthat/test-lm-robust_margins.R b/tests/testthat/test-lm-robust_margins.R index 433e6c38..0cfca407 100644 --- a/tests/testthat/test-lm-robust_margins.R +++ b/tests/testthat/test-lm-robust_margins.R @@ -97,17 +97,18 @@ test_that("lm robust + cluster can work with margins", { test_that("lm lin can work with margins", { skip_if_not_installed("margins") data("alo_star_men") - lml <- lm_lin(GPA_year1 ~ ssp, ~ gpa0, data = alo_star_men, se_type = "classical") + # instruct margins to treat treatment as a factor + lml <- lm_lin(GPA_year1 ~ factor(ssp), ~ gpa0, data = alo_star_men, se_type = "classical") alo_star_men$gpa0_tilde <- alo_star_men$gpa0 - mean(alo_star_men$gpa0) - lmo <- lm(GPA_year1 ~ ssp * gpa0_tilde, data = alo_star_men) + lmo <- lm(GPA_year1 ~ factor(ssp) * gpa0_tilde, data = alo_star_men) lml_sum <- margins:::summary.margins(margins::margins(lml, vce = "delta")) lmo_sum <- margins:::summary.margins(margins::margins(lmo, vce = "delta")) expect_equal( - round(lml_sum[, 4], 5), - round(lmo_sum[, 4], 5) + lml_sum[, 4], + lmo_sum[, 4], ) }) diff --git a/tests/testthat/test-s3-methods.R b/tests/testthat/test-s3-methods.R index 49b2a978..c47eda4d 100644 --- a/tests/testthat/test-s3-methods.R +++ b/tests/testthat/test-s3-methods.R @@ -643,6 +643,59 @@ test_that("predict works", { predict(lm_int_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)] ) + # With and without factorial treatment, intercept, predictions should be the same + lml_out <- lm_lin(y ~ x, covariates = ~ z + I(z^2) +cl, data = dat, se_type = "classical") + lml_0_out <- lm_lin(y ~ x + 0, covariates = ~ z + I(z^2) + cl, data = dat, se_type = "classical") + lml_f_out <- lm_lin(y ~ factor(x), covariates = ~ z + I(z^2) + cl, data = dat, se_type = "classical") + lm_f0_out <- lm_lin(y ~ factor(x) + 0, covariates = ~ z + I(z^2) + cl, data = dat, se_type = "classical") + + expect_equivalent( + predict(lml_0_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)], + predict(lml_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)] + ) + expect_equivalent( + predict(lml_f_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)], + predict(lml_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)], + ) + + expect_equivalent( + predict(lml_f_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)], + predict(lml_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)] + ) + + expect_equivalent( + predict(lml_f_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)], + predict(lm_f0_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)] + ) + + # For multi-level treatments, with and without factorial treatment, intercept, predictions should be the same + dat$x_multi <- rep(0:2, times = 7)[1:20] + new_dat$x_multi <- rep(0:2, times = 7) + + lml_out <- lm_lin(y ~ x_multi, covariates = ~ z + I(z^2) +cl, data = dat, se_type = "classical") + lml_0_out <- lm_lin(y ~ x_multi + 0, covariates = ~ z + I(z^2) + cl, data = dat, se_type = "classical") + lml_f_out <- lm_lin(y ~ factor(x_multi), covariates = ~ z + I(z^2) + cl, data = dat, se_type = "classical") + lm_f0_out <- lm_lin(y ~ factor(x_multi) + 0, covariates = ~ z + I(z^2) + cl, data = dat, se_type = "classical") + + expect_equivalent( + predict(lml_0_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)], + predict(lml_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)] + ) + expect_equivalent( + predict(lml_f_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)], + predict(lml_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)], + ) + + expect_equivalent( + predict(lml_f_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)], + predict(lml_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)] + ) + + expect_equivalent( + predict(lml_f_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)], + predict(lm_f0_out, new_dat, se.fit = TRUE, interval = "confidence")[c(1, 2)] + ) + # working with rank deficient X head(dat) dat$z2 <- dat$z From 4c9b28f89e11deb6ca07f549fc720b88064a065b Mon Sep 17 00:00:00 2001 From: Molly Offer-Westort Date: Wed, 22 Jan 2025 16:38:26 -0600 Subject: [PATCH 08/14] adds lm_lin prediction examples --- R/S3_predict.R | 21 +++++++++++++++++++++ R/estimatr_lm_lin.R | 21 ++++++++++++++++++--- man/lm_lin.Rd | 21 ++++++++++++++++++--- man/predict.lm_robust.Rd | 21 +++++++++++++++++++++ 4 files changed, 78 insertions(+), 6 deletions(-) diff --git a/R/S3_predict.R b/R/S3_predict.R index 1db06e67..5aa47407 100644 --- a/R/S3_predict.R +++ b/R/S3_predict.R @@ -61,6 +61,27 @@ #' new_dat$w <- runif(n) #' predict(lm_out, newdata = new_dat, weights = w, interval = "prediction") #' +#' # Works for 'lm_lin' models as well +#' dat$z <- sample(1:3, size = nrow(dat), replace = TRUE) +#' lmlin_out1 <- lm_lin(y ~ z, covariates = ~ x, data = dat) +#' predict(lmlin_out1, newdata = dat, interval = "prediction") +#' +#' # Predictions from Lin models are equivalent with and without an intercept +#' # and for multi-level treatments entered as numeric or factor variables +#' lmlin_out2 <- lm_lin(y ~ z - 1, covariates = ~ x, data = dat) +#' lmlin_out3 <- lm_lin(y ~ factor(z), covariates = ~ x, data = dat) +#' lmlin_out4 <- lm_lin(y ~ factor(z) - 1, covariates = ~ x, data = dat) +#' +#' predict(lmlin_out2, newdata = dat, interval = "prediction") +#' predict(lmlin_out3, newdata = dat, interval = "prediction") +#' predict(lmlin_out4, newdata = dat, interval = "prediction") +#' +#' # In Lin models, predict will stop with an error message if new +#' # treatment levels are supplied in the new data +#' new_dat$z <- sample(0:3, size = nrow(new_dat), replace = TRUE) +#' # predict(lmlin_out, newdata = new_dat) +#' +#' #' @export predict.lm_robust <- function(object, newdata, diff --git a/R/estimatr_lm_lin.R b/R/estimatr_lm_lin.R index cad486d2..23ce146c 100644 --- a/R/estimatr_lm_lin.R +++ b/R/estimatr_lm_lin.R @@ -83,7 +83,7 @@ #' \item{weighted}{whether or not weights were applied} #' \item{call}{the original function call} #' \item{fitted.values}{the matrix of predicted means} -#' We also return \code{terms} and \code{contrasts}, used by \code{predict}, +#' We also return \code{terms}, \code{contrasts}, and \code{treatment_levels}, used by \code{predict}, #' and \code{scaled_center} (the means of each of the covariates used for centering them). #' #' @seealso \code{\link{lm_robust}} @@ -127,9 +127,12 @@ #' #' lm_lin(y ~ z_clust, covariates = ~ x, data = dat, clusters = clusterID) #' -#' # Works with multi-valued treatments +#' # Works with multi-valued treatments, whether treatment is specified as a +#' # factor or not #' dat$z_multi <- sample(1:3, size = nrow(dat), replace = TRUE) +#' #' lm_lin(y ~ z_multi, covariates = ~ x, data = dat) +#' lm_lin(y ~ factor(z_multi), covariates = ~ x, data = dat) #' #' # Stratified estimator with blocks #' dat$blockID <- rep(1:5, each = 8) @@ -137,11 +140,23 @@ #' #' lm_lin(y ~ z_block, ~ factor(blockID), data = dat) #' +#' # Fitting the model without an intercept provides estimates of mean outcomes +#' # under each respective treatment condition +#' lm_lin(y ~ z_multi - 1, covariates = ~ x, data = dat) +#' +#' # Predictions are the same in equivalent models with and without an intercept +#' lmlin_out3 <- lm_lin(y ~ z_multi - 1, covariates = ~ x, data = dat) +#' lmlin_out4 <- lm_lin(y ~ z_multi, covariates = ~ x, data = dat) +#' +#' predict(lmlin_out3, newdata = dat, se.fit = TRUE, interval = "confidence") +#' predict(lmlin_out4, newdata = dat, se.fit = TRUE, interval = "confidence") +#' #' \dontrun{ #' # Can also use 'margins' package if you have it installed to get #' # marginal effects #' library(margins) -#' lmlout <- lm_lin(y ~ z_block, ~ x, data = dat) +#' # Instruct 'margins' to treat z as a factor +#' lmlout <- lm_lin(y ~ factor(z_block), ~ x, data = dat) #' summary(margins(lmlout)) #' #' # Can output results using 'texreg' diff --git a/man/lm_lin.Rd b/man/lm_lin.Rd index 14e8c147..8ba0149f 100644 --- a/man/lm_lin.Rd +++ b/man/lm_lin.Rd @@ -95,7 +95,7 @@ following components: \item{weighted}{whether or not weights were applied} \item{call}{the original function call} \item{fitted.values}{the matrix of predicted means} -We also return \code{terms} and \code{contrasts}, used by \code{predict}, +We also return \code{terms}, \code{contrasts}, and \code{treatment_levels}, used by \code{predict}, and \code{scaled_center} (the means of each of the covariates used for centering them). } \description{ @@ -149,9 +149,12 @@ dat$z_clust <- cluster_ra(clusters = dat$clusterID) lm_lin(y ~ z_clust, covariates = ~ x, data = dat, clusters = clusterID) -# Works with multi-valued treatments +# Works with multi-valued treatments, whether treatment is specified as a +# factor or not dat$z_multi <- sample(1:3, size = nrow(dat), replace = TRUE) + lm_lin(y ~ z_multi, covariates = ~ x, data = dat) +lm_lin(y ~ factor(z_multi), covariates = ~ x, data = dat) # Stratified estimator with blocks dat$blockID <- rep(1:5, each = 8) @@ -159,11 +162,23 @@ dat$z_block <- block_ra(blocks = dat$blockID) lm_lin(y ~ z_block, ~ factor(blockID), data = dat) +# Fitting the model without an intercept provides estimates of mean outcomes +# under each respective treatment condition +lm_lin(y ~ z_multi - 1, covariates = ~ x, data = dat) + +# Predictions are the same in equivalent models with and without an intercept +lmlin_out3 <- lm_lin(y ~ z_multi - 1, covariates = ~ x, data = dat) +lmlin_out4 <- lm_lin(y ~ z_multi, covariates = ~ x, data = dat) + +predict(lmlin_out3, newdata = dat, se.fit = TRUE, interval = "confidence") +predict(lmlin_out4, newdata = dat, se.fit = TRUE, interval = "confidence") + \dontrun{ # Can also use 'margins' package if you have it installed to get # marginal effects library(margins) - lmlout <- lm_lin(y ~ z_block, ~ x, data = dat) + # Instruct 'margins' to treat z as a factor + lmlout <- lm_lin(y ~ factor(z_block), ~ x, data = dat) summary(margins(lmlout)) # Can output results using 'texreg' diff --git a/man/predict.lm_robust.Rd b/man/predict.lm_robust.Rd index 27792d4e..43257bcd 100644 --- a/man/predict.lm_robust.Rd +++ b/man/predict.lm_robust.Rd @@ -90,4 +90,25 @@ predict(lm_out, newdata = new_dat) new_dat$w <- runif(n) predict(lm_out, newdata = new_dat, weights = w, interval = "prediction") +# Works for 'lm_lin' models as well +dat$z <- sample(1:3, size = nrow(dat), replace = TRUE) +lmlin_out1 <- lm_lin(y ~ z, covariates = ~ x, data = dat) +predict(lmlin_out1, newdata = dat, interval = "prediction") + +# Predictions from Lin models are equivalent with and without an intercept +# and for multi-level treatments entered as numeric or factor variables +lmlin_out2 <- lm_lin(y ~ z - 1, covariates = ~ x, data = dat) +lmlin_out3 <- lm_lin(y ~ factor(z), covariates = ~ x, data = dat) +lmlin_out4 <- lm_lin(y ~ factor(z) - 1, covariates = ~ x, data = dat) + +predict(lmlin_out2, newdata = dat, interval = "prediction") +predict(lmlin_out3, newdata = dat, interval = "prediction") +predict(lmlin_out4, newdata = dat, interval = "prediction") + +# In Lin models, predict will stop with an error message if new +# treatment levels are supplied in the new data +new_dat$z <- sample(0:3, size = nrow(new_dat), replace = TRUE) +# predict(lmlin_out, newdata = new_dat) + + } From 3d0266ccee346dd1fff8db58ddc1fbbac1624a80 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Wed, 5 Feb 2025 09:08:46 -0800 Subject: [PATCH 09/14] cran prep --- CRAN-SUBMISSION | 3 --- DESCRIPTION | 5 +++-- estimatr.Rproj | 1 + man/estimatr.Rd | 2 +- man/horvitz_thompson.Rd | 5 ++--- 5 files changed, 7 insertions(+), 9 deletions(-) delete mode 100644 CRAN-SUBMISSION diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION deleted file mode 100644 index 0080f95a..00000000 --- a/CRAN-SUBMISSION +++ /dev/null @@ -1,3 +0,0 @@ -Version: 1.0.2 -Date: 2024-01-17 06:28:29 UTC -SHA: 70b42698119d523bbacc9294c9238b7014715e7a diff --git a/DESCRIPTION b/DESCRIPTION index 99957251..be47902a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,8 @@ Authors@R: c(person("Graeme", "Blair", email = "graeme.blair@gmail.com", role = person("Luke", "Sonnet", email = "luke.sonnet@gmail.com", role = c("aut")), person("Neal", "Fultz", email = "nfultz@gmail.com", role = c("ctb")), person("Lily", "Medina", email = "lilymiru@gmail.com", role = c("ctb")), - person("Russell", "Lenth", email = "russell-lenth@uiowa.edu", role = c("ctb"))) + person("Russell", "Lenth", email = "russell-lenth@uiowa.edu", role = c("ctb")), + person("Molly", "Offer-Westort", email = "mollyow@uchicago.edu", role = c("ctb"))) Description: Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) . URL: https://declaredesign.org/r/estimatr/, https://github.com/DeclareDesign/estimatr BugReports: https://github.com/DeclareDesign/estimatr/issues @@ -23,7 +24,7 @@ Imports: rlang (>= 0.2.0) LinkingTo: Rcpp, RcppEigen Encoding: UTF-8 -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 LazyData: true Suggests: fabricatr (>= 0.10.0), diff --git a/estimatr.Rproj b/estimatr.Rproj index 57bf3f9f..1c4d969c 100644 --- a/estimatr.Rproj +++ b/estimatr.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: 2690710e-30ff-4c33-a75e-7b4e02172a9d RestoreWorkspace: Default SaveWorkspace: Default diff --git a/man/estimatr.Rd b/man/estimatr.Rd index 2c51407c..a7aedcdf 100644 --- a/man/estimatr.Rd +++ b/man/estimatr.Rd @@ -2,8 +2,8 @@ % Please edit documentation in R/estimatr.R \docType{package} \name{estimatr} -\alias{estimatr} \alias{estimatr-package} +\alias{estimatr} \title{estimatr} \description{ Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) . diff --git a/man/horvitz_thompson.Rd b/man/horvitz_thompson.Rd index abab3036..14cdb2f3 100644 --- a/man/horvitz_thompson.Rd +++ b/man/horvitz_thompson.Rd @@ -123,9 +123,8 @@ In short, the Horvitz-Thompson estimator essentially reweights each unit by the probability of it being in its observed condition. Pivotal to the estimation of treatment effects using this estimator are the marginal condition probabilities (i.e., the probability that any one unit is in -a particular treatment condition). Pivotal to the estimating the variance -variance whenever the design is more complicated than simple randomization, -are the +a particular treatment condition). Pivotal to estimating the variance +whenever the design is more complicated than simple randomization are the joint condition probabilities (i.e., the probabilities that any two units have a particular set of treatment conditions, either the same or different). The estimator we provide here considers the case with two From d4bd29b88a245d3a5678b9bd9e6c01b83e653ca0 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Wed, 5 Feb 2025 09:12:24 -0800 Subject: [PATCH 10/14] news & version bump --- DESCRIPTION | 2 +- NEWS.md | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3ed81f3a..bab771a8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: estimatr Type: Package Title: Fast Estimators for Design-Based Inference -Version: 1.0.4 +Version: 1.0.6 Authors@R: c(person("Graeme", "Blair", email = "graeme.blair@gmail.com", role = c("aut", "cre")), person("Jasper", "Cooper", email = "jjc2247@columbia.edu", role = c("aut")), person("Alexander", "Coppock", email = "alex.coppock@yale.edu", role = c("aut")), diff --git a/NEWS.md b/NEWS.md index f7283b6d..77de895d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +# estimatr 1.0.6 + +* Allows for prediction with lm_lin() when treatment is a factor and/or multi-valued. +* Adds saved treatment_levels to the returned lm_lin model object. +* Stops prediction for lm_lin if the treatment values in new data are not a subset of treatment_levels. +* Standardizes model fit for lm_lin() models with no intercept. +* Adds tests to ensure identical predictions from lm_lin() models where treatment is either numeric or factorial, and fit with/without an intercept. +* Adds relevant examples to predict and lm_robust and lm_lin documentation. +* Adds Molly Offer-Westort as a contributor. + # estimatr 1.0.4 * Test suite changes for M1 mac stay current on CRAN. From b824e8750bb0d8339d4464cfdf67402813e69071 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Wed, 5 Feb 2025 09:14:50 -0800 Subject: [PATCH 11/14] docs --- man/estimatr.Rd | 1 + 1 file changed, 1 insertion(+) diff --git a/man/estimatr.Rd b/man/estimatr.Rd index a7aedcdf..7c86faad 100644 --- a/man/estimatr.Rd +++ b/man/estimatr.Rd @@ -33,6 +33,7 @@ Other contributors: \item Neal Fultz \email{nfultz@gmail.com} [contributor] \item Lily Medina \email{lilymiru@gmail.com} [contributor] \item Russell Lenth \email{russell-lenth@uiowa.edu} [contributor] + \item Molly Offer-Westort \email{mollyow@uchicago.edu} [contributor] } } From 121cb5fd74b2c1703a4662051f44cfd456630ff7 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Wed, 5 Feb 2025 09:34:47 -0800 Subject: [PATCH 12/14] test fix --- tests/testthat/test-lm-robust_margins.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-lm-robust_margins.R b/tests/testthat/test-lm-robust_margins.R index 0cfca407..76140dd4 100644 --- a/tests/testthat/test-lm-robust_margins.R +++ b/tests/testthat/test-lm-robust_margins.R @@ -110,5 +110,6 @@ test_that("lm lin can work with margins", { expect_equal( lml_sum[, 4], lmo_sum[, 4], + tolerance = 0.000001 ) }) From 3f14afbd61f65d700df45f431d5c326f5940d065 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Wed, 5 Feb 2025 09:34:51 -0800 Subject: [PATCH 13/14] githb acion --- .github/workflows/R-CMD-check.yaml | 95 +++++++++++------------------- 1 file changed, 34 insertions(+), 61 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 85cd7f2e..69cfc6ad 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,12 +1,17 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# +# NOTE: This workflow is overkill for most R packages and +# check-standard.yaml is likely a better choice. +# usethis::use_github_action("check-standard") will install it. on: push: - branches: - - master + branches: [main, master] pull_request: - branches: - - master -name: R-CMD-check +name: R-CMD-check.yaml + +permissions: read-all jobs: R-CMD-check: @@ -18,72 +23,40 @@ jobs: fail-fast: false matrix: config: - - {os: macOS-latest, r: 'devel'} - - {os: macOS-latest, r: 'release'} - - {os: macOS-latest, r: 'oldrel'} - - {os: windows-latest, r: 'devel'} + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} - - {os: windows-latest, r: 'oldrel'} - - {os: windows-latest, r: '3.5'} - - {os: ubuntu-16.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - - {os: ubuntu-16.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - - {os: ubuntu-16.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - - {os: ubuntu-16.04, r: '3.5', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - - {os: ubuntu-16.04, r: '3.4', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - - {os: ubuntu-16.04, r: '3.3', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} + # use 4.0 or 4.1 to check with rtools40's older compiler + - {os: windows-latest, r: 'oldrel-4'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-2'} + - {os: ubuntu-latest, r: 'oldrel-3'} + - {os: ubuntu-latest, r: 'oldrel-4'} env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true - - uses: r-lib/actions/setup-pandoc@master - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-r-new-${{ matrix.config.r }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - env: - RHUB_PLATFORM: linux-x86_64-ubuntu-gcc - run: | - Rscript -e "remotes::install_github('r-hub/sysreqs')" - sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") - sudo -s eval "$sysreqs" - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} + extra-packages: any::rcmdcheck + needs: check - - name: Upload check results - if: failure() - uses: actions/upload-artifact@master + - uses: r-lib/actions/check-r-package@v2 with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' From 8f229d697f9daddcb60126ee4a093e725d5b72c6 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Fri, 28 Feb 2025 09:21:08 -0800 Subject: [PATCH 14/14] remove zzz broom test --- tests/testthat/test-zzzbroom.R | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 tests/testthat/test-zzzbroom.R diff --git a/tests/testthat/test-zzzbroom.R b/tests/testthat/test-zzzbroom.R deleted file mode 100644 index 05431c30..00000000 --- a/tests/testthat/test-zzzbroom.R +++ /dev/null @@ -1,13 +0,0 @@ -# added to .Rbuildignore to keep CRAN from complaining that broom -# isn't a SUGGESTed package -context("zzzbroom.R - .onAttach") - -test_that(".onLoad message if old version of 'broom' is installed", { - skip_if_not_installed("broom") - skip_if(packageVersion("broom") > "0.5.0") - library(broom) - expect_message( - .onLoad("estimatr", "estimatr"), - "the `broom` package version 0.5.0 or earlier is loaded" - ) -})