From 4fe74ccda7c718a5e249d6b02daa734c128083f8 Mon Sep 17 00:00:00 2001 From: Luke Sonnet Date: Mon, 22 Oct 2018 08:45:11 -0700 Subject: [PATCH 1/7] Add CR3 SEs --- R/helper_lm_robust_fit.R | 15 +++++----- src/lm_robust_helper.cpp | 48 +++++++++++++++++++++----------- tests/testthat/test-lm-cluster.R | 36 ++++++++++++++++++++++-- 3 files changed, 73 insertions(+), 26 deletions(-) diff --git a/R/helper_lm_robust_fit.R b/R/helper_lm_robust_fit.R index f689c4b5..6d3e3d3b 100644 --- a/R/helper_lm_robust_fit.R +++ b/R/helper_lm_robust_fit.R @@ -72,9 +72,9 @@ lm_robust_fit <- function(y, se_type <- check_se_type(se_type, clustered) - if (weighted && se_type == "CR2" && fes) { + if (weighted && se_type %in% c("CR2", "CR3") && fes) { stop( - "Cannot use `fixed_effects` with weighted CR2 estimation at the moment. ", + "Cannot use `fixed_effects` with weighted CR2 or CR3 estimation at the moment. ", "Try setting `se_type` = \"stata\"" ) } @@ -175,9 +175,10 @@ lm_robust_fit <- function(y, data[["yunweighted"]] - fit_vals[["fitted.values.unweighted"]] ) + print(se_type) # For CR2 need X weighted by weights again # so that instead of having X * sqrt(W) we have X * W - if (se_type == "CR2") { + if (se_type %in% c("CR2", "CR3")) { data[["X"]] <- data[["weights"]] * data[["X"]] if (fes) { data[["femat"]] <- data[["weights"]] * data[["femat"]] @@ -200,14 +201,14 @@ lm_robust_fit <- function(y, if (se_type != "none") { vcov_fit <- lm_variance( - X = if (se_type %in% c("HC2", "HC3", "CR2") && fes) + X = if (se_type %in% c("HC2", "HC3", "CR2", "CR3") && fes) cbind(data[["X"]], data[["femat"]]) else data[["X"]], - Xunweighted = if (se_type %in% c("HC2", "HC3", "CR2") && fes && weighted) + Xunweighted = if (se_type %in% c("HC2", "HC3", "CR2", "CR3") && fes && weighted) cbind(data[["Xunweighted"]], data[["fematunweighted"]]) else data[["Xunweighted"]], XtX_inv = fit$XtX_inv, - ei = if (se_type == "CR2" && weighted) + ei = if (se_type %in% c("CR2", "CR3") && weighted) fit_vals[["ei.unweighted"]] else fit_vals[["ei"]], weight_mean = data[["weight_mean"]], @@ -388,7 +389,7 @@ lm_robust_fit <- function(y, check_se_type <- function(se_type, clustered) { # Allowable se_types with clustering - cl_se_types <- c("CR0", "CR2", "stata") + cl_se_types <- c("CR0", "CR2", "CR3", "CR1s", "stata") rob_se_types <- c("HC0", "HC1", "HC2", "HC3", "classical", "stata") # Parse cluster variable diff --git a/src/lm_robust_helper.cpp b/src/lm_robust_helper.cpp index 4cbb54a2..84c631c9 100644 --- a/src/lm_robust_helper.cpp +++ b/src/lm_robust_helper.cpp @@ -247,7 +247,7 @@ List lm_variance(Eigen::Map& X, const int n(X.rows()), r(XtX_inv.cols()), ny(ei.cols()); // Rcout << "X:" << std::endl << X << std::endl; int r_fe = r + fe_rank; - const bool clustered = ((se_type == "stata") || (se_type == "CR0") || (se_type == "CR2")); + const bool clustered = ((se_type == "stata") || (se_type == "CR0") || (se_type == "CR2") || (se_type == "CR3")); const int npars = r * ny; int sandwich_size = n; if (clustered) { @@ -291,7 +291,7 @@ List lm_variance(Eigen::Map& X, } Eigen::MatrixXd meatXtX_inv; - if ((se_type == "HC2") || (se_type == "HC3") || (se_type == "CR2")) { + if ((se_type == "HC2") || (se_type == "HC3") || (se_type == "CR2") || (se_type == "CR3")) { if (X.cols() > r) { meatXtX_inv = getMeatXtX(X, XtX_inv); r_fe = meatXtX_inv.cols(); @@ -324,7 +324,6 @@ List lm_variance(Eigen::Map& X, } // Rcout << "temp_omega:" << std::endl << temp_omega << std::endl; - for (int m = 0; m < ny; m++) { if (ny > 1) { // Preserve signs for off-diagonal vcov blocks in mlm @@ -337,7 +336,7 @@ List lm_variance(Eigen::Map& X, } else { // clustered - if (se_type == "CR2") { + if ((se_type == "CR2") || (se_type == "CR3")) { Xoriginal.resize(n, r); if (Xunweighted.isNotNull()) { Xoriginal = Rcpp::as >(Xunweighted); @@ -345,14 +344,16 @@ List lm_variance(Eigen::Map& X, Xoriginal = X; } - H1s.resize(r_fe, r_fe*J); - H2s.resize(r_fe, r_fe*J); - H3s.resize(r_fe, r_fe*J); - P_diags.resize(r_fe, J); + if (se_type == "CR2") { + H1s.resize(r_fe, r_fe*J); + H2s.resize(r_fe, r_fe*J); + H3s.resize(r_fe, r_fe*J); + P_diags.resize(r_fe, J); - M_U_ct = meatXtX_inv.llt().matrixL(); - MUWTWUM = meatXtX_inv * X.leftCols(r_fe).transpose() * X.leftCols(r_fe) * meatXtX_inv; - Omega_ct = MUWTWUM.llt().matrixL(); + M_U_ct = meatXtX_inv.llt().matrixL(); + MUWTWUM = meatXtX_inv * X.leftCols(r_fe).transpose() * X.leftCols(r_fe) * meatXtX_inv; + Omega_ct = MUWTWUM.llt().matrixL(); + } } Eigen::Map clusters = Rcpp::as >(cluster); @@ -426,16 +427,32 @@ List lm_variance(Eigen::Map& X, H2s.block(0, p_pos, r_fe, r_fe) = ME * X.block(start_pos, 0, len, r_fe) * M_U_ct; H3s.block(0, p_pos, r_fe, r_fe) = MEU * Omega_ct; } + + } else if (se_type == "CR3") { + + Eigen::ColPivHouseholderQR At( + Eigen::MatrixXd::Identity(len, len) - + Xoriginal.block(start_pos, 0, len, r_fe) * + meatXtX_inv * + X.block(start_pos, 0, len, r_fe).transpose() + ); + + if (Xunweighted.isNotNull()) { + At_WX_inv = At.solve(Eigen::MatrixXd::Identity(len, len)).transpose() * X.block(start_pos, 0, len, r_fe); + } else { + // Could speed up using LLT instead of QR for unweighted case as At is symmetric in that case + At_WX_inv = At.solve(X.block(start_pos, 0, len, r_fe)); + } + + } if (ny > 1) { - // Stack residuals for this cluster from each model - // Rcout << "len: " << len << std::endl; Eigen::MatrixXd ei_block = ei.block(start_pos, 0, len, ny); Eigen::Map ei_long(ei_block.data(), 1, len*ny); - if (se_type == "CR2") { + if ((se_type == "CR2") || (se_type == "CR3")) { half_meat.block(clust_num, 0, 1, npars) = ei_long * Kr(Eigen::MatrixXd::Identity(ny, ny), At_WX_inv.leftCols(r)); @@ -447,7 +464,7 @@ List lm_variance(Eigen::Map& X, } else { - if (se_type == "CR2") { + if (se_type == "CR2" || se_type == "CR3") { half_meat.row(clust_num) = ei.block(start_pos, 0, len, 1).transpose() * At_WX_inv.leftCols(r); @@ -456,7 +473,6 @@ List lm_variance(Eigen::Map& X, ei.block(start_pos, 0, len, 1).transpose() * X.block(start_pos, 0, len, r); } - } if (i < n) { current_cluster = clusters(i); diff --git a/tests/testthat/test-lm-cluster.R b/tests/testthat/test-lm-cluster.R index 82039379..4584a5b7 100644 --- a/tests/testthat/test-lm-cluster.R +++ b/tests/testthat/test-lm-cluster.R @@ -81,16 +81,15 @@ test_that("lm cluster se", { qt(0.975, df = length(unique(dat$J)) - 1) * bm_interact$se.Stata["Z:X"] * c(-1, 1) expect_equivalent( - tidy(lm_interact)[4, c("std.error", "conf.low", "conf.high")], + as.numeric(tidy(lm_interact)[4, c("std.error", "conf.low", "conf.high")]), c(bm_interact$se["Z:X"], bm_interact_interval) ) expect_equivalent( - tidy(lm_interact_stata)[4, c("std.error", "conf.low", "conf.high")], + as.numeric(tidy(lm_interact_stata)[4, c("std.error", "conf.low", "conf.high")]), c(bm_interact$se.Stata["Z:X"], bm_interact_stata_interval) ) - lm_full <- lm_robust( Y ~ Z + X, @@ -183,6 +182,7 @@ test_that("lm cluster se", { }) test_that("Clustered weighted SEs are correct", { + lm_cr3 <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = "CR3") lm_cr2 <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = "CR2") lm_stata <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = "stata") lm_cr0 <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = "CR0") @@ -193,6 +193,36 @@ test_that("Clustered weighted SEs are correct", { vcov(lm_cr2), as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR2")) ) + expect_equivalent( + vcov(lm_cr3), + as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR3")) + ) + expect_equivalent( + vcov(lm_cr0), + as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR0")) + ) + expect_equivalent( + vcov(lm_stata), + as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR1S")) + ) +}) + +test_that("Clustered SEs match clubSandwich", { + lm_cr3 <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = "CR3") + lm_cr2 <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = "CR2") + lm_stata <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = "stata") + lm_cr0 <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = "CR0") + + lm_o <- lm(mpg ~ hp, data = mtcars) + + expect_equivalent( + vcov(lm_cr2), + as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR2")) + ) + expect_equivalent( + vcov(lm_cr3), + as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR3")) + ) expect_equivalent( vcov(lm_cr0), as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR0")) From 65959f12ec260cc3745602440ad93eaeb3a4bb61 Mon Sep 17 00:00:00 2001 From: Luke Sonnet Date: Mon, 22 Oct 2018 14:38:54 -0700 Subject: [PATCH 2/7] debugging CR3 --- R/helper_lm_robust_fit.R | 21 +- R/helper_starprep.R | 18 +- src/lm_robust_helper.cpp | 43 ++- tests/testthat/test-lm-cluster.R | 184 +++++----- tests/testthat/test-starprep.R | 601 +++++++------------------------ 5 files changed, 283 insertions(+), 584 deletions(-) diff --git a/R/helper_lm_robust_fit.R b/R/helper_lm_robust_fit.R index 6d3e3d3b..2a9069ce 100644 --- a/R/helper_lm_robust_fit.R +++ b/R/helper_lm_robust_fit.R @@ -175,7 +175,6 @@ lm_robust_fit <- function(y, data[["yunweighted"]] - fit_vals[["fitted.values.unweighted"]] ) - print(se_type) # For CR2 need X weighted by weights again # so that instead of having X * sqrt(W) we have X * W if (se_type %in% c("CR2", "CR3")) { @@ -187,8 +186,6 @@ lm_robust_fit <- function(y, } - - # Also need second stage residuals for fstat if (iv_stage[[1]] == 2) { fit_vals[["fitted.values.iv"]] <- as.matrix(data[["X"]] %*% fit$beta_hat) @@ -219,7 +216,23 @@ lm_robust_fit <- function(y, which_covs = which_covs[covs_used], fe_rank = fe_rank ) - + return_list$thing <- list( X = if (se_type %in% c("HC2", "HC3", "CR2", "CR3") && fes) + cbind(data[["X"]], data[["femat"]]) + else data[["X"]], + Xunweighted = if (se_type %in% c("HC2", "HC3", "CR2", "CR3") && fes && weighted) + cbind(data[["Xunweighted"]], data[["fematunweighted"]]) + else data[["Xunweighted"]], + XtX_inv = fit$XtX_inv, + ei = if (se_type %in% c("CR2", "CR3") && weighted) + fit_vals[["ei.unweighted"]] + else fit_vals[["ei"]], + weight_mean = data[["weight_mean"]], + cluster = data[["cluster"]], + J = data[["J"]], + ci = ci, + se_type = se_type, + which_covs = which_covs[covs_used], + fe_rank = fe_rank) return_list$std.error[est_exists] <- sqrt(diag(vcov_fit$Vcov_hat)) if (ci) { diff --git a/R/helper_starprep.R b/R/helper_starprep.R index 2d03f7ff..93d71428 100644 --- a/R/helper_starprep.R +++ b/R/helper_starprep.R @@ -93,7 +93,7 @@ commarobust <- function(model, XtX_inv <- data[["weight_mean"]] * XtX_inv # Need unweighted resid and need to reweight X - if (se_type == "CR2") { + if (se_type %in% c("CR2", "CR3")) { eiunweighted <- as.matrix(data[["yunweighted"]] - data[["Xunweighted"]] %*% coefs) data[["X"]] <- data[["weights"]] * data[["X"]] @@ -104,7 +104,7 @@ commarobust <- function(model, X = data[["X"]], Xunweighted = data[["Xunweighted"]], XtX_inv = XtX_inv, - ei = if (se_type == "CR2" && weighted) eiunweighted else ei, + ei = if (se_type %in% c("CR2", "CR3") && weighted) eiunweighted else ei, weight_mean = data[["weight_mean"]], cluster = data[["cluster"]], J = data[["J"]], @@ -113,10 +113,22 @@ commarobust <- function(model, which_covs = rep(TRUE, model$rank), fe_rank = 0 ) - + #print(vcov_fit) ## Build return_list return_list <- list( + thing = list( X = data[["X"]], + Xunweighted = data[["Xunweighted"]], + XtX_inv = XtX_inv, + ei = if (se_type %in% c("CR2", "CR3") && weighted) eiunweighted else ei, + weight_mean = data[["weight_mean"]], + cluster = data[["cluster"]], + J = data[["J"]], + ci = ci, + se_type = se_type, + which_covs = rep(TRUE, model$rank), + fe_rank = 0 + ), coefficients = as.matrix(coef(model)), std.error = NA, df = NA, diff --git a/src/lm_robust_helper.cpp b/src/lm_robust_helper.cpp index 84c631c9..1ebb544e 100644 --- a/src/lm_robust_helper.cpp +++ b/src/lm_robust_helper.cpp @@ -357,6 +357,7 @@ List lm_variance(Eigen::Map& X, } Eigen::Map clusters = Rcpp::as >(cluster); + // Rcout << "clusters: "<< std::endl << clusters << std::endl; double current_cluster = clusters(0); int clust_num = 0; @@ -430,20 +431,56 @@ List lm_variance(Eigen::Map& X, } else if (se_type == "CR3") { - Eigen::ColPivHouseholderQR At( + // TODO numerical instability in XtX_inv is causing large differences + // in vcov_CR3 in commarobust + Eigen::SelfAdjointEigenSolver At( Eigen::MatrixXd::Identity(len, len) - Xoriginal.block(start_pos, 0, len, r_fe) * meatXtX_inv * X.block(start_pos, 0, len, r_fe).transpose() ); + Eigen::VectorXd eigvals = At.eigenvalues(); + //Rcout << "eigvals: " << std::endl << eigvals << std::endl; + + for (int m = 0; m < eigvals.size(); ++m) { + if (eigvals(m) > std::pow(10.0, -12.0)) { + eigvals(m) = 1.0 / eigvals(m); + } else { + eigvals(m) = 0; + } + } + if (Xunweighted.isNotNull()) { - At_WX_inv = At.solve(Eigen::MatrixXd::Identity(len, len)).transpose() * X.block(start_pos, 0, len, r_fe); + At_WX_inv = + At.eigenvectors() * + eigvals.asDiagonal() * + At.eigenvectors().inverse() * + X.block(start_pos, 0, len, r_fe); } else { // Could speed up using LLT instead of QR for unweighted case as At is symmetric in that case - At_WX_inv = At.solve(X.block(start_pos, 0, len, r_fe)); + + At_WX_inv = + At.eigenvectors() * + eigvals.asDiagonal() * + At.eigenvectors().transpose() * + X.block(start_pos, 0, len, r_fe); } + // Eigen::ColPivHouseholderQR At( + // Eigen::MatrixXd::Identity(len, len) - + // Xoriginal.block(start_pos, 0, len, r_fe) * + // meatXtX_inv * + // X.block(start_pos, 0, len, r_fe).transpose() + // ); + // if (Xunweighted.isNotNull()) { + // At_WX_inv = At.solve(Eigen::MatrixXd::Identity(len, len)).transpose() * X.block(start_pos, 0, len, r_fe); + // } else { + // // Could speed up using LLT instead of QR for unweighted case as At is symmetric in that case + // At_WX_inv = At.solve(X.block(start_pos, 0, len, r_fe)); + // } + + // Rcout << "At_WX_inv: " << std::endl << At_WX_inv << std::endl; } diff --git a/tests/testthat/test-lm-cluster.R b/tests/testthat/test-lm-cluster.R index 4584a5b7..ff42faf6 100644 --- a/tests/testthat/test-lm-cluster.R +++ b/tests/testthat/test-lm-cluster.R @@ -1,5 +1,7 @@ context("Estimator - lm_robust, clustered") +cr_se_types <- c("CR0", "stata", "CR2", "CR3") + test_that("lm cluster se", { N <- 100 dat <- data.frame( @@ -117,19 +119,15 @@ test_that("lm cluster se", { ## Works with rank deficient case dat$X2 <- dat$X - lmr_rd <- lm_robust(Y ~ X + Z + X2, data = dat, clusters = J, se_type = "stata") - lmr_full <- lm_robust(Y ~ X + Z, data = dat, clusters = J, se_type = "stata") - expect_identical( - tidy(lmr_rd)[1:3, ], - tidy(lmr_full) - ) - lmr_rd_cr2 <- lm_robust(Y ~ X + Z + X2, data = dat, clusters = J, se_type = "CR2") - lmr_full_cr2 <- lm_robust(Y ~ X + Z, data = dat, clusters = J, se_type = "CR2") - expect_identical( - tidy(lmr_rd_cr2)[1:3, ], - tidy(lmr_full_cr2) - ) + for (se_type in cr_se_types) { + lmr_rd <- lm_robust(Y ~ X + Z + X2, data = dat, clusters = J, se_type = se_type) + lmr_full <- lm_robust(Y ~ X + Z, data = dat, clusters = J, se_type = se_type) + expect_identical( + tidy(lmr_rd)[1:3, ], + tidy(lmr_full) + ) + } ## Test error handling expect_error( @@ -181,56 +179,93 @@ test_that("lm cluster se", { }) -test_that("Clustered weighted SEs are correct", { - lm_cr3 <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = "CR3") - lm_cr2 <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = "CR2") - lm_stata <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = "stata") - lm_cr0 <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = "CR0") +test_that("Clustered SEs match clubSandwich", { + lm_o <- lm(mpg ~ hp, data = mtcars) + lm_ow <- lm(mpg ~ hp, data = mtcars, weights = wt) - lm_o <- lm(mpg ~ hp, data = mtcars, weights = wt) + for (se_type in cr_se_types) { + se_type <- "CR3" + lm_r <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = se_type) + lm_rw <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = se_type) + print(se_type) + expect_equivalent( + vcov(lm_r), + as.matrix(clubSandwich::vcovCR( + lm_o, + cluster = mtcars$cyl, + type = ifelse(se_type == "stata", "CR1S", se_type) + )) + ) - expect_equivalent( - vcov(lm_cr2), - as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR2")) - ) - expect_equivalent( - vcov(lm_cr3), - as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR3")) - ) - expect_equivalent( - vcov(lm_cr0), - as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR0")) - ) - expect_equivalent( - vcov(lm_stata), - as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR1S")) - ) + expect_equivalent( + vcov(lm_rw), + as.matrix(clubSandwich::vcovCR( + lm_ow, + cluster = mtcars$cyl, + type = ifelse(se_type == "stata", "CR1S", se_type) + )) + ) + } }) -test_that("Clustered SEs match clubSandwich", { - lm_cr3 <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = "CR3") - lm_cr2 <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = "CR2") - lm_stata <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = "stata") - lm_cr0 <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = "CR0") +test_that("multiple outcomes", { - lm_o <- lm(mpg ~ hp, data = mtcars) + for (se_type in cr_se_types) { + lmo <- lm(cbind(mpg, hp) ~ wt, data = mtcars) + lmow <- lm(cbind(mpg, hp) ~ wt, weights = qsec, data = mtcars) + + lmro <- lm_robust(cbind(mpg, hp) ~ wt, data = mtcars, clusters = cyl, se_type = se_type) + lmrow <- lm_robust(cbind(mpg, hp) ~ wt, weights = qsec, data = mtcars, clusters = cyl, se_type = se_type) + + if (se_type == "stata") { + # Have to manually do correction for CR1stata + # because clubSandwich uses n*ny and r*ny in place of n and r + # in stata correction + J <- length(unique(mtcars$cyl)) + n <- nrow(mtcars) + r <- 2 + + cs_vcov <- as.matrix(clubSandwich::vcovCR(lmo, cluster = mtcars$cyl, type = "CR0")) * + ((J * (n - 1)) / ((J - 1) * (n - r))) + + cs_vcov_w <- as.matrix(clubSandwich::vcovCR(lmow, cluster = mtcars$cyl, type = "CR0")) * + ((J * (n - 1)) / ((J - 1) * (n - r))) + + } else { + cs_vcov <- as.matrix(clubSandwich::vcovCR(lmo, + cluster = mtcars$cyl, + type = se_type)) + cs_vcov_w <- as.matrix(clubSandwich::vcovCR(lmow, + cluster = mtcars$cyl, + type = se_type)) + } + expect_equivalent( + vcov(lmro), + cs_vcov + ) + expect_equivalent( + vcov(lmrow), + cs_vcov_w + ) + } + + # Test same as individual models + lmro <- lm_robust(cbind(mpg, hp) ~ wt, data = mtcars, clusters = cyl) + lmmpg <- lm_robust(mpg ~ wt, data = mtcars, clusters = cyl) + lmhp <- lm_robust(hp ~ wt, data = mtcars, clusters = cyl) expect_equivalent( - vcov(lm_cr2), - as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR2")) - ) - expect_equivalent( - vcov(lm_cr3), - as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR3")) - ) - expect_equivalent( - vcov(lm_cr0), - as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR0")) + tidy(lmro)$df[1:2], + lmmpg$df ) + expect_equivalent( - vcov(lm_stata), - as.matrix(clubSandwich::vcovCR(lm_o, cluster = mtcars$cyl, type = "CR1S")) + tidy(lmro)$df[3:4], + lmhp$df ) + + expect_equivalent(lmro$r.squared[1], lmmpg$r.squared) + expect_equivalent(lmro$r.squared[2], lmhp$r.squared) }) test_that("lm cluster se with missingness", { @@ -348,48 +383,3 @@ test_that("Clustered SEs work with clusters of size 1", { cbind(coef(lmo), bmo$se.Stata) ) }) - -test_that("multiple outcomes", { - - lmo <- lm(cbind(mpg, hp) ~ wt, data = mtcars) - lmro <- lm_robust(cbind(mpg, hp) ~ wt, data = mtcars, clusters = cyl) - - expect_equivalent( - as.matrix(clubSandwich::vcovCR(lmo, cluster = mtcars$cyl, type = "CR2")), - vcov(lmro) - ) - - expect_equivalent( - as.matrix(clubSandwich::vcovCR(lmo, cluster = mtcars$cyl, type = "CR0")), - vcov(lm_robust(cbind(mpg, hp) ~ wt, data = mtcars, clusters = cyl, se_type = "CR0")) - ) - - lmmpg <- lm_robust(mpg ~ wt, data = mtcars, clusters = cyl) - lmhp <- lm_robust(hp ~ wt, data = mtcars, clusters = cyl) - - expect_equivalent( - tidy(lmro)$df[1:2], - lmmpg$df - ) - - expect_equivalent( - tidy(lmro)$df[3:4], - lmhp$df - ) - - expect_equivalent(lmro$r.squared[1], lmmpg$r.squared) - expect_equivalent(lmro$r.squared[2], lmhp$r.squared) - - J <- length(unique(mtcars$cyl)) - n <- nrow(mtcars) - r <- 2 - - # Have to manually do correction because clubSandwich uses n*ny and r*ny in place of n and r in - # stata correction - expect_equivalent( - as.matrix(clubSandwich::vcovCR(lmo, cluster = mtcars$cyl, type = "CR0")) * - ((J * (n - 1)) / ((J - 1) * (n - r))) , - vcov(lm_robust(cbind(mpg, hp) ~ wt, data = mtcars, clusters = cyl, se_type = "stata")) - ) - -}) diff --git a/tests/testthat/test-starprep.R b/tests/testthat/test-starprep.R index fdb994ea..d10deda2 100644 --- a/tests/testthat/test-starprep.R +++ b/tests/testthat/test-starprep.R @@ -25,6 +25,9 @@ test_that("starprep works", { }) +se_types <- c("classical", "HC0", "HC1", "HC2", "HC3") +cr_se_types <- c("CR0", "stata", "CR2", "CR3") + set.seed(43) N <- 20 dat <- data.frame( @@ -53,170 +56,56 @@ test_that("commarobust works with regular lm", { "`clusters` must be the same length as the model data." ) - ## Classical - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = "classical") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) - clo <- commarobust(lo, se_type = "classical") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC0 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = "HC0") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) - clo <- commarobust(lo, se_type = "HC0") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC1 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = "HC1") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) - clo <- commarobust(lo, se_type = "HC1") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC2 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = "HC2") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) - clo <- commarobust(lo, se_type = "HC2") - - expect_equal( - clo, - commarobust(lo) - ) - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC3 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = "HC3") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) - clo <- commarobust(lo, se_type = "HC3") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## CR0 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, se_type = "CR0") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) - clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = "CR0") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) + ## Test unclustered SEs + for (se_type in se_types) { + ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = se_type) + lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) + clo <- commarobust(lo, se_type = se_type) - ## CR stata - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, se_type = "stata") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) - clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = "stata") + expect_equal( + tidy(ro), + tidy(clo) + ) - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) + expect_equal( + ro$fstatistic, + clo$fstatistic + ) - ## CR2 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, se_type = "CR2") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) - clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = "CR2") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) + expect_equal( + ro[c("r.squared", "adj.r.squared")], + clo[c("r.squared", "adj.r.squared")] + ) + } - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) + ## Test clustered SEs + for (se_type in cr_se_types) { + # se_type <- "CR3" + # debugonce(estimatr::lm_robust_fit) + # datmiss <- datmiss[order(datmiss$cl),] + ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, se_type = se_type) + lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) + clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = se_type) + # setdiff(names(ro$thing), names(clo$thing)) + # setdiff(names(clo$thing), names(ro$thing)) + # + # purrr::map(names(ro$thing), ~identical(ro$thing[[.x]], clo$thing[[.x]])) + # ro$thing$cluster == clo$thing$cluster + expect_equal( + tidy(ro), + tidy(clo) + ) + + expect_equal( + ro$fstatistic, + clo$fstatistic + ) + + expect_equal( + ro[c("r.squared", "adj.r.squared")], + clo[c("r.squared", "adj.r.squared")] + ) + } # Works with character, factor, and numeric clusters datmiss$cl_char <- sample(letters, size = nrow(datmiss), replace = TRUE) @@ -254,164 +143,51 @@ test_that("commarobust works with regular lm", { }) test_that("commarobust works with weighted lm", { - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w, se_type = "classical") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, se_type = "classical") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC0 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w, se_type = "HC0") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, se_type = "HC0") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC1 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w, se_type = "HC1") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, se_type = "HC1") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC2 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w, se_type = "HC2") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, se_type = "HC2") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC3 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w, se_type = "HC3") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, se_type = "HC3") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## CR0 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, weights = w, se_type = "CR0") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = "CR0") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) + # Test unclustered SEs + for (se_type in se_types) { + ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w, se_type = se_type) + lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w) + clo <- commarobust(lo, se_type = se_type) + + expect_equal( + tidy(ro), + tidy(clo) + ) + + expect_equal( + ro$fstatistic, + clo$fstatistic + ) + + expect_equal( + ro[c("r.squared", "adj.r.squared")], + clo[c("r.squared", "adj.r.squared")] + ) + } - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) + ## Test clustered SEs + for (se_type in cr_se_types) { - ## CR stata - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, weights = w, se_type = "stata") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = "stata") + ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, weights = w, se_type = se_type) + lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w) + clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = se_type) - expect_equal( - tidy(ro), - tidy(clo) - ) + expect_equal( + tidy(ro), + tidy(clo) + ) - expect_equal( - ro$fstatistic, - clo$fstatistic - ) + expect_equal( + ro$fstatistic, + clo$fstatistic + ) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## CR2 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, weights = w, se_type = "CR2") - lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = "CR2") - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) + expect_equal( + ro[c("r.squared", "adj.r.squared")], + clo[c("r.squared", "adj.r.squared")] + ) + } - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) }) test_that("commarobust works with dependency, weighted lm", { @@ -422,180 +198,51 @@ test_that("commarobust works with dependency, weighted lm", { } } - ro <- lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w, se_type = "classical") - lo <- lm(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, se_type = "classical") - - capture_output(sapply(names(ro), check_obj, ro = ro, clo = clo)) - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC0 - ro <- lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w, se_type = "HC0") - lo <- lm(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, se_type = "HC0") - - capture_output(sapply(names(ro), check_obj, ro = ro, clo = clo)) - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC1 - ro <- lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w, se_type = "HC1") - lo <- lm(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, se_type = "HC1") + for (se_type in se_types) { + ro <- lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w, se_type = se_type) + lo <- lm(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w) + clo <- commarobust(lo, se_type = se_type) - capture_output(sapply(names(ro), check_obj, ro = ro, clo = clo)) - - expect_equal( - tidy(ro), - tidy(clo) - ) + capture_output(sapply(names(ro), check_obj, ro = ro, clo = clo)) - expect_equal( - ro$fstatistic, - clo$fstatistic - ) + expect_equal( + tidy(ro), + tidy(clo) + ) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC2 - ro <- lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w, se_type = "HC2") - lo <- lm(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, se_type = "HC2") - - capture_output(sapply(names(ro), check_obj, ro = ro, clo = clo)) - - expect_equal( - tidy(ro), - tidy(clo) - ) + expect_equal( + ro$fstatistic, + clo$fstatistic + ) - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## HC3 - ro <- lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w, se_type = "HC3") - lo <- lm(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, se_type = "HC3") - - capture_output(sapply(names(ro), check_obj, ro = ro, clo = clo)) - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## CR0 - ro <- lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), clusters = cl, data = datmiss, weights = w, se_type = "CR0") - lo <- lm(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = "CR0") - - capture_output(sapply(names(ro), check_obj, ro = ro, clo = clo)) - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) - - ## CR stata - ro <- lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), clusters = cl, data = datmiss, weights = w, se_type = "stata") - lo <- lm(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = "stata") - - capture_output(sapply(names(ro), check_obj, ro = ro, clo = clo)) - - expect_equal( - tidy(ro), - tidy(clo) - ) - - expect_equal( - ro$fstatistic, - clo$fstatistic - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) + expect_equal( + ro[c("r.squared", "adj.r.squared")], + clo[c("r.squared", "adj.r.squared")] + ) + } - ## CR2 - ro <- lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), clusters = cl, data = datmiss, weights = w, se_type = "CR2") - lo <- lm(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w) - clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = "CR2") + for (se_type in cr_se_types) { + ro <- lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), clusters = cl, data = datmiss, weights = w, se_type = se_type) + lo <- lm(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = datmiss, weights = w) + clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = se_type) - capture_output(sapply(names(ro), check_obj, ro = ro, clo = clo)) + capture_output(sapply(names(ro), check_obj, ro = ro, clo = clo)) - expect_equal( - tidy(ro), - tidy(clo) - ) + expect_equal( + tidy(ro), + tidy(clo) + ) - expect_equal( - ro$fstatistic, - clo$fstatistic - ) + expect_equal( + ro$fstatistic, + clo$fstatistic + ) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - clo[c("r.squared", "adj.r.squared")] - ) + expect_equal( + ro[c("r.squared", "adj.r.squared")], + clo[c("r.squared", "adj.r.squared")] + ) + } }) test_that("Only works with lm, not mlm or glm", { From 537581b292f9c64f3fa1feb152f0d27db0629e28 Mon Sep 17 00:00:00 2001 From: Luke Sonnet Date: Tue, 23 Oct 2018 13:54:14 -0700 Subject: [PATCH 3/7] implement CR3 using SVD pseudoinverse; refactor tests to add it --- R/helper_lm_robust_fit.R | 18 +- R/helper_starprep.R | 13 - src/lm_robust_helper.cpp | 53 +- tests/testthat/helper-return-cleaners.R | 13 + tests/testthat/helper-rm-call.R | 7 - tests/testthat/helper-se-types.R | 4 + tests/testthat/test-iv-robust-fes.R | 391 ++------- tests/testthat/test-iv-robust.R | 188 ++-- tests/testthat/test-lm-cluster.R | 25 +- tests/testthat/test-lm-robust-fes.R | 1064 ++++++----------------- tests/testthat/test-lm-robust.R | 270 ++---- tests/testthat/test-starprep.R | 13 +- 12 files changed, 583 insertions(+), 1476 deletions(-) create mode 100644 tests/testthat/helper-return-cleaners.R delete mode 100644 tests/testthat/helper-rm-call.R create mode 100644 tests/testthat/helper-se-types.R diff --git a/R/helper_lm_robust_fit.R b/R/helper_lm_robust_fit.R index 2a9069ce..ce13551a 100644 --- a/R/helper_lm_robust_fit.R +++ b/R/helper_lm_robust_fit.R @@ -216,23 +216,7 @@ lm_robust_fit <- function(y, which_covs = which_covs[covs_used], fe_rank = fe_rank ) - return_list$thing <- list( X = if (se_type %in% c("HC2", "HC3", "CR2", "CR3") && fes) - cbind(data[["X"]], data[["femat"]]) - else data[["X"]], - Xunweighted = if (se_type %in% c("HC2", "HC3", "CR2", "CR3") && fes && weighted) - cbind(data[["Xunweighted"]], data[["fematunweighted"]]) - else data[["Xunweighted"]], - XtX_inv = fit$XtX_inv, - ei = if (se_type %in% c("CR2", "CR3") && weighted) - fit_vals[["ei.unweighted"]] - else fit_vals[["ei"]], - weight_mean = data[["weight_mean"]], - cluster = data[["cluster"]], - J = data[["J"]], - ci = ci, - se_type = se_type, - which_covs = which_covs[covs_used], - fe_rank = fe_rank) + return_list$std.error[est_exists] <- sqrt(diag(vcov_fit$Vcov_hat)) if (ci) { diff --git a/R/helper_starprep.R b/R/helper_starprep.R index 93d71428..1c094c98 100644 --- a/R/helper_starprep.R +++ b/R/helper_starprep.R @@ -113,22 +113,9 @@ commarobust <- function(model, which_covs = rep(TRUE, model$rank), fe_rank = 0 ) - #print(vcov_fit) ## Build return_list return_list <- list( - thing = list( X = data[["X"]], - Xunweighted = data[["Xunweighted"]], - XtX_inv = XtX_inv, - ei = if (se_type %in% c("CR2", "CR3") && weighted) eiunweighted else ei, - weight_mean = data[["weight_mean"]], - cluster = data[["cluster"]], - J = data[["J"]], - ci = ci, - se_type = se_type, - which_covs = rep(TRUE, model$rank), - fe_rank = 0 - ), coefficients = as.matrix(coef(model)), std.error = NA, df = NA, diff --git a/src/lm_robust_helper.cpp b/src/lm_robust_helper.cpp index 1ebb544e..87864fe0 100644 --- a/src/lm_robust_helper.cpp +++ b/src/lm_robust_helper.cpp @@ -431,41 +431,16 @@ List lm_variance(Eigen::Map& X, } else if (se_type == "CR3") { - // TODO numerical instability in XtX_inv is causing large differences - // in vcov_CR3 in commarobust - Eigen::SelfAdjointEigenSolver At( - Eigen::MatrixXd::Identity(len, len) - - Xoriginal.block(start_pos, 0, len, r_fe) * - meatXtX_inv * - X.block(start_pos, 0, len, r_fe).transpose() - ); - - Eigen::VectorXd eigvals = At.eigenvalues(); - //Rcout << "eigvals: " << std::endl << eigvals << std::endl; - - for (int m = 0; m < eigvals.size(); ++m) { - if (eigvals(m) > std::pow(10.0, -12.0)) { - eigvals(m) = 1.0 / eigvals(m); - } else { - eigvals(m) = 0; - } - } + //if (Xunweighted.isNotNull()) { - if (Xunweighted.isNotNull()) { - At_WX_inv = - At.eigenvectors() * - eigvals.asDiagonal() * - At.eigenvectors().inverse() * - X.block(start_pos, 0, len, r_fe); - } else { - // Could speed up using LLT instead of QR for unweighted case as At is symmetric in that case + Eigen::CompleteOrthogonalDecomposition At( + Eigen::MatrixXd::Identity(len, len) - + Xoriginal.block(start_pos, 0, len, r_fe) * + meatXtX_inv * + X.block(start_pos, 0, len, r_fe).transpose() + ); - At_WX_inv = - At.eigenvectors() * - eigvals.asDiagonal() * - At.eigenvectors().transpose() * - X.block(start_pos, 0, len, r_fe); - } + //} // Eigen::ColPivHouseholderQR At( // Eigen::MatrixXd::Identity(len, len) - @@ -473,12 +448,12 @@ List lm_variance(Eigen::Map& X, // meatXtX_inv * // X.block(start_pos, 0, len, r_fe).transpose() // ); - // if (Xunweighted.isNotNull()) { - // At_WX_inv = At.solve(Eigen::MatrixXd::Identity(len, len)).transpose() * X.block(start_pos, 0, len, r_fe); - // } else { - // // Could speed up using LLT instead of QR for unweighted case as At is symmetric in that case - // At_WX_inv = At.solve(X.block(start_pos, 0, len, r_fe)); - // } + if (Xunweighted.isNotNull()) { + At_WX_inv = At.solve(Eigen::MatrixXd::Identity(len, len)).transpose() * X.block(start_pos, 0, len, r_fe); + } else { + // Could speed up using LLT instead of QR for unweighted case as At is symmetric in that case + At_WX_inv = At.solve(X.block(start_pos, 0, len, r_fe)); + } // Rcout << "At_WX_inv: " << std::endl << At_WX_inv << std::endl; diff --git a/tests/testthat/helper-return-cleaners.R b/tests/testthat/helper-return-cleaners.R new file mode 100644 index 00000000..5123361b --- /dev/null +++ b/tests/testthat/helper-return-cleaners.R @@ -0,0 +1,13 @@ +# This fn removes calls from function returns to make testing easier +rmcall <- function(obj) { + if (!is.null(obj[["call"]])) { + obj[["call"]] <- NULL + } + return(obj) +} + +# This fn casts tibbles as data.frames for equivalency tests +# TODO implement this everywhere +expect_equivalent_tbl <- function(obj1, obj2) { + expect_equivalent(as.data.frame(obj1), as.data.frame(obj2)) +} diff --git a/tests/testthat/helper-rm-call.R b/tests/testthat/helper-rm-call.R deleted file mode 100644 index c34dc8bc..00000000 --- a/tests/testthat/helper-rm-call.R +++ /dev/null @@ -1,7 +0,0 @@ -# This file removes calls from function returns to make testing easier -rmcall <- function(obj) { - if (!is.null(obj[["call"]])) { - obj[["call"]] <- NULL - } - return(obj) -} diff --git a/tests/testthat/helper-se-types.R b/tests/testthat/helper-se-types.R new file mode 100644 index 00000000..3e2995b6 --- /dev/null +++ b/tests/testthat/helper-se-types.R @@ -0,0 +1,4 @@ +# se_types for various files + +se_types <- c("classical", "HC0", "HC1", "HC2", "HC3") +cr_se_types <- c("CR0", "stata", "CR2", "CR3") diff --git a/tests/testthat/test-iv-robust-fes.R b/tests/testthat/test-iv-robust-fes.R index d4698b3e..598ebf72 100644 --- a/tests/testthat/test-iv-robust-fes.R +++ b/tests/testthat/test-iv-robust-fes.R @@ -16,313 +16,92 @@ dat$Xdup <- dat$X dat$Bdup <- dat$B test_that("FE matches with multiple FEs and covars", { - ## Classical - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, se_type = "classical") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, se_type = "classical") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC0 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, se_type = "HC0") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, se_type = "HC0") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC1 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, se_type = "HC1") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, se_type = "HC1") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC2 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, se_type = "HC2") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, se_type = "HC2") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC3 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, se_type = "HC3") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, se_type = "HC3") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## CR0 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "CR0") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "CR0") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## CR stata - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "stata") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "stata") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## CR2 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "CR2") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "CR2") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - -}) - -test_that("FE matches with weights", { - ## Classical - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, weights = w, se_type = "classical") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "classical") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC0 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC0") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC0") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC1 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC1") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC1") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC2 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC2") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC2") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC3 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC3") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC3") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## CR0 - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = "CR0") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = "CR0") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## CR stata - ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = "stata") - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = "stata") - - expect_equivalent( - tidy(ro)[ro$term %in% c("X1", "X2"), ], - tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## CR2 - expect_error( - rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = "CR2"), - "Cannot use `fixed_effects` with weighted" - ) - - - # ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = "CR2") - # - # expect_equivalent( - # tidy(ro)[ro$term %in% c("X1", "X2"), ], - # tidy(rfo)[rfo$term %in% c("X1", "X2"), ] - # ) - # - # expect_equal( - # ro[c("r.squared", "adj.r.squared")], - # rfo[c("r.squared", "adj.r.squared")] - # ) + for (se_type in se_types) { + ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, se_type = se_type) + rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, se_type = se_type) + + + expect_equivalent( + tidy(ro)[ro$term %in% c("X1", "X2"), ], + tidy(rfo)[rfo$term %in% c("X1", "X2"), ] + ) + + expect_equivalent( + ro$fitted.values, + rfo$fitted.values + ) + + expect_equal( + ro[c("r.squared", "adj.r.squared")], + rfo[c("r.squared", "adj.r.squared")] + ) + + # weights + ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, weights = w, se_type = se_type) + rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = se_type) + + expect_equivalent( + tidy(ro)[ro$term %in% c("X1", "X2"), ], + tidy(rfo)[rfo$term %in% c("X1", "X2"), ] + ) + + expect_equivalent( + ro$fitted.values, + rfo$fitted.values + ) + + expect_equal( + ro[c("r.squared", "adj.r.squared")], + rfo[c("r.squared", "adj.r.squared")] + ) + } + + for (se_type in cr_se_types) { + ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, se_type = se_type) + rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = se_type) + + expect_equivalent( + tidy(ro)[ro$term %in% c("X1", "X2"), ], + tidy(rfo)[rfo$term %in% c("X1", "X2"), ] + ) + + expect_equivalent( + ro$fitted.values, + rfo$fitted.values + ) + + expect_equal( + ro[c("r.squared", "adj.r.squared")], + rfo[c("r.squared", "adj.r.squared")] + ) + + # weights + if (se_type %in% c("CR2", "CR3")) { + expect_error( + rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = se_type), + "Cannot use `fixed_effects` with weighted CR2 or CR3" + ) + } else { + ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = se_type) + rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = se_type) + + expect_equivalent( + tidy(ro)[ro$term %in% c("X1", "X2"), ], + tidy(rfo)[rfo$term %in% c("X1", "X2"), ] + ) + + expect_equivalent( + ro$fitted.values, + rfo$fitted.values + ) + + expect_equal( + ro[c("r.squared", "adj.r.squared")], + rfo[c("r.squared", "adj.r.squared")] + ) + } + } }) test_that("IV FE matches lfe including proj r2", { diff --git a/tests/testthat/test-iv-robust.R b/tests/testthat/test-iv-robust.R index 06055b27..57c71dc9 100644 --- a/tests/testthat/test-iv-robust.R +++ b/tests/testthat/test-iv-robust.R @@ -10,6 +10,7 @@ dat <- data.frame( clust = sample(letters[1:3], size = N, replace = TRUE) ) dat$z <- dat$x * 0.5 + rnorm(N) +dat$x1_c <- dat$x test_that("iv_robust warnings and errors are correct", { expect_warning( @@ -71,33 +72,10 @@ test_that("iv_robust matches AER + ivpack", { ivclusto$fitted.values ) - # CR2 - ivcr2o <- iv_robust(y ~ x | z, data = dat, clusters = clust) - clubsando <- clubSandwich::coef_test(ivfit, vcov = "CR2", cluster = dat$clust) - - expect_equivalent( - as.matrix(tidy(ivcr2o)[, c("estimate", "std.error", "df", "p.value")]), - as.matrix(clubsando) - ) - - expect_equivalent( - ivfit$fitted.values, - ivcr2o$fitted.values - ) - - # CR0 - ivcr0o <- iv_robust(y ~ x | z, data = dat, clusters = clust, se_type = "CR0") - clubsandCR0o <- clubSandwich::coef_test(ivfit, vcov = "CR0", cluster = dat$clust, test = "naive-t") - - expect_equivalent( - as.matrix(tidy(ivcr0o)[, c("estimate", "std.error", "p.value")]), - as.matrix(clubsandCR0o) - ) - # Weighting classical + ivw <- AER::ivreg(y ~ x | z, data = dat, weights = w) ivcw <- iv_robust(y ~ x | z, data = dat, weights = w, se_type = "classical") - ivw <- AER::ivreg(y ~ x | z, weights = w, data = dat) - ivregsum <- summary(ivw) + ivregsum <- summary(ivcw) expect_equivalent( as.matrix(tidy(ivcw)[, c("estimate", "std.error", "statistic", "p.value")]), @@ -123,37 +101,23 @@ test_that("iv_robust matches AER + ivpack", { ivcw$fitted.values ) - # CR2 weighted - ivcr2wo <- iv_robust(y ~ x | z, data = dat, clusters = clust, weights = w) - clubsandwo <- clubSandwich::coef_test(ivw, vcov = "CR2", cluster = dat$clust) - - expect_equivalent( - as.matrix(tidy(ivcr2wo)[, c("estimate", "std.error", "df", "p.value")]), - as.matrix(clubsandwo) - ) - - expect_equivalent( - ivcr2wo$fitted.values, - ivcw$fitted.values - ) - - # CR0 weighted - ivcr0wo <- iv_robust(y ~ x | z, data = dat, clusters = clust, weights = w, se_type = "CR0") - clubsandCR0wo <- clubSandwich::coef_test(ivw, vcov = "CR0", cluster = dat$clust, test = "naive-t") + # Stata weighted + ivclrw <- iv_robust(y ~ x | z, data = dat, clusters = clust, weights = w, se_type = "stata") + ivclw <- AER::ivreg(y ~ x | z, data = dat, weights = w) + capture_output(ivclwse <- ivpack::cluster.robust.se(ivclw, clusterid = dat$clust)) expect_equivalent( - as.matrix(tidy(ivcr0wo)[, c("estimate", "std.error", "p.value")]), - as.matrix(clubsandCR0wo) + as.matrix(tidy(ivclrw)[1:2, c("estimate", "std.error")]), + ivclwse[, c(1, 2)] ) expect_equivalent( - ivcr0wo$fitted.values, - ivcw$fitted.values + ivclrw$fitted.values, + ivclw$fitted.values ) # Rank-deficiency # HC0 - dat$x1_c <- dat$x ivdefr <- iv_robust(y ~ x + x1_c| z + z2, data = dat, se_type = "HC0") ivdef <- AER::ivreg(y ~ x + x1_c| z + z2, data = dat) capture_output(ivdefse <- ivpack::robust.se(ivdef)) @@ -204,28 +168,11 @@ test_that("iv_robust matches AER + ivpack", { ivdefclse[, c(1, 2)] ) - expect_equivalent( ivdefclr$fitted.values, ivdefcl$fitted.values ) - # CR2 - ivdefcl2r <- iv_robust(y ~ x + x1_c | z + z2, data = dat, clusters = clust, se_type = "CR2") - ivdefcl2 <- AER::ivreg(y ~ x + x1_c | z + z2, data = dat) - ivdefcl2se <- clubSandwich::coef_test(ivdefcl2, vcov = "CR2", cluster = dat$clust) - - - expect_equivalent( - na.omit(as.matrix(tidy(ivdefcl2r)[, c("estimate", "std.error", "df", "p.value")])), - na.omit(as.matrix(ivdefcl2se)) - ) - - expect_equivalent( - ivdefcl2r$fitted.values, - ivdefcl2$fitted.values - ) - # HC0 Weighted ivdefrw <- iv_robust(y ~ x + x1_c| z + z2, weights = w, data = dat, se_type = "HC0") ivdefw <- AER::ivreg(y ~ x + x1_c| z + z2, weights = w, data = dat) @@ -246,33 +193,24 @@ test_that("iv_robust matches AER + ivpack", { ivdefw$fitted.values ) - # CR2 Weighted - ivdefclrw <- iv_robust(y ~ x + x1_c | z + z2, data = dat, clusters = clust, weights = w, se_type = "CR2") - ivdefclw <- AER::ivreg(y ~ x + x1_c | z + z2, weights = w, data = dat) - ivdefclsew <- clubSandwich::coef_test(ivdefclw, vcov = "CR2", cluster = dat$clust) - - expect_equivalent( - na.omit(as.matrix(tidy(ivdefclrw)[, c("estimate", "std.error", "df", "p.value")])), - na.omit(as.matrix(ivdefclsew)[, 1:4]) - ) + # Stata weighted + ivdefclr <- iv_robust(y ~ x + x1_c | z + z2, data = dat, weights = w, clusters = clust, se_type = "stata") + ivdefcl <- AER::ivreg(y ~ x + x1_c | z + z2, data = dat, weights = w) + capture_output(ivdefclse <- ivpack::cluster.robust.se(ivdefcl, clusterid = dat$clust)) - expect_equivalent( - ivdefclrw$fitted.values, - ivdefclw$fitted.values + expect_equal( + coef(ivdefclr), + coef(ivdefcl) ) - ivdef2clrw <- iv_robust(y ~ x + z | x + x1_c, data = dat, clusters = clust, weights = w, se_type = "CR2") - ivdef2clw <- AER::ivreg(y ~ x + z | x + x1_c, weights = w, data = dat) - ivdef2clsew <- clubSandwich::coef_test(ivdef2clw, vcov = "CR2", cluster = dat$clust) - expect_equivalent( - na.omit(as.matrix(tidy(ivdef2clrw)[, c("estimate", "std.error", "df", "p.value")])), - na.omit(as.matrix(ivdef2clsew)[, 1:4]) + as.matrix(tidy(ivdefclr)[1:2, c("estimate", "std.error")]), + ivdefclse[, c(1, 2)] ) expect_equivalent( - ivdef2clrw$fitted.values, - ivdef2clw$fitted.values + ivdefclr$fitted.values, + ivdefcl$fitted.values ) # F-stat fails properly with blocks of size 1 @@ -287,6 +225,88 @@ test_that("iv_robust matches AER + ivpack", { }) +test_that("iv_robust matches AER + clubSandwich", { + + # ClubSandwich IV tests + for (se_type in cr_se_types) { + + ivfit <- AER::ivreg(y ~ x | z, data = dat) + ivfitw <- AER::ivreg(y ~ x | z, weights = w, data = dat) + + # Standard IV models + ivcr <- iv_robust(y ~ x | z, data = dat, clusters = clust, se_type = se_type) + clubsand <- clubSandwich::coef_test(ivfit, + vcov = ifelse(se_type == "stata", "CR1S", se_type), + cluster = dat$clust, + test = ifelse(se_type == "CR2", "Satterthwaite", "naive-t")) + + cols <- if (se_type == "CR2") c("estimate", "std.error", "df", "p.value") else c("estimate", "std.error", "p.value") + + expect_equivalent( + as.matrix(tidy(ivcr)[, cols]), + as.matrix(clubsand) + ) + + expect_equivalent( + ivfit$fitted.values, + ivcr$fitted.values + ) + + # Weighted IV models + ivcrw <- iv_robust(y ~ x | z, data = dat, clusters = clust, weights = w, se_type = se_type) + clubsandw <- clubSandwich::coef_test(ivfitw, + vcov = ifelse(se_type == "stata", "CR1S", se_type), + cluster = dat$clust, + test = ifelse(se_type == "CR2", "Satterthwaite", "naive-t")) + + expect_equivalent( + as.matrix(tidy(ivcrw)[, cols]), + as.matrix(clubsandw) + ) + + expect_equivalent( + ivfitw$fitted.values, + ivcrw$fitted.values + ) + + # Rank-deficiency + ivfit_rd <- AER::ivreg(y ~ x + x1_c | z + z2, data = dat) + ivcr_rd <- iv_robust(y ~ x + x1_c | z + z2, data = dat, clusters = clust, se_type = se_type) + clubsand_rd <- clubSandwich::coef_test(ivfit_rd, + vcov = ifelse(se_type == "stata", "CR1S", se_type), + cluster = dat$clust, + test = ifelse(se_type == "CR2", "Satterthwaite", "naive-t")) + + expect_equivalent( + na.omit(as.matrix(tidy(ivcr_rd)[, cols])), + na.omit(as.matrix(clubsand_rd)) + ) + + expect_equivalent( + ivfit_rd$fitted.values, + ivcr_rd$fitted.values + ) + + # Rank-deficient, weighted + ivfitw_rd <- AER::ivreg(y ~ x + x1_c | z + z2, weights = w, data = dat) + ivcrw_rd <- iv_robust(y ~ x + x1_c | z + z2, data = dat, weights = w, clusters = clust, se_type = se_type) + clubsandw_rd <- clubSandwich::coef_test(ivfitw_rd, + vcov = ifelse(se_type == "stata", "CR1S", se_type), + cluster = dat$clust, + test = ifelse(se_type == "CR2", "Satterthwaite", "naive-t")) + + expect_equivalent( + na.omit(as.matrix(tidy(ivcrw_rd)[, cols])), + na.omit(as.matrix(clubsandw_rd)) + ) + + expect_equivalent( + ivfitw_rd$fitted.values, + ivcrw_rd$fitted.values + ) + } + +}) test_that("iv_robust different specifications work", { # More instruments than endog. regressors diff --git a/tests/testthat/test-lm-cluster.R b/tests/testthat/test-lm-cluster.R index ff42faf6..864d77c0 100644 --- a/tests/testthat/test-lm-cluster.R +++ b/tests/testthat/test-lm-cluster.R @@ -1,7 +1,5 @@ context("Estimator - lm_robust, clustered") -cr_se_types <- c("CR0", "stata", "CR2", "CR3") - test_that("lm cluster se", { N <- 100 dat <- data.frame( @@ -155,6 +153,7 @@ test_that("lm cluster se", { lm_cr0 <- lm_robust(Y ~ Z + X, data = dat, weights = w, clusters = J, se_type = "CR0") lm_stata <- lm_robust(Y ~ Z + X, data = dat, weights = w, clusters = J, se_type = "stata") lm_cr2 <- lm_robust(Y ~ Z + X, data = dat, weights = w, clusters = J, se_type = "CR2") + lm_cr3 <- lm_robust(Y ~ Z + X, data = dat, weights = w, clusters = J, se_type = "CR3") # Stata is the same as CR0 but with finite sample expect_equivalent( @@ -184,10 +183,9 @@ test_that("Clustered SEs match clubSandwich", { lm_ow <- lm(mpg ~ hp, data = mtcars, weights = wt) for (se_type in cr_se_types) { - se_type <- "CR3" lm_r <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = se_type) lm_rw <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = se_type) - print(se_type) + expect_equivalent( vcov(lm_r), as.matrix(clubSandwich::vcovCR( @@ -314,11 +312,10 @@ test_that("lm works with quoted or unquoted vars and withor without factor clust lmr <- lm_robust(Y~Z, data = dat, weights = W) lmrq <- lm_robust(Y~Z, data = dat, weights = W) - lmr[["call"]] <- NULL - lmrq[["call"]] <- NULL + expect_equal( - lmr, - lmrq + rmcall(lmr), + rmcall(lmrq) ) # works with char @@ -326,21 +323,19 @@ test_that("lm works with quoted or unquoted vars and withor without factor clust lmrc <- lm_robust(Y~Z, data = dat, clusters = J) lmrcq <- lm_robust(Y~Z, data = dat, clusters = J) - lmrc[["call"]] <- NULL - lmrcq[["call"]] <- NULL + expect_equal( - lmrc, - lmrcq + rmcall(lmrc), + rmcall(lmrcq) ) # works with num dat$J_num <- as.numeric(dat$J) lmrc_qnum <- lm_robust(Y~Z, data = dat, clusters = J_num) - lmrc_qnum[["call"]] <- NULL expect_equal( - lmrc, - lmrc_qnum + rmcall(lmrc), + rmcall(lmrc_qnum) ) diff --git a/tests/testthat/test-lm-robust-fes.R b/tests/testthat/test-lm-robust-fes.R index 19456c87..00b8d70f 100644 --- a/tests/testthat/test-lm-robust-fes.R +++ b/tests/testthat/test-lm-robust-fes.R @@ -17,119 +17,81 @@ dat$Bdup <- dat$B test_that("FE matches lm_robust with dummies", { ## One FE, one covar + for (se_type in se_types) { + ro <- tidy(lm_robust(Y ~ Z + factor(B), data = dat, se_type = se_type)) + rfo <- tidy(lm_robust(Y ~ Z, fixed_effects = ~ B, data = dat, se_type = se_type)) - ## Classical - ro <- tidy(lm_robust(Y ~ Z + factor(B), data = dat, se_type = "classical")) - rfo <- tidy(lm_robust(Y ~ Z, fixed_effects = ~ B, data = dat, se_type = "classical")) - - expect_equivalent( - ro[ro$term %in% c("Z"), ], - rfo[rfo$term %in% c("Z"), ] - ) - - ## HC0 - ro <- tidy(lm_robust(Y ~ Z + factor(B), data = dat, se_type = "HC0")) - rfo <- tidy(lm_robust(Y ~ Z, fixed_effects = ~ B, data = dat, se_type = "HC0")) - - expect_equivalent( - ro[ro$term %in% c("Z"), ], - rfo[rfo$term %in% c("Z"), ] - ) - - ## HC1 - ro <- tidy(lm_robust(Y ~ Z + factor(B), data = dat, se_type = "HC1")) - rfo <- tidy(lm_robust(Y ~ Z, fixed_effects = ~ B, data = dat, se_type = "HC1")) - - expect_equivalent( - ro[ro$term %in% c("Z"), ], - rfo[rfo$term %in% c("Z"), ] - ) - - ## HC2 - ro <- tidy(lm_robust(Y ~ Z + factor(B), data = dat, se_type = "HC2")) - rfo <- tidy(lm_robust(Y ~ Z, fixed_effects = ~ B, data = dat, se_type = "HC2")) - - expect_equivalent( - ro[ro$term %in% c("Z"), ], - rfo[rfo$term %in% c("Z"), ] - ) - - ## HC3 - ro <- tidy(lm_robust(Y ~ Z + factor(B), data = dat, se_type = "HC3")) - rfo <- tidy(lm_robust(Y ~ Z, fixed_effects = ~ B, data = dat, se_type = "HC3")) - - expect_equivalent( - ro[ro$term %in% c("Z"), ], - rfo[rfo$term %in% c("Z"), ] - ) - - ## CR0 - ro <- tidy(lm_robust(Y ~ Z + factor(B), clusters = cl, data = dat, se_type = "CR0")) - rfo <- tidy(lm_robust(Y ~ Z, fixed_effects = ~ B, clusters = cl, data = dat, se_type = "CR0")) - - expect_equivalent( - ro[ro$term %in% c("Z"), ], - rfo[rfo$term %in% c("Z"), ] - ) - - ## CR stata - ro <- tidy(lm_robust(Y ~ Z + factor(B), clusters = cl, data = dat, se_type = "stata")) - rfo <- tidy(lm_robust(Y ~ Z, fixed_effects = ~ B, clusters = cl, data = dat, se_type = "stata")) + expect_equivalent( + ro[ro$term %in% c("Z"), ], + rfo[rfo$term %in% c("Z"), ] + ) + } - expect_equivalent( - ro[ro$term %in% c("Z"), ], - rfo[rfo$term %in% c("Z"), ] - ) + for (se_type in cr_se_types) { + ro <- tidy(lm_robust(Y ~ Z + factor(B), clusters = cl, data = dat, se_type = se_type)) + rfo <- tidy(lm_robust(Y ~ Z, fixed_effects = ~ B, clusters = cl, data = dat, se_type = se_type)) - ## CR2 - ro <- tidy(lm_robust(Y ~ Z + factor(B), clusters = cl, data = dat, se_type = "CR2")) - rfo <- tidy(lm_robust(Y ~ Z, fixed_effects = ~ B, clusters = cl, data = dat, se_type = "CR2")) - - expect_equivalent( - ro[ro$term %in% c("Z"), ], - rfo[rfo$term %in% c("Z"), ] - ) + expect_equivalent( + ro[ro$term %in% c("Z"), ], + rfo[rfo$term %in% c("Z"), ] + ) + } }) test_that("FE matches with multiple FEs and covars", { ## Multiple FEs, multiple covars + for (se_type in se_types) { + ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, se_type = se_type)) + rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = se_type)) - ## Classical - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "classical")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "classical")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## HC0 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "HC0")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "HC0")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) + expect_equivalent( + ro[ro$term %in% c("Z"), ], + rfo[rfo$term %in% c("Z"), ] + ) - ## HC1 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "HC1")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "HC1")) + # Weights + ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = se_type)) + rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = se_type)) - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) + expect_equivalent( + ro[ro$term %in% c("Z", "X"), ], + rfo[rfo$term %in% c("Z", "X"), ] + ) + } - ## HC2 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "HC2")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "HC2")) + for (se_type in cr_se_types) { + ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, se_type = se_type)) + rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = se_type)) - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) + expect_equivalent( + ro[ro$term %in% c("Z"), ], + rfo[rfo$term %in% c("Z"), ] + ) + # Weights + if (se_type %in% c("CR2", "CR3")) { + expect_error( + rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = se_type)), + "Cannot use `fixed_effects` with weighted" + ) + + # ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = "CR2")) + # + # expect_equivalent( + # ro[ro$term %in% c("Z", "X"), ], + # rfo[rfo$term %in% c("Z", "X"), ] + # ) + } else { + ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = se_type)) + rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = se_type)) + + expect_equivalent( + ro[ro$term %in% c("Z", "X"), ], + rfo[rfo$term %in% c("Z", "X"), ] + ) + } + + } ## HC3 # Uncomment for perfect fits which reveal problems for our estimators @@ -138,436 +100,103 @@ test_that("FE matches with multiple FEs and covars", { # dat <- data.frame( # Y = rnorm(N), # Z = rbinom(N, 1, .5), - # X = rnorm(N), - # B = factor(rep(1:2, times = c(4, 6))), - # B2 = factor(rep(1:3, times = c(3, 3, 4))), - # cl = sample(1:4, size = N, replace = T) - # ) - # lmo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = dat) - # summary(lmo) - # sandwich::vcovHC(lmo, "HC3") - - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "HC3")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "HC3")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## CR0 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "CR0")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "CR0")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## CR stata - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "stata")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "stata")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## CR2 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "CR2")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "CR2")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) -}) - -test_that("FE matches with weights", { ## Classical - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = "classical")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "classical")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## HC0 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC0")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC0")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## HC1 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC1")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC1")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## HC2 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC2")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC2")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## HC3 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC3")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC3")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## CR0 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = "CR0")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = "CR0")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## CR stata - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = "stata")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = "stata")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## CR2 - expect_error( - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = "CR2")), - "Cannot use `fixed_effects` with weighted" - ) - - # ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = "CR2")) - # - # expect_equivalent( - # ro[ro$term %in% c("Z", "X"), ], - # rfo[rfo$term %in% c("Z", "X"), ] - # ) -}) - -test_that("FEs work with multiple outcomes", { - ## Multiple Outcomes - - ## Classical - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "classical") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "classical") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC0 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "HC0") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "HC0") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC1 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "HC1") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "HC1") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC2 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "HC2") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "HC2") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC3 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "HC3") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "HC3") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## CR0 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "CR0") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "CR0") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## CR stata - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "stata") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "stata") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## CR2 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "CR2") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "CR2") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## multiple outcomes with weights - ## Classical - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = "classical") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "classical") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC0 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC0") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC0") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC1 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC1") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC1") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC2 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC2") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC2") - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC3 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = "HC3") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = "HC3") + # X = rnorm(N), + # B = factor(rep(1:2, times = c(4, 6))), + # B2 = factor(rep(1:3, times = c(3, 3, 4))), + # cl = sample(1:4, size = N, replace = T) + # ) + # lmo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = dat) + # summary(lmo) + # sandwich::vcovHC(lmo, "HC3") +}) - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) +test_that("FEs work with multiple outcomes", { + ## Multiple Outcomes + for (se_type in se_types) { + ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, se_type = "classical") + rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, se_type = "classical") - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) + expect_equivalent( + tidy(ro)[ro$term %in% c("Z", "X"), ], + tidy(rfo)[rfo$term %in% c("Z", "X"), ] + ) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) + expect_equivalent( + ro$fitted.values, + rfo$fitted.values + ) - ## CR0 - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = "CR0") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = "CR0") + expect_equal( + ro[c("r.squared", "adj.r.squared")], + rfo[c("r.squared", "adj.r.squared")] + ) - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) + # Weights + ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, weights = w, se_type = se_type) + rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = se_type) - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) + expect_equivalent( + tidy(ro)[ro$term %in% c("Z", "X"), ], + tidy(rfo)[rfo$term %in% c("Z", "X"), ] + ) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) + expect_equivalent( + ro$fitted.values, + rfo$fitted.values + ) - ## CR stata - ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = "stata") - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = "stata") + expect_equal( + ro[c("r.squared", "adj.r.squared")], + rfo[c("r.squared", "adj.r.squared")] + ) + } - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) + # clusters + for (se_type in cr_se_types) { + ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, se_type = se_type) + rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = se_type) - expect_equivalent( - ro$fitted.values, - rfo$fitted.values - ) + expect_equivalent( + tidy(ro)[ro$term %in% c("Z", "X"), ], + tidy(rfo)[rfo$term %in% c("Z", "X"), ] + ) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) + expect_equivalent( + ro$fitted.values, + rfo$fitted.values + ) - ## CR2 - expect_error( - rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = "CR2"), - "Cannot use `fixed_effects` with weighted" - ) - # ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = "CR2") - # - # expect_equivalent( - # tidy(ro)[ro$term %in% c("Z", "X"), ], - # tidy(rfo)[rfo$term %in% c("Z", "X"), ] - # ) - # - # expect_true( - # max( - # unlist(ro[c("r.squared", "adj.r.squared")]) - - # unlist(rfo[c("r.squared", "adj.r.squared")]) - # ) < 1e-7 - # ) + expect_equal( + ro[c("r.squared", "adj.r.squared")], + rfo[c("r.squared", "adj.r.squared")] + ) + # Weights + if (se_type %in% c("CR2", "CR3")) { + expect_error( + rfo <- tidy(lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = se_type)), + "Cannot use `fixed_effects` with weighted" + ) + } else { + ro <- lm_robust(cbind(Y, Y2) ~ Z + X + factor(B) + factor(B2), data = dat, clusters = cl, weights = w, se_type = se_type) + rfo <- lm_robust(cbind(Y, Y2) ~ Z + X, fixed_effects = ~ B + B2, data = dat, clusters = cl, weights = w, se_type = se_type) + + expect_equivalent( + tidy(ro)[ro$term %in% c("Z", "X"), ], + tidy(rfo)[rfo$term %in% c("Z", "X"), ] + ) + + expect_equivalent( + ro$fitted.values, + rfo$fitted.values + ) + + expect_equal( + ro[c("r.squared", "adj.r.squared")], + rfo[c("r.squared", "adj.r.squared")] + ) + } + } }) @@ -578,63 +207,23 @@ test_that("FEs work with missingness", { datmiss$Y[5] <- NA datmiss$B[1] <- NA - ## Classical - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = "classical") - expect_warning( - rfo <- lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = datmiss, se_type = "classical"), - "Some observations have missingness in the fixed effects but not in the outcome or covariates." - ) - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC0 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = "HC0") - expect_warning( - rfo <- lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = datmiss, se_type = "HC0"), - "Some observations have missingness in the fixed effects but not in the outcome or covariates." -) - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## HC1 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = "HC1") - expect_warning( - rfo <- lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = datmiss, se_type = "HC1"), - "Some observations have missingness in the fixed effects but not in the outcome or covariates." - ) - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) + for (se_type in se_types) { + ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = se_type) + expect_warning( + rfo <- lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = datmiss, se_type = se_type), + "Some observations have missingness in the fixed effects but not in the outcome or covariates." + ) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) + expect_equivalent( + tidy(ro)[ro$term %in% c("Z", "X"), ], + tidy(rfo)[rfo$term %in% c("Z", "X"), ] + ) - ## HC2 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), data = datmiss, se_type = "HC2") - expect_warning( - rfo <- lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, data = datmiss, se_type = "HC2"), - "Some observations have missingness in the fixed effects but not in the outcome or covariates." - ) + expect_equal( + ro[c("r.squared", "adj.r.squared")], + rfo[c("r.squared", "adj.r.squared")] + ) + } # Check to make sure with only one FE when missingness works expect_warning( @@ -660,78 +249,75 @@ test_that("FEs work with missingness", { ) lfo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - expect_equivalent( tidy(rfo)[rfo$term %in% c("Z", "X"), c("std.error")], sqrt(diag(sandwich::vcovHC(lfo, type = "HC3"))[2:3]) ) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) - - ## CR0 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, se_type = "CR0") - expect_warning( - rfo <- lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = datmiss, se_type = "CR0"), - "Some observations have missingness in the fixed effects but not in the outcome or covariates." - ) - - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) + for (se_type in cr_se_types) { + ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, se_type = se_type) + expect_warning( + rfo <- lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = datmiss, se_type = se_type), + "Some observations have missingness in the fixed effects but not in the outcome or covariates." + ) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) + expect_equivalent( + tidy(ro)[ro$term %in% c("Z", "X"), ], + tidy(rfo)[rfo$term %in% c("Z", "X"), ] + ) - ## CR stata - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, se_type = "stata") - expect_warning( - rfo <- lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = datmiss, se_type = "stata"), - "Some observations have missingness in the fixed effects but not in the outcome or covariates." - ) + expect_equal( + ro[c("r.squared", "adj.r.squared")], + rfo[c("r.squared", "adj.r.squared")] + ) - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) + } +}) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) +test_that("FEs handle collinear FEs", { + ## Collinear factors + for (se_type in se_types) { + ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(Bdup) + factor(B2), data = dat, se_type = se_type)) + rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + Bdup + B2, data = dat, se_type = se_type)) - ## CR2 - ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, se_type = "CR2") - expect_warning( - rfo <- lm_robust(Y ~ Z + X, fixed_effects = ~ B + B2, clusters = cl, data = datmiss, se_type = "CR2"), - "Some observations have missingness in the fixed effects but not in the outcome or covariates." - ) + expect_equivalent( + ro$estimate[ro$term %in% c("Z", "X")], + rfo$estimate[rfo$term %in% c("Z", "X")] + ) - expect_equivalent( - tidy(ro)[ro$term %in% c("Z", "X"), ], - tidy(rfo)[rfo$term %in% c("Z", "X"), ] - ) - expect_equal( - ro[c("r.squared", "adj.r.squared")], - rfo[c("r.squared", "adj.r.squared")] - ) -}) + if (se_type %in% c("HC2", "HC3")) { + # HC2 or HC3 work because we can get the collinearity in the FEs for free as we have to invert + # UtU anyways (where U is cbind(X, FE_dummy_mat)) + expect_equivalent( + ro[ro$term %in% c("Z", "X"), ], + rfo[rfo$term %in% c("Z", "X"), ] + ) + } else { + # DoF is wrong because we count the FEs incorrectly for the finite sample correction with collinearity + expect_false( + all( + ro$df[ro$term %in% c("Z", "X")] == + rfo$df[rfo$term %in% c("Z", "X")] + ) + ) + if (se_type == "HC0") { + # But std errors work here bc no DoF correction + expect_equivalent( + ro$std.error[ro$term %in% c("Z", "X")], + rfo$std.error[rfo$term %in% c("Z", "X")] + ) + } else { + expect_false( + all( + ro$std.error[ro$term %in% c("Z", "X")] == + rfo$std.error[rfo$term %in% c("Z", "X")] + ) + ) + } + } + } -test_that("FEs handle collinear FEs", { - ## Collinear factors - ## Classical - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(Bdup) + factor(B2), data = dat, se_type = "classical")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + Bdup + B2, data = dat, se_type = "classical")) # lo <- lfe::felm(Y ~ Z + X|B + B2 + Bdup, data = dat) # # mtcars$cyl2 <- mtcars$cyl @@ -752,187 +338,55 @@ test_that("FEs handle collinear FEs", { # lfe:::summary.felm(lfe::felm(mpg ~ hp | cyl + cyl2 + am, data = mtcars))$coefficients # tidy(lm_robust(mpg ~ hp + factor(cyl) + factor(cyl3) + factor(am), data = mtcars, se_type = "classical"))[2,] - expect_equivalent( - ro$estimate[ro$term %in% c("Z", "X")], - rfo$estimate[rfo$term %in% c("Z", "X")] - ) - - # SE is wrong because we count the FEs incorrectly for the finite sample correction - expect_false( - all( - ro$std.error[ro$term %in% c("Z", "X")] == - rfo$std.error[rfo$term %in% c("Z", "X")] - ) - ) - - ## HC0 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(Bdup) + factor(B2), data = dat, se_type = "HC0")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + Bdup + B2, data = dat, se_type = "HC0")) - - # SE is right for HC0 (as there is no finite sample correction), but DoF is still wrong - expect_equivalent( - ro[ro$term %in% c("Z", "X"), c("estimate", "std.error")], - rfo[rfo$term %in% c("Z", "X"), c("estimate", "std.error")] - ) - - # SE is right, but FE is wrong because we count the FEs incorrectly for the finite sample correction - expect_false( - all( - ro$df[ro$term %in% c("Z", "X")] == - rfo$df[rfo$term %in% c("Z", "X")] - ) - ) - - ## HC1 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(Bdup) + factor(B2), data = dat, se_type = "HC1")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + Bdup + B2, data = dat, se_type = "HC1")) - - expect_equivalent( - ro$estimate[ro$term %in% c("Z", "X")], - rfo$estimate[rfo$term %in% c("Z", "X")] - ) - - # SE is wrong because we count the FEs incorrectly for the finite sample correction - expect_false( - all( - ro$std.error[ro$term %in% c("Z", "X")] == - rfo$std.error[rfo$term %in% c("Z", "X")] - ) - ) - - ## HC2 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(Bdup) + factor(B2), data = dat, se_type = "HC2")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + Bdup + B2, data = dat, se_type = "HC2")) - - # HC2 or HC3 work because we can get the collinearity in the FEs for free as we have to invert - # UtU anyways (where U is cbind(X, FE_dummy_mat)) - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## HC3 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(Bdup) + factor(B2), data = dat, se_type = "HC3")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + Bdup + B2, data = dat, se_type = "HC3")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## CR0 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(Bdup) + factor(B2), clusters = cl, data = dat, se_type = "CR0")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + Bdup + B2, clusters = cl, data = dat, se_type = "CR0")) - - # DoF for CR0 works, unlike HC0, because our DoF for CR0 is N_clust - 1, not N - total_rank - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) - - ## CR stata - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(Bdup) + factor(B2), clusters = cl, data = dat, se_type = "stata")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + Bdup + B2, clusters = cl, data = dat, se_type = "stata")) - - expect_equivalent( - ro$estimate[ro$term %in% c("Z", "X")], - rfo$estimate[rfo$term %in% c("Z", "X")] - ) - - # SE is wrong because we count the FEs incorrectly for the finite sample correction - expect_false( - all( - ro$std.error[ro$term %in% c("Z", "X")] == - rfo$std.error[rfo$term %in% c("Z", "X")] - ) - ) - - ## CR2 - ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(Bdup) + factor(B2), clusters = cl, data = dat, se_type = "CR2")) - rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + Bdup + B2, clusters = cl, data = dat, se_type = "CR2")) - - # As with HC2, we get the count of FEs here for free - expect_equivalent( - ro[ro$term %in% c("Z", "X"), ], - rfo[rfo$term %in% c("Z", "X"), ] - ) + ## Collinear factors + for (se_type in cr_se_types) { + ro <- tidy(lm_robust(Y ~ Z + X + factor(B) + factor(Bdup) + factor(B2), clusters = cl, data = dat, se_type = se_type)) + rfo <- tidy(lm_robust(Y ~ Z + X, fixed_effects = ~ B + Bdup + B2, clusters = cl, data = dat, se_type = se_type)) + + # DoF for CR0/CR3 works, unlike HC0, because our DoF for CR0/CR3 is N_clust - 1, not N - total_rank + if (se_type %in% c("CR0", "CR2", "CR3")) { + expect_equivalent( + ro[ro$term %in% c("Z", "X"), ], + rfo[rfo$term %in% c("Z", "X"), ] + ) + } else { + expect_equivalent( + ro$estimate[ro$term %in% c("Z", "X")], + rfo$estimate[rfo$term %in% c("Z", "X")] + ) + expect_false( + all( + ro$std.error[ro$term %in% c("Z", "X")] == + rfo$std.error[rfo$term %in% c("Z", "X")] + ) + ) + } + + } }) test_that("FEs work with collinear covariates", { - ## Collinear covariates - ## Classical - ro <- tidy(lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = dat, se_type = "classical")) - rfo <- tidy(lm_robust(Y ~ Z + X + Xdup, fixed_effects = ~ B + B2, data = dat, se_type = "classical")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X", "Xdup"), ], - rfo[rfo$term %in% c("Z", "X", "Xdup"), ] - ) - - ## HC0 - ro <- tidy(lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = dat, se_type = "HC0")) - rfo <- tidy(lm_robust(Y ~ Z + X + Xdup, fixed_effects = ~ B + B2, data = dat, se_type = "HC0")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X", "Xdup"), ], - rfo[rfo$term %in% c("Z", "X", "Xdup"), ] - ) - - ## HC1 - ro <- tidy(lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = dat, se_type = "HC1")) - rfo <- tidy(lm_robust(Y ~ Z + X + Xdup, fixed_effects = ~ B + B2, data = dat, se_type = "HC1")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X", "Xdup"), ], - rfo[rfo$term %in% c("Z", "X", "Xdup"), ] - ) - - ## HC2 - ro <- tidy(lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = dat, se_type = "HC2")) - rfo <- tidy(lm_robust(Y ~ Z + X + Xdup, fixed_effects = ~ B + B2, data = dat, se_type = "HC2")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X", "Xdup"), ], - rfo[rfo$term %in% c("Z", "X", "Xdup"), ] - ) - - ## HC3 - - ro <- tidy(lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = dat, se_type = "HC3")) - rfo <- tidy(lm_robust(Y ~ Z + X + Xdup, fixed_effects = ~ B + B2, data = dat, se_type = "HC3")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X", "Xdup"), ], - rfo[rfo$term %in% c("Z", "X", "Xdup"), ] - ) - - ## CR0 - ro <- tidy(lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "CR0")) - rfo <- tidy(lm_robust(Y ~ Z + X + Xdup, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "CR0")) - - expect_equivalent( - ro[ro$term %in% c("Z", "X", "Xdup"), ], - rfo[rfo$term %in% c("Z", "X", "Xdup"), ] - ) - - ## CR stata - ro <- tidy(lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "stata")) - rfo <- tidy(lm_robust(Y ~ Z + X + Xdup, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "stata")) + for (se_type in se_types) { + ro <- tidy(lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), data = dat, se_type = se_type)) + rfo <- tidy(lm_robust(Y ~ Z + X + Xdup, fixed_effects = ~ B + B2, data = dat, se_type = se_type)) - expect_equivalent( - ro[ro$term %in% c("Z", "X", "Xdup"), ], - rfo[rfo$term %in% c("Z", "X", "Xdup"), ] - ) + expect_equivalent( + ro[ro$term %in% c("Z", "X", "Xdup"), ], + rfo[rfo$term %in% c("Z", "X", "Xdup"), ] + ) + } - ## CR2 - ro <- tidy(lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), clusters = cl, data = dat, se_type = "CR2")) - rfo <- tidy(lm_robust(Y ~ Z + X + Xdup, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = "CR2")) + # Clustered methods + for (se_type in cr_se_types) { + ro <- tidy(lm_robust(Y ~ Z + X + Xdup + factor(B) + factor(B2), clusters = cl, data = dat, se_type = se_type)) + rfo <- tidy(lm_robust(Y ~ Z + X + Xdup, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = se_type)) - expect_equivalent( - ro[ro$term %in% c("Z", "X", "Xdup"), ], - rfo[rfo$term %in% c("Z", "X", "Xdup"), ] - ) + expect_equivalent( + ro[ro$term %in% c("Z", "X", "Xdup"), ], + rfo[rfo$term %in% c("Z", "X", "Xdup"), ] + ) + } }) diff --git a/tests/testthat/test-lm-robust.R b/tests/testthat/test-lm-robust.R index 8dbc6201..1741db0d 100644 --- a/tests/testthat/test-lm-robust.R +++ b/tests/testthat/test-lm-robust.R @@ -157,189 +157,111 @@ test_that("lm robust F-tests are correct", { c(caro$F[2], caro$Df[2], caro$Res.Df[2]) ) - hc0 <- lm_robust(mpg ~ hp + am, data = mtcars, se_type = "HC0") - caro <- car::linearHypothesis(hc0, c("hp = 0", "am = 0"), test = "F") - carolm <- car::linearHypothesis(lm(mpg ~ hp + am, data = mtcars), - c("hp = 0", "am = 0"), - test = "F", - white.adjust = "hc0") - expect_equivalent( - hc0$fstatistic, - c(caro$F[2], caro$Df[2], caro$Res.Df[2]) - ) - expect_equivalent( - hc0$fstatistic, - c(carolm$F[2], carolm$Df[2], carolm$Res.Df[2]) - ) - - hc1 <- lm_robust(mpg ~ hp + am, data = mtcars, se_type = "HC1") - caro <- car::linearHypothesis(hc1, c("hp = 0", "am = 0"), test = "F") - carolm <- car::linearHypothesis(lm(mpg ~ hp + am, data = mtcars), - c("hp = 0", "am = 0"), - test = "F", - white.adjust = "hc1") - expect_equivalent( - hc1$fstatistic, - c(caro$F[2], caro$Df[2], caro$Res.Df[2]) - ) - expect_equivalent( - hc1$fstatistic, - c(carolm$F[2], carolm$Df[2], carolm$Res.Df[2]) - ) - - hc1w <- lm_robust(mpg ~ hp + am, data = mtcars, weights = wt, se_type = "HC1") - caro <- car::linearHypothesis(hc1w, c("hp = 0", "am = 0"), test = "F") - expect_equivalent( - hc1w$fstatistic, - c(caro$F[2], caro$Df[2], caro$Res.Df[2]) - ) - - hc2 <- lm_robust(mpg ~ hp + am, data = mtcars, se_type = "HC2") - caro <- car::linearHypothesis(hc2, c("hp = 0", "am = 0"), test = "F") - carolm <- car::linearHypothesis(lm(mpg ~ hp + am, data = mtcars), - c("hp = 0", "am = 0"), - test = "F", - white.adjust = "hc2") - expect_equivalent( - hc2$fstatistic, - c(caro$F[2], caro$Df[2], caro$Res.Df[2]) - ) - expect_equivalent( - hc2$fstatistic, - c(carolm$F[2], carolm$Df[2], carolm$Res.Df[2]) - ) - - hc3 <- lm_robust(mpg ~ hp + am, data = mtcars, se_type = "HC3") - caro <- car::linearHypothesis(hc3, c("hp = 0", "am = 0"), test = "F") - carolm <- car::linearHypothesis(lm(mpg ~ hp + am, data = mtcars), - c("hp = 0", "am = 0"), - test = "F", - white.adjust = "hc3") - expect_equivalent( - hc3$fstatistic, - c(caro$F[2], caro$Df[2], caro$Res.Df[2]) - ) - expect_equivalent( - hc3$fstatistic, - c(carolm$F[2], carolm$Df[2], carolm$Res.Df[2]) - ) - - cr0 <- lm_robust(mpg ~ hp + am, data = mtcars, clusters = carb, se_type = "CR0") - caro <- clubSandwich::Wald_test(lm(mpg ~ hp + am, data = mtcars), - cluster = mtcars$carb, - c(FALSE, TRUE, TRUE), - vcov = "CR0", - test = "Naive-F") - - expect_equivalent( - cr0$fstatistic[c(1, 3)], - c(caro$Fstat, caro$df) - ) - - cr1s <- lm_robust(mpg ~ hp + am, data = mtcars, clusters = carb, se_type = "stata") - caro <- clubSandwich::Wald_test(lm(mpg ~ hp + am, data = mtcars), - cluster = mtcars$carb, - c(FALSE, TRUE, TRUE), - vcov = "CR1S", - test = "Naive-F") - - expect_equivalent( - cr1s$fstatistic[c(1, 3)], - c(caro$Fstat, caro$df) - ) - - cr1sw <- lm_robust(mpg ~ hp + am, data = mtcars, clusters = carb, weights = wt, se_type = "stata") - caro <- clubSandwich::Wald_test(lm(mpg ~ hp + am, weights = wt, data = mtcars), - cluster = mtcars$carb, - c(FALSE, TRUE, TRUE), - vcov = "CR1S", - test = "Naive-F") + for (se_type in setdiff(se_types, "classical")) { + lmr <- lm_robust(mpg ~ hp + am, data = mtcars, se_type = se_type) + caro <- car::linearHypothesis(lmr, c("hp = 0", "am = 0"), test = "F") + carolm <- car::linearHypothesis(lm(mpg ~ hp + am, data = mtcars), + c("hp = 0", "am = 0"), + test = "F", + white.adjust = tolower(se_type)) + expect_equivalent( + lmr$fstatistic, + c(caro$F[2], caro$Df[2], caro$Res.Df[2]) + ) + expect_equivalent( + lmr$fstatistic, + c(carolm$F[2], carolm$Df[2], carolm$Res.Df[2]) + ) - expect_equivalent( - cr1sw$fstatistic[c(1, 3)], - c(caro$Fstat, caro$df) - ) + lmrw <- lm_robust(mpg ~ hp + am, data = mtcars, weights = wt, se_type = se_type) + carow <- car::linearHypothesis(lmrw, c("hp = 0", "am = 0"), test = "F") + carolmw <- car::linearHypothesis(lm(mpg ~ hp + am, weights = wt, data = mtcars), + c("hp = 0", "am = 0"), + test = "F", + white.adjust = tolower(se_type)) + expect_equivalent( + lmrw$fstatistic, + c(carow$F[2], carow$Df[2], carow$Res.Df[2]) + ) + expect_equivalent( + lmrw$fstatistic, + c(carolmw$F[2], carolmw$Df[2], carolmw$Res.Df[2]) + ) + } - cr2 <- lm_robust(mpg ~ hp + am, data = mtcars, clusters = carb, se_type = "CR2") - caro <- clubSandwich::Wald_test(lm(mpg ~ hp + am, data = mtcars), - cluster = mtcars$carb, - c(FALSE, TRUE, TRUE), - vcov = "CR2", - test = "Naive-F") + for (se_type in cr_se_types) { - expect_equivalent( - cr2$fstatistic[c(1, 3)], - c(caro$Fstat, caro$df) - ) + lmcr <- lm_robust(mpg ~ hp + am, data = mtcars, clusters = carb, se_type = se_type) + caro <- clubSandwich::Wald_test(lm(mpg ~ hp + am, data = mtcars), + cluster = mtcars$carb, + c(FALSE, TRUE, TRUE), + vcov = ifelse(se_type == "stata", "CR1S", se_type), + test = "Naive-F") - cr2w <- lm_robust(mpg ~ hp + am, data = mtcars, clusters = carb, weights = wt, se_type = "CR2") - caro <- clubSandwich::Wald_test(lm(mpg ~ hp + am, weights = wt, data = mtcars), - cluster = mtcars$carb, - c(FALSE, TRUE, TRUE), - vcov = "CR2", - test = "Naive-F") + lmcrw <- lm_robust(mpg ~ hp + am, data = mtcars, clusters = carb, weights = wt, se_type = se_type) + carow <- clubSandwich::Wald_test(lm(mpg ~ hp + am, weights = wt, data = mtcars), + cluster = mtcars$carb, + c(FALSE, TRUE, TRUE), + vcov = ifelse(se_type == "stata", "CR1S", se_type), + test = "Naive-F") - expect_equivalent( - cr2w$fstatistic[c(1, 3)], - c(caro$Fstat, caro$df) - ) + expect_equivalent( + lmcr$fstatistic[c(1, 3)], + c(caro$Fstat, caro$df) + ) + expect_equivalent( + lmcrw$fstatistic[c(1, 3)], + c(carow$Fstat, carow$df) + ) + } }) test_that("lm robust mlm gets right fstats", { - cocyl <- lm_robust(cyl ~ hp + am, data = mtcars, se_type = "classical") - compg <- lm_robust(mpg ~ hp + am, data = mtcars, se_type = "classical") - co2 <- lm_robust(cbind(cyl, mpg) ~ hp + am, data = mtcars, se_type = "classical") - expect_equivalent( - co2$fstatistic[1:2], - c(cocyl$fstatistic[1], compg$fstatistic[1]) - ) + for (se_type in se_types) { - hc2cyl <- lm_robust(cyl ~ hp + am, data = mtcars, se_type = "HC2") - hc2mpg <- lm_robust(mpg ~ hp + am, data = mtcars, se_type = "HC2") - hc22 <- lm_robust(cbind(cyl, mpg) ~ hp + am, data = mtcars, se_type = "HC2") + lmcyl <- lm_robust(cyl ~ hp + am, data = mtcars, se_type = se_type) + lmmpg <- lm_robust(mpg ~ hp + am, data = mtcars, se_type = se_type) + lm2 <- lm_robust(cbind(cyl, mpg) ~ hp + am, data = mtcars, se_type = se_type) - expect_equivalent( - hc22$fstatistic[1:2], - c(hc2cyl$fstatistic[1], hc2mpg$fstatistic[1]) - ) + expect_equivalent( + lm2$fstatistic[1:2], + c(lmcyl$fstatistic[1], lmmpg$fstatistic[1]) + ) - cr2cyl <- lm_robust(cyl ~ hp + am, data = mtcars, cluster = carb, se_type = "CR2") - cr2mpg <- lm_robust(mpg ~ hp + am, data = mtcars, cluster = carb, se_type = "CR2") - cr22 <- lm_robust(cbind(cyl, mpg) ~ hp + am, data = mtcars, cluster = carb, se_type = "CR2") + lmwcyl <- lm_robust(cyl ~ hp + am, data = mtcars, weights = wt, se_type = se_type) + lmwmpg <- lm_robust(mpg ~ hp + am, data = mtcars, weights = wt, se_type = se_type) + lmw2 <- lm_robust(cbind(cyl, mpg) ~ hp + am, data = mtcars, weights = wt, se_type = se_type) - expect_equivalent( - cr22$fstatistic[1:2], - c(cr2cyl$fstatistic[1], cr2mpg$fstatistic[1]) - ) + expect_equivalent( + lmw2$fstatistic[1:2], + c(lmwcyl$fstatistic[1], lmwmpg$fstatistic[1]) + ) - cowcyl <- lm_robust(cyl ~ hp + am, data = mtcars, weights = wt, se_type = "classical") - cowmpg <- lm_robust(mpg ~ hp + am, data = mtcars, weights = wt, se_type = "classical") - cow2 <- lm_robust(cbind(cyl, mpg) ~ hp + am, data = mtcars, weights = wt, se_type = "classical") + } - expect_equivalent( - cow2$fstatistic[1:2], - c(cowcyl$fstatistic[1], cowmpg$fstatistic[1]) - ) + for (se_type in cr_se_types) { - hc2wcyl <- lm_robust(cyl ~ hp + am, data = mtcars, weights = wt, se_type = "HC2") - hc2wmpg <- lm_robust(mpg ~ hp + am, data = mtcars, weights = wt, se_type = "HC2") - hc2w2 <- lm_robust(cbind(cyl, mpg) ~ hp + am, data = mtcars, weights = wt, se_type = "HC2") + lmccyl <- lm_robust(cyl ~ hp + am, data = mtcars, cluster = carb, se_type = se_type) + lmcmpg <- lm_robust(mpg ~ hp + am, data = mtcars, cluster = carb, se_type = se_type) + lmc2 <- lm_robust(cbind(cyl, mpg) ~ hp + am, data = mtcars, cluster = carb, se_type = se_type) - expect_equivalent( - hc2w2$fstatistic[1:2], - c(hc2wcyl$fstatistic[1], hc2wmpg$fstatistic[1]) - ) + expect_equivalent( + lmc2$fstatistic[1:2], + c(lmccyl$fstatistic[1], lmcmpg$fstatistic[1]) + ) - cr2wcyl <- lm_robust(cyl ~ hp + am, data = mtcars, cluster = carb, weights = wt, se_type = "CR2") - cr2wmpg <- lm_robust(mpg ~ hp + am, data = mtcars, cluster = carb, weights = wt, se_type = "CR2") - cr2w2 <- lm_robust(cbind(cyl, mpg) ~ hp + am, data = mtcars, cluster = carb, weights = wt, se_type = "CR2") + lmcwcyl <- lm_robust(cyl ~ hp + am, data = mtcars, weights = wt, cluster = carb, se_type = se_type) + lmcwmpg <- lm_robust(mpg ~ hp + am, data = mtcars, weights = wt, cluster = carb, se_type = se_type) + lmcw2 <- lm_robust(cbind(cyl, mpg) ~ hp + am, data = mtcars, weights = wt, cluster = carb, se_type = se_type) - expect_equivalent( - cr2w2$fstatistic[1:2], - c(cr2wcyl$fstatistic[1], cr2wmpg$fstatistic[1]) - ) + expect_equivalent( + lmcw2$fstatistic[1:2], + c(lmcwcyl$fstatistic[1], lmcwmpg$fstatistic[1]) + ) + + } }) @@ -645,22 +567,12 @@ test_that("multiple outcomes", { vcov(lmro) ) - expect_equal( - sandwich::vcovHC(lmo, type = "HC0"), - vcov(lm_robust(cbind(mpg, hp) ~ cyl, data = mtcars, se_type = "HC0")) - ) - expect_equal( - sandwich::vcovHC(lmo, type = "HC1"), - vcov(lm_robust(cbind(mpg, hp) ~ cyl, data = mtcars, se_type = "HC1")) - ) - expect_equal( - sandwich::vcovHC(lmo, type = "HC2"), - vcov(lm_robust(cbind(mpg, hp) ~ cyl, data = mtcars, se_type = "HC2")) - ) - expect_equal( - sandwich::vcovHC(lmo, type = "HC3"), - vcov(lm_robust(cbind(mpg, hp) ~ cyl, data = mtcars, se_type = "HC3")) - ) + for (se_type in setdiff(se_types, "classical")) { + expect_equal( + sandwich::vcovHC(lmo, type = se_type), + vcov(lm_robust(cbind(mpg, hp) ~ cyl, data = mtcars, se_type = se_type)) + ) + } # with weights lmo <- lm(cbind(mpg, hp) ~ cyl, data = mtcars, weights = wt) diff --git a/tests/testthat/test-starprep.R b/tests/testthat/test-starprep.R index d10deda2..69cc08d8 100644 --- a/tests/testthat/test-starprep.R +++ b/tests/testthat/test-starprep.R @@ -25,9 +25,6 @@ test_that("starprep works", { }) -se_types <- c("classical", "HC0", "HC1", "HC2", "HC3") -cr_se_types <- c("CR0", "stata", "CR2", "CR3") - set.seed(43) N <- 20 dat <- data.frame( @@ -80,17 +77,11 @@ test_that("commarobust works with regular lm", { ## Test clustered SEs for (se_type in cr_se_types) { - # se_type <- "CR3" - # debugonce(estimatr::lm_robust_fit) - # datmiss <- datmiss[order(datmiss$cl),] + ro <- lm_robust(Y ~ Z + X + factor(B) + factor(B2), clusters = cl, data = datmiss, se_type = se_type) lo <- lm(Y ~ Z + X + factor(B) + factor(B2), data = datmiss) clo <- commarobust(lo, clusters = datmiss$cl[complete.cases(datmiss)], se_type = se_type) - # setdiff(names(ro$thing), names(clo$thing)) - # setdiff(names(clo$thing), names(ro$thing)) - # - # purrr::map(names(ro$thing), ~identical(ro$thing[[.x]], clo$thing[[.x]])) - # ro$thing$cluster == clo$thing$cluster + expect_equal( tidy(ro), tidy(clo) From 1decf234b9bd338fa5612be293e55a104b20e8a0 Mon Sep 17 00:00:00 2001 From: Luke Sonnet Date: Tue, 23 Oct 2018 14:33:21 -0700 Subject: [PATCH 4/7] Update docs for CR3 --- DESCRIPTION | 2 +- R/estimatr_iv_robust.R | 2 +- R/estimatr_lm_robust.R | 3 ++- man/difference_in_means.Rd | 4 ++-- man/extract.lm_robust.Rd | 14 +++++++------- man/horvitz_thompson.Rd | 7 ++++--- man/iv_robust.Rd | 2 +- man/lm_lin.Rd | 5 +++-- man/lm_robust.Rd | 3 ++- man/lm_robust_fit.Rd | 8 ++++---- src/lm_robust_helper.cpp | 27 +++++++-------------------- vignettes/mathematical-notes.Rmd | 21 +++++++++++++++++---- 12 files changed, 51 insertions(+), 47 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0a84dd97..730eee94 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,7 @@ Imports: Formula, rlang (>= 0.2.0) LinkingTo: Rcpp, RcppEigen -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.0 LazyData: true Suggests: RcppEigen, diff --git a/R/estimatr_iv_robust.R b/R/estimatr_iv_robust.R index ed434b28..a981f537 100644 --- a/R/estimatr_iv_robust.R +++ b/R/estimatr_iv_robust.R @@ -22,7 +22,7 @@ #' @param se_type The sort of standard error sought. If `clusters` is #' not specified the options are "HC0", "HC1" (or "stata", the equivalent), #' "HC2" (default), "HC3", or -#' "classical". If `clusters` is specified the options are "CR0", "CR2" (default), or "stata". Can also specify "none", which may speed up estimation of the coefficients. +#' "classical". If `clusters` is specified the options are "CR0", "stata", "CR2" (default), or "CR3". Can also specify "none", which may speed up estimation of the coefficients. #' @param ci logical. Whether to compute and return p-values and confidence #' intervals, TRUE by default. #' @param alpha The significance level, 0.05 by default. diff --git a/R/estimatr_lm_robust.R b/R/estimatr_lm_robust.R index fbf33ac2..89fdbda2 100644 --- a/R/estimatr_lm_robust.R +++ b/R/estimatr_lm_robust.R @@ -19,7 +19,8 @@ #' @param se_type The sort of standard error sought. If `clusters` is #' not specified the options are "HC0", "HC1" (or "stata", the equivalent), #' "HC2" (default), "HC3", or -#' "classical". If `clusters` is specified the options are "CR0", "CR2" (default), or "stata". Can also specify "none", which may speed up estimation of the coefficients. +#' "classical". If `clusters` is specified the options are "CR0", "stata", +#' "CR2" (default), or "CR3". Can also specify "none", which may speed up estimation of the coefficients. #' @param ci logical. Whether to compute and return p-values and confidence #' intervals, TRUE by default. #' @param alpha The significance level, 0.05 by default. diff --git a/man/difference_in_means.Rd b/man/difference_in_means.Rd index ac15442a..ee00e279 100644 --- a/man/difference_in_means.Rd +++ b/man/difference_in_means.Rd @@ -5,8 +5,8 @@ \title{Design-based difference-in-means estimator} \usage{ difference_in_means(formula, data, blocks, clusters, weights, subset, - se_type = c("default", "none"), condition1 = NULL, condition2 = NULL, - ci = TRUE, alpha = 0.05) + se_type = c("default", "none"), condition1 = NULL, + condition2 = NULL, ci = TRUE, alpha = 0.05) } \arguments{ \item{formula}{an object of class formula, as in \code{\link{lm}}, such as diff --git a/man/extract.lm_robust.Rd b/man/extract.lm_robust.Rd index 7039f4d1..7ae788a5 100644 --- a/man/extract.lm_robust.Rd +++ b/man/extract.lm_robust.Rd @@ -6,17 +6,17 @@ \alias{extract.iv_robust} \title{Extract model data for \pkg{texreg} package} \usage{ -extract.robust_default(model, include.ci = TRUE, include.rsquared = TRUE, - include.adjrs = TRUE, include.nobs = TRUE, include.fstatistic = FALSE, - include.rmse = TRUE, ...) +extract.robust_default(model, include.ci = TRUE, + include.rsquared = TRUE, include.adjrs = TRUE, include.nobs = TRUE, + include.fstatistic = FALSE, include.rmse = TRUE, ...) extract.lm_robust(model, include.ci = TRUE, include.rsquared = TRUE, - include.adjrs = TRUE, include.nobs = TRUE, include.fstatistic = FALSE, - include.rmse = TRUE, ...) + include.adjrs = TRUE, include.nobs = TRUE, + include.fstatistic = FALSE, include.rmse = TRUE, ...) extract.iv_robust(model, include.ci = TRUE, include.rsquared = TRUE, - include.adjrs = TRUE, include.nobs = TRUE, include.fstatistic = FALSE, - include.rmse = TRUE, ...) + include.adjrs = TRUE, include.nobs = TRUE, + include.fstatistic = FALSE, include.rmse = TRUE, ...) } \arguments{ \item{model}{an object of class \code{\link{lm_robust}} or \code{"iv_robust"}} diff --git a/man/horvitz_thompson.Rd b/man/horvitz_thompson.Rd index 188b6d4e..b7324fba 100644 --- a/man/horvitz_thompson.Rd +++ b/man/horvitz_thompson.Rd @@ -5,9 +5,10 @@ \title{Horvitz-Thompson estimator for two-armed trials} \usage{ horvitz_thompson(formula, data, blocks, clusters, simple = NULL, - condition_prs, condition_pr_mat = NULL, ra_declaration = NULL, subset, - condition1 = NULL, condition2 = NULL, se_type = c("youngs", "constant", - "none"), ci = TRUE, alpha = 0.05, return_condition_pr_mat = FALSE) + condition_prs, condition_pr_mat = NULL, ra_declaration = NULL, + subset, condition1 = NULL, condition2 = NULL, se_type = c("youngs", + "constant", "none"), ci = TRUE, alpha = 0.05, + return_condition_pr_mat = FALSE) } \arguments{ \item{formula}{an object of class formula, as in \code{\link{lm}}, such as diff --git a/man/iv_robust.Rd b/man/iv_robust.Rd index dffbccff..75c8297f 100644 --- a/man/iv_robust.Rd +++ b/man/iv_robust.Rd @@ -33,7 +33,7 @@ See 'Details'.} \item{se_type}{The sort of standard error sought. If `clusters` is not specified the options are "HC0", "HC1" (or "stata", the equivalent), "HC2" (default), "HC3", or -"classical". If `clusters` is specified the options are "CR0", "CR2" (default), or "stata". Can also specify "none", which may speed up estimation of the coefficients.} +"classical". If `clusters` is specified the options are "CR0", "stata", "CR2" (default), or "CR3". Can also specify "none", which may speed up estimation of the coefficients.} \item{ci}{logical. Whether to compute and return p-values and confidence intervals, TRUE by default.} diff --git a/man/lm_lin.Rd b/man/lm_lin.Rd index 0205beaf..d30d8240 100644 --- a/man/lm_lin.Rd +++ b/man/lm_lin.Rd @@ -4,8 +4,9 @@ \alias{lm_lin} \title{Linear regression with the Lin (2013) covariate adjustment} \usage{ -lm_lin(formula, covariates, data, weights, subset, clusters, se_type = NULL, - ci = TRUE, alpha = 0.05, return_vcov = TRUE, try_cholesky = FALSE) +lm_lin(formula, covariates, data, weights, subset, clusters, + se_type = NULL, ci = TRUE, alpha = 0.05, return_vcov = TRUE, + try_cholesky = FALSE) } \arguments{ \item{formula}{an object of class formula, as in \code{\link{lm}}, such as diff --git a/man/lm_robust.Rd b/man/lm_robust.Rd index dbe23851..ad6e563c 100644 --- a/man/lm_robust.Rd +++ b/man/lm_robust.Rd @@ -31,7 +31,8 @@ See 'Details'.} \item{se_type}{The sort of standard error sought. If `clusters` is not specified the options are "HC0", "HC1" (or "stata", the equivalent), "HC2" (default), "HC3", or -"classical". If `clusters` is specified the options are "CR0", "CR2" (default), or "stata". Can also specify "none", which may speed up estimation of the coefficients.} +"classical". If `clusters` is specified the options are "CR0", "stata", +"CR2" (default), or "CR3". Can also specify "none", which may speed up estimation of the coefficients.} \item{ci}{logical. Whether to compute and return p-values and confidence intervals, TRUE by default.} diff --git a/man/lm_robust_fit.Rd b/man/lm_robust_fit.Rd index f8e21ce1..59fe97c1 100644 --- a/man/lm_robust_fit.Rd +++ b/man/lm_robust_fit.Rd @@ -4,10 +4,10 @@ \alias{lm_robust_fit} \title{Internal method that creates linear fits} \usage{ -lm_robust_fit(y, X, yoriginal = NULL, Xoriginal = NULL, weights, cluster, - fixed_effects = NULL, ci = TRUE, se_type, has_int, alpha = 0.05, - return_vcov = TRUE, return_fit = TRUE, try_cholesky = FALSE, - iv_stage = list(0)) +lm_robust_fit(y, X, yoriginal = NULL, Xoriginal = NULL, weights, + cluster, fixed_effects = NULL, ci = TRUE, se_type, has_int, + alpha = 0.05, return_vcov = TRUE, return_fit = TRUE, + try_cholesky = FALSE, iv_stage = list(0)) } \arguments{ \item{y}{numeric outcome vector} diff --git a/src/lm_robust_helper.cpp b/src/lm_robust_helper.cpp index 87864fe0..3d7dc7c2 100644 --- a/src/lm_robust_helper.cpp +++ b/src/lm_robust_helper.cpp @@ -431,32 +431,19 @@ List lm_variance(Eigen::Map& X, } else if (se_type == "CR3") { - //if (Xunweighted.isNotNull()) { - - Eigen::CompleteOrthogonalDecomposition At( - Eigen::MatrixXd::Identity(len, len) - - Xoriginal.block(start_pos, 0, len, r_fe) * - meatXtX_inv * - X.block(start_pos, 0, len, r_fe).transpose() - ); - - //} - - // Eigen::ColPivHouseholderQR At( - // Eigen::MatrixXd::Identity(len, len) - - // Xoriginal.block(start_pos, 0, len, r_fe) * - // meatXtX_inv * - // X.block(start_pos, 0, len, r_fe).transpose() - // ); + Eigen::CompleteOrthogonalDecomposition At( + Eigen::MatrixXd::Identity(len, len) - + Xoriginal.block(start_pos, 0, len, r_fe) * + meatXtX_inv * + X.block(start_pos, 0, len, r_fe).transpose() + ); + if (Xunweighted.isNotNull()) { At_WX_inv = At.solve(Eigen::MatrixXd::Identity(len, len)).transpose() * X.block(start_pos, 0, len, r_fe); } else { - // Could speed up using LLT instead of QR for unweighted case as At is symmetric in that case At_WX_inv = At.solve(X.block(start_pos, 0, len, r_fe)); } - // Rcout << "At_WX_inv: " << std::endl << At_WX_inv << std::endl; - } if (ny > 1) { diff --git a/vignettes/mathematical-notes.Rmd b/vignettes/mathematical-notes.Rmd index a21cdc18..160d3292 100644 --- a/vignettes/mathematical-notes.Rmd +++ b/vignettes/mathematical-notes.Rmd @@ -93,31 +93,43 @@ For cluster-robust inference, we provide several estimators that are essentially | `"CR0"` | $(\mathbf{X}^{\top}\mathbf{X})^{-1} \sum^S_{s=1} \left[\mathbf{X}^\top_s \mathbf{e}_s\mathbf{e}^\top_s \mathbf{X}_s \right] (\mathbf{X}^{\top}\mathbf{X})^{-1}$ | $S - 1$ | | | `"stata"` | $\frac{N-1}{N-K}\frac{S}{S-1} (\mathbf{X}^{\top}\mathbf{X})^{-1} \sum^S_{s=1} \left[\mathbf{X}^\top_s \mathbf{e}_s\mathbf{e}^\top_s \mathbf{X}_s \right] (\mathbf{X}^{\top}\mathbf{X})^{-1}$ | $S - 1$ | The Stata variance estimator is the same as the CR0 estimate but with a special finite-sample correction. | | `"CR2"` (default) | $(\mathbf{X}^{\top}\mathbf{X})^{-1} \sum^S_{s=1} \left[\mathbf{X}^\top_s \mathbf{A}_s \mathbf{e}_s\mathbf{e}^\top_s \mathbf{A}^\top_s \mathbf{X}_s \right] (\mathbf{X}^{\top}\mathbf{X})^{-1}$ | $\frac{\left(\sum^S_{i = 1} \mathbf{p}^\top_i \mathbf{p}_i \right)^2}{\sum^S_{i=1}\sum^S_{j=1} \left(\mathbf{p}^\top_i \mathbf{p}_j \right)^2}$ | These estimates of the variance and degrees of freedom come from @pustejovskytipton2016, which is an extension to certain models with a particular set of dummy variables of the method proposed by @bellmccaffrey2002. Note that the degrees of freedom can vary for each coefficient. See below for more complete notation. | +| `"CR3"` | $(\mathbf{X}^{\top}\mathbf{X})^{-1} \sum^S_{s=1} \left[\mathbf{X}^\top_s \mathbf{A}_s \mathbf{e}_s\mathbf{e}^\top_s \mathbf{A}^\top_s \mathbf{X}_s \right] (\mathbf{X}^{\top}\mathbf{X})^{-1}$ | $S - 1$ * $S$ is the number of clusters * $\mathbf{X}_s$ is the rows of $\mathbf{X}$ that belong to cluster $s$ * $I_n$ is an identity matrix of size $n\times n$ * $\mathbf{e}_s$ is the elements of the residual matrix $\mathbf{e}$ in cluster $s$, or $\mathbf{e}_s = \mathbf{y}_s - \mathbf{X}_s \widehat{\beta}$ * $\mathbf{A}_s$ and $\mathbf{p}$ are defined in the notes below -**Further notes on CR2:** The variance estimator we implement is shown in equations (4) and (5) in @pustejovskytipton2016 and equation (11), where we set $\mathbf{\Phi}$ to be $I$, following @bellmccaffrey2002. Further note that the @pustejovskytipton2016 CR2 estimator and the @bellmccaffrey2002 estimator are identical when $\mathbf{B_s}$ is full rank. It could be rank-deficient if there were dummy variables, or fixed effects, that were also your clusters. In this case, the original @bellmccaffrey2002 estimator could not be computed. You can see the simpler @bellmccaffrey2002 estimator written up plainly on page 709 of @imbenskolesar2016 along with the degrees of freedom denoted as $K_{BM}$. +**Further notes on CR2 and CR3:** The variance estimator we implement for CR2 is shown in equations (4) and (5) in @pustejovskytipton2016 and equation (11), where we set $\mathbf{\Phi}$ to be $I$, following @bellmccaffrey2002. Further note that the @pustejovskytipton2016 CR2 estimator and the @bellmccaffrey2002 estimator are identical when $\mathbf{B_s}$ is full rank. It could be rank-deficient if there were dummy variables, or fixed effects, that were also your clusters. In this case, the original @bellmccaffrey2002 estimator could not be computed. You can see the simpler @bellmccaffrey2002 estimator written up plainly on page 709 of @imbenskolesar2016 along with the degrees of freedom denoted as $K_{BM}$. In the CR2 variance calculation, we get $\mathbf{A}_s$ as follows: $$ \begin{aligned} \mathbf{H} &= \mathbf{X}(\mathbf{X}^{\top}\mathbf{X})^{-1}\mathbf{X}^\top \\\\\\ -\mathbf{B}_s &= (I_{N} - \mathbf{H})_s (I_{N} - \mathbf{H})^\top_s \\\\\\ +\mathbf{B}_s &= (\mathbf{I}_{N} - \mathbf{H})_s (I_{N} - \mathbf{H})^\top_s \\\\\\ \mathbf{A}_s &= \mathbf{B}^{+1/2}_s \end{aligned} $$ -where $\mathbf{B}^{+1/2}_s$ is the symmetric square root of the Moore–Penrose inverse of $\mathbf{B}_s$ and $(I - \mathbf{H})_s$ are the $N_s$ columns that correspond to cluster $s$. To get the corresponding degrees of freedom, note that +where $\mathbf{B}^{+1/2}_s$ is the symmetric square root of the Moore–Penrose inverse of $\mathbf{B}_s$ and $(\mathbf{I}_{N} - \mathbf{H})_s$ are the $N_s$ columns that correspond to cluster $s$. To get the corresponding degrees of freedom, note that \[ -\mathbf{p}_s = (I_N - \mathbf{H})^\top_s \mathbf{A}_s \mathbf{X}_s (\mathbf{X}^{\top}\mathbf{X})^{-1} \mathbf{z}_{k} +\mathbf{p}_s = (\mathbf{I}_N - \mathbf{H})^\top_s \mathbf{A}_s \mathbf{X}_s (\mathbf{X}^{\top}\mathbf{X})^{-1} \mathbf{z}_{k} \] where $\mathbf{z}_{k}$ is a vector of length $K$, the number of coefficients, where the $k$th element is 1 and all other elements are 0. The $k$ signifies the coefficient for which we are computing the degrees of freedom. +In the CR3 variance calculation, we get $\mathbf{A}_s$ as follows: + +$$ +\begin{aligned} +\mathbf{H}_s &= \mathbf{X}_s(\mathbf{X}^{\top}\mathbf{X})^{-1}\mathbf{X}_s^\top \\\\\\ +\mathbf{A}_s &= (\mathbf{I}_{N_s} - \mathbf{H}_s)^{-1} +\end{aligned} +$$ + +where $(\mathbf{I}_{N_s} - \mathbf{H}_s)^{-1}$ is the Moore–Penrose inverse and $\mathbf{X}_s$ are the $N_s$ rows of $\mathbf{X}$ that correspond to cluster $s$. + ### Confidence intervals and hypothesis testing If $\widehat{\mathbb{V}}_k$ is the $k$th diagonal element of $\widehat{\mathbb{V}}$, we build confidence intervals using the user specified $\alpha$ as: @@ -194,6 +206,7 @@ Because Stata does not default to using finite sample corrections and tests with | `iv_robust(y ~ x | z, clusters = clust,` `se_type = "CR0")` | `ivregress 2sls y (x = z), vce(cl clust)` | Stata uses z-tests here. | | `iv_robust(y ~ x | z, clusters = clust,` `se_type = "stata")` | `ivregress 2sls y (x = z), vce(cl clust) small` | | | `iv_robust(y ~ x | z, clusters = clust,` `se_type = "CR2")` (default) | N/A | | +| `iv_robust(y ~ x | z, clusters = clust,` `se_type = "CR3")` | N/A | | ### Confidence intervals and hypothesis testing From 17f5c6217ff2e88be6cfa683ffcdd318a0bc4c0b Mon Sep 17 00:00:00 2001 From: Luke Sonnet Date: Tue, 23 Oct 2018 21:18:29 -0700 Subject: [PATCH 5/7] nf comments --- src/lm_robust_helper.cpp | 6 +++++- tests/testthat/helper-return-cleaners.R | 7 +------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/lm_robust_helper.cpp b/src/lm_robust_helper.cpp index 3d7dc7c2..e04cc1c3 100644 --- a/src/lm_robust_helper.cpp +++ b/src/lm_robust_helper.cpp @@ -439,6 +439,10 @@ List lm_variance(Eigen::Map& X, ); if (Xunweighted.isNotNull()) { + // We require the transpose in both cases, but in the unweighted + // case `A` is symmetric and is not needed to be transposed first + // Solve against the identity matrix and then transpose in order + // to allow the more common unweighted case to be simpler At_WX_inv = At.solve(Eigen::MatrixXd::Identity(len, len)).transpose() * X.block(start_pos, 0, len, r_fe); } else { At_WX_inv = At.solve(X.block(start_pos, 0, len, r_fe)); @@ -463,7 +467,7 @@ List lm_variance(Eigen::Map& X, } else { - if (se_type == "CR2" || se_type == "CR3") { + if ((se_type == "CR2") || (se_type == "CR3")) { half_meat.row(clust_num) = ei.block(start_pos, 0, len, 1).transpose() * At_WX_inv.leftCols(r); diff --git a/tests/testthat/helper-return-cleaners.R b/tests/testthat/helper-return-cleaners.R index 5123361b..ea9c8ac0 100644 --- a/tests/testthat/helper-return-cleaners.R +++ b/tests/testthat/helper-return-cleaners.R @@ -1,10 +1,5 @@ # This fn removes calls from function returns to make testing easier -rmcall <- function(obj) { - if (!is.null(obj[["call"]])) { - obj[["call"]] <- NULL - } - return(obj) -} +rmcall <- function(obj) structure(obj, call = NULL) # This fn casts tibbles as data.frames for equivalency tests # TODO implement this everywhere From 9f368777e31a188d24303de8898b8acaf6a7ea27 Mon Sep 17 00:00:00 2001 From: Luke Sonnet Date: Tue, 23 Oct 2018 21:25:55 -0700 Subject: [PATCH 6/7] nf comments --- tests/testthat/helper-return-cleaners.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/helper-return-cleaners.R b/tests/testthat/helper-return-cleaners.R index ea9c8ac0..47084274 100644 --- a/tests/testthat/helper-return-cleaners.R +++ b/tests/testthat/helper-return-cleaners.R @@ -1,5 +1,5 @@ # This fn removes calls from function returns to make testing easier -rmcall <- function(obj) structure(obj, call = NULL) +rmcall <- function(obj) {structure(obj, call = NULL)} # This fn casts tibbles as data.frames for equivalency tests # TODO implement this everywhere From 2e60ec761631aabe761af5b90e80fa5614cb12df Mon Sep 17 00:00:00 2001 From: Luke Sonnet Date: Tue, 23 Oct 2018 21:27:46 -0700 Subject: [PATCH 7/7] nf comments --- tests/testthat/helper-return-cleaners.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/testthat/helper-return-cleaners.R b/tests/testthat/helper-return-cleaners.R index 47084274..5123361b 100644 --- a/tests/testthat/helper-return-cleaners.R +++ b/tests/testthat/helper-return-cleaners.R @@ -1,5 +1,10 @@ # This fn removes calls from function returns to make testing easier -rmcall <- function(obj) {structure(obj, call = NULL)} +rmcall <- function(obj) { + if (!is.null(obj[["call"]])) { + obj[["call"]] <- NULL + } + return(obj) +} # This fn casts tibbles as data.frames for equivalency tests # TODO implement this everywhere