DeclareDesign · graemeblair · Feb 28, 2025 · Mar 30, 2024 · Jan 6, 2025 · Jan 7, 2025
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -1,12 +1,17 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+#
+# NOTE: This workflow is overkill for most R packages and
+# check-standard.yaml is likely a better choice.
+# usethis::use_github_action("check-standard") will install it.
 on:
   push:
-    branches:
-      - master
+    branches: [main, master]
   pull_request:
-    branches:
-      - master
 
-name: R-CMD-check
+name: R-CMD-check.yaml
+
+permissions: read-all
 
 jobs:
   R-CMD-check:
@@ -18,72 +23,40 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - {os: macOS-latest,   r: 'devel'}
-          - {os: macOS-latest,   r: 'release'}
-          - {os: macOS-latest,   r: 'oldrel'}
-          - {os: windows-latest, r: 'devel'}
+          - {os: macos-latest,   r: 'release'}
+
           - {os: windows-latest, r: 'release'}
-          - {os: windows-latest, r: 'oldrel'}
-          - {os: windows-latest, r: '3.5'}
-          - {os: ubuntu-16.04,   r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: 'oldrel',  rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: '3.5',     rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: '3.4',     rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04,   r: '3.3',     rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
+          # use 4.0 or 4.1 to check with rtools40's older compiler
+          - {os: windows-latest, r: 'oldrel-4'}
 
+          - {os: ubuntu-latest,  r: 'devel', http-user-agent: 'release'}
+          - {os: ubuntu-latest,  r: 'release'}
+          - {os: ubuntu-latest,  r: 'oldrel-1'}
+          - {os: ubuntu-latest,  r: 'oldrel-2'}
+          - {os: ubuntu-latest,  r: 'oldrel-3'}
+          - {os: ubuntu-latest,  r: 'oldrel-4'}
 
     env:
-      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
-      RSPM: ${{ matrix.config.rspm }}
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
 
-      - uses: r-lib/actions/setup-r@master
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.config.r }}
+          http-user-agent: ${{ matrix.config.http-user-agent }}
+          use-public-rspm: true
 
-      - uses: r-lib/actions/setup-pandoc@master
-
-      - name: Query dependencies
-        run: |
-          install.packages('remotes')
-          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
-        shell: Rscript {0}
-
-      - name: Cache R packages
-        if: runner.os != 'Windows'
-        uses: actions/cache@v1
+      - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          path: ${{ env.R_LIBS_USER }}
-          key: ${{ runner.os }}-r-new-${{ matrix.config.r }}-1-${{ hashFiles('.github/depends.Rds') }}
-          restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-1-
-
-      - name: Install system dependencies
-        if: runner.os == 'Linux'
-        env:
-          RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
-        run: |
-          Rscript -e "remotes::install_github('r-hub/sysreqs')"
-          sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))")
-          sudo -s eval "$sysreqs"
-
-      - name: Install dependencies
-        run: |
-          remotes::install_deps(dependencies = TRUE)
-          remotes::install_cran("rcmdcheck")
-        shell: Rscript {0}
-
-      - name: Check
-        env:
-          _R_CHECK_CRAN_INCOMING_: false
-        run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
-        shell: Rscript {0}
+          extra-packages: any::rcmdcheck
+          needs: check
 
-      - name: Upload check results
-        if: failure()
-        uses: actions/upload-artifact@master
+      - uses: r-lib/actions/check-r-package@v2
         with:
-          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
-          path: check
+          upload-snapshots: true
+          build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,16 @@
 Package: estimatr
 Type: Package
 Title: Fast Estimators for Design-Based Inference
-Version: 1.0.2
+Version: 1.0.6
 Authors@R: c(person("Graeme", "Blair", email = "graeme.blair@gmail.com", role = c("aut", "cre")),
              person("Jasper", "Cooper", email = "jjc2247@columbia.edu", role = c("aut")),
              person("Alexander", "Coppock", email = "alex.coppock@yale.edu", role = c("aut")),
              person("Macartan", "Humphreys", email = "macartan@gmail.com", role = c("aut")),
              person("Luke", "Sonnet", email = "luke.sonnet@gmail.com", role = c("aut")),
              person("Neal", "Fultz", email = "nfultz@gmail.com", role = c("ctb")),
              person("Lily", "Medina", email = "lilymiru@gmail.com", role = c("ctb")),
-             person("Russell", "Lenth", email = "russell-lenth@uiowa.edu", role = c("ctb")))
+             person("Russell", "Lenth", email = "russell-lenth@uiowa.edu", role = c("ctb")),
+             person("Molly", "Offer-Westort", email = "mollyow@uchicago.edu", role = c("ctb")))
 Description: Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) <doi:10.1214/12-AOAS583>.
 URL: https://declaredesign.org/r/estimatr/, https://github.com/DeclareDesign/estimatr
 BugReports: https://github.com/DeclareDesign/estimatr/issues

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,17 @@
+# estimatr 1.0.6 
+
+* Allows for prediction with lm_lin() when treatment is a factor and/or multi-valued.
+* Adds saved treatment_levels to the returned lm_lin model object.
+* Stops prediction for lm_lin if the treatment values in new data are not a subset of treatment_levels.
+* Standardizes model fit for lm_lin() models with no intercept.
+* Adds tests to ensure identical predictions from lm_lin() models where treatment is either numeric or factorial, and fit with/without an intercept.
+* Adds relevant examples to predict and lm_robust and lm_lin documentation.
+* Adds Molly Offer-Westort as a contributor.
+
+# estimatr 1.0.4
+
+* Test suite changes for M1 mac stay current on CRAN.
+
 # estimatr 1.0.2
 
 * Minor documentation changes to stay current on CRAN.

diff --git a/R/S3_predict.R b/R/S3_predict.R
@@ -61,6 +61,27 @@
 #' new_dat$w <- runif(n)
 #' predict(lm_out, newdata = new_dat, weights = w, interval = "prediction")
 #'
+#' # Works for 'lm_lin' models as well
+#' dat$z <- sample(1:3, size = nrow(dat), replace = TRUE)
+#' lmlin_out1 <- lm_lin(y ~ z, covariates = ~ x, data = dat)
+#' predict(lmlin_out1, newdata = dat, interval = "prediction")
+#'
+#' # Predictions from Lin models are equivalent with and without an intercept
+#' # and for multi-level treatments entered as numeric or factor variables
+#' lmlin_out2 <- lm_lin(y ~ z - 1, covariates = ~ x, data = dat)
+#' lmlin_out3 <- lm_lin(y ~ factor(z), covariates = ~ x, data = dat)
+#' lmlin_out4 <- lm_lin(y ~ factor(z) - 1, covariates = ~ x, data = dat)
+#'
+#' predict(lmlin_out2, newdata = dat, interval = "prediction")
+#' predict(lmlin_out3, newdata = dat, interval = "prediction")
+#' predict(lmlin_out4, newdata = dat, interval = "prediction")
+#'
+#' # In Lin models, predict will stop with an error message if new
+#' # treatment levels are supplied in the new data
+#' new_dat$z <- sample(0:3, size = nrow(new_dat), replace = TRUE)
+#' # predict(lmlin_out, newdata = new_dat)
+#'
+#'
 #' @export
 predict.lm_robust <- function(object,
                               newdata,
@@ -74,30 +95,6 @@ predict.lm_robust <- function(object,
 
   X <- get_X(object, newdata, na.action)
 
-  # lm_lin scaling
-  if (!is.null(object$scaled_center)) {
-    demeaned_covars <-
-      scale(
-        X[
-          ,
-          names(object$scaled_center),
-          drop = FALSE
-        ],
-        center = object$scaled_center,
-        scale = FALSE
-      )
-
-    # Interacted with treatment
-    treat_name <- attr(object$terms, "term.labels")[1]
-    interacted_covars <- X[, treat_name] * demeaned_covars
-
-    X <- cbind(
-      X[, attr(X, "assign") <= 1, drop = FALSE],
-      demeaned_covars,
-      interacted_covars
-    )
-  }
-
   # Get coefs
   coefs <- as.matrix(coef(object))
 
@@ -224,9 +221,64 @@ get_X <- function(object, newdata, na.action) {
 
   X <- model.matrix(rhs_terms, mf, contrasts.arg = object$contrasts)
 
+  # lm_lin scaling (moved down from predict.lm_robust)
+  if (!is.null(object$scaled_center)) {
+    # Covariates
+    demeaned_covars <-
+      scale(
+        X[
+          ,
+          names(object$scaled_center),
+          drop = FALSE
+        ],
+        center = object$scaled_center,
+        scale = FALSE
+      )
+
+    # Handle treatment variable reconstruction
+    treat_name <- attr(object$terms, "term.labels")[1]
+    treatment <- mf[, treat_name]
+    vals <- sort(unique(treatment))
+    old_vals <- object$treatment_levels
+
+    # Ensure treatment levels in newdata are subset of those for model fit
+    if (!all(as.character(vals) %in% as.character(old_vals))) {
+      stop(
+        "Levels of treatment variable in `newdata` must be a subset of those ",
+        "in the model fit."
+      )
+    }
+    treatment <- model.matrix(~ factor(treatment, levels = old_vals) - 1)
+
+    colnames(treatment) <- paste0(treat_name, "_", old_vals)
+    # Drop out first group if there is an intercept
+    if (attr(rhs_terms, "intercept") == 1) treatment <- treatment[, -1, drop = FALSE]
+
+    # Interactions matching original fitting logic
+    n_treat_cols <- ncol(treatment)
+    n_covars <- ncol(demeaned_covars)
+
+    interaction_matrix <- matrix(0, nrow = nrow(X), ncol = n_covars * n_treat_cols)
+
+    for (i in 1:n_covars) {
+      cols <- (i - 1) * n_treat_cols + (1:n_treat_cols)
+      interaction_matrix[, cols] <- treatment * demeaned_covars[, i]
+    }
+
+    X <- cbind(
+      if (attr(rhs_terms, "intercept") == 1) {
+        matrix(1, nrow = nrow(X), ncol = 1, dimnames = list(NULL, "(Intercept)"))
+      },
+      treatment,
+      if (attr(rhs_terms, "intercept") == 1 || ncol(treatment) == 1) demeaned_covars,
+      interaction_matrix
+    )
+  }
+
   return(X)
 }
 
+
 add_fes <- function(preds, object, newdata) {
 
   # Add factors!

diff --git a/R/estimatr_lm_lin.R b/R/estimatr_lm_lin.R
@@ -83,7 +83,7 @@
 #'   \item{weighted}{whether or not weights were applied}
 #'   \item{call}{the original function call}
 #'   \item{fitted.values}{the matrix of predicted means}
-#' We also return \code{terms} and \code{contrasts}, used by \code{predict},
+#' We also return \code{terms}, \code{contrasts}, and \code{treatment_levels}, used by \code{predict},
 #' and \code{scaled_center} (the means of each of the covariates used for centering them).
 #'
 #' @seealso \code{\link{lm_robust}}
@@ -127,21 +127,36 @@
 #'
 #' lm_lin(y ~ z_clust, covariates = ~ x, data = dat, clusters = clusterID)
 #'
-#' # Works with multi-valued treatments
+#' # Works with multi-valued treatments, whether treatment is specified as a
+#' # factor or not
 #' dat$z_multi <- sample(1:3, size = nrow(dat), replace = TRUE)
+#'
 #' lm_lin(y ~ z_multi, covariates = ~ x, data = dat)
+#' lm_lin(y ~ factor(z_multi), covariates = ~ x, data = dat)
 #'
 #' # Stratified estimator with blocks
 #' dat$blockID <- rep(1:5, each = 8)
 #' dat$z_block <- block_ra(blocks = dat$blockID)
 #'
 #' lm_lin(y ~ z_block, ~ factor(blockID), data = dat)
 #'
+#' # Fitting the model without an intercept provides estimates of mean outcomes
+#' # under each respective treatment condition
+#' lm_lin(y ~ z_multi - 1, covariates = ~ x, data = dat)
+#'
+#' # Predictions are the same in equivalent models with and without an intercept
+#' lmlin_out3 <- lm_lin(y ~ z_multi - 1, covariates = ~ x, data = dat)
+#' lmlin_out4 <- lm_lin(y ~ z_multi, covariates = ~ x, data = dat)
+#'
+#' predict(lmlin_out3, newdata = dat, se.fit = TRUE, interval = "confidence")
+#' predict(lmlin_out4, newdata = dat, se.fit = TRUE, interval = "confidence")
+#'
 #' \dontrun{
 #'   # Can also use 'margins' package if you have it installed to get
 #'   # marginal effects
 #'   library(margins)
-#'   lmlout <- lm_lin(y ~ z_block, ~ x, data = dat)
+#'   # Instruct 'margins' to treat z as a factor
+#'   lmlout <- lm_lin(y ~ factor(z_block), ~ x, data = dat)
 #'   summary(margins(lmlout))
 #'
 #'   # Can output results using 'texreg'
@@ -230,7 +245,7 @@ lm_lin <- function(formula,
   design_mat_treatment <- colnames(design_matrix)[treat_col]
 
   # Check case where treatment is not factor and is not binary
-  if (any(!(treatment %in% c(0, 1)))) {
+  if (any(!(treatment %in% c(0, 1))) | (!has_intercept&ncol(treatment) ==1) ) {
     # create dummies for non-factor treatment variable
 
     # Drop out first group if there is an intercept
@@ -313,20 +328,10 @@ lm_lin <- function(formula,
       interacted_covars
     )
   } else {
-    # If no intercept, but treatment is only one column,
-    # need to add base terms for covariates
-    if (n_treat_cols == 1) {
-      X <- cbind(
-        treatment,
-        demeaned_covars,
-        interacted_covars
-      )
-    } else {
-      X <- cbind(
-        treatment,
-        interacted_covars
-      )
-    }
+    X <- cbind(
+      treatment,
+      interacted_covars
+    )
   }
 
   # ----------
@@ -360,6 +365,12 @@ lm_lin <- function(formula,
 
   return_list[["scaled_center"]] <- center
   setNames(return_list[["scaled_center"]], original_covar_names)
+  # Store unique treatment values
+  if(attr(terms(model_data), "dataClasses")[attr(terms(model_data),"term.labels")[1]] == "factor"){
+    return_list[["treatment_levels"]] <- model_data$xlevels[[1]]
+  } else {
+    return_list[["treatment_levels"]] <- sort(unique(design_matrix[, design_mat_treatment]))
+  }
 
   return_list[["call"]] <- match.call()
 

diff --git a/cran-comments.md b/cran-comments.md
@@ -1,6 +1,6 @@
 ## Submission
 
-Small changes to update based on Kurt's 8/19/23 email and also address C++ warning in R CMD CHECK.
+Fixes M1 Mac errors.
 
 There are no changes to worse in reverse depends. 
 

diff --git a/estimatr.Rproj b/estimatr.Rproj
@@ -1,5 +1,5 @@
 Version: 1.0
-ProjectId: 533c90b7-36c9-4d31-ae3d-2adc22079fe1
+ProjectId: 2690710e-30ff-4c33-a75e-7b4e02172a9d
 
 RestoreWorkspace: Default
 SaveWorkspace: Default