bschneidr · bschneidr · Jun 22, 2024
diff --git a/R/quadratic_forms.R b/R/quadratic_forms.R
@@ -13,10 +13,12 @@
 #' \itemize{
 #'   \item \strong{"Yates-Grundy"}: The Yates-Grundy variance estimator based on
 #'     first-order and second-order inclusion probabilities. If this is used,
-#'     the argument \code{joint_probs} must also be used.
+#'     the argument \code{joint_probs} must also be used to specify the 
+#'     joint probabilities for the sample.
 #'   \item \strong{"Horvitz-Thompson"}: The Horvitz-Thompson variance estimator based on
 #'     first-order and second-order inclusion probabilities. If this is used,
-#'     the argument \code{joint_probs} must also be used.
+#'     the argument \code{joint_probs} must also be used to specify the 
+#'     joint probabilities for the sample.
 #'   \item \strong{"Stratified Multistage SRS"}: The usual stratified multistage variance estimator
 #'     based on estimating the variance of cluster totals within strata at each stage.
 #'     If this option is used, then it is necessary to also use the arguments
@@ -41,6 +43,10 @@
 #'     for variance estimation for systematic sampling.
 #'   \item \strong{"Deville-Tille"}: The estimator of Deville and Tillé (2005),
 #'     developed for balanced sampling using the cube method.
+#'   \item \strong{"Aronow-Samii"}: The estimator of Aronow and Samii (2013),
+#'     a conservative variance estimator for designs where some
+#'     pairwise inclusion probabilities are nonzero. Requires \code{joint_probs},
+#'     which should contain the \emph{population} joint probabilities for this estimator.
 #' }
 #' @param probs Required if \code{variance_estimator} equals \code{"Deville-1"}, \code{"Deville-2"}, or \code{"Breidt-Chauvet"}.
 #' This should be a matrix or data frame of sampling probabilities.
@@ -86,6 +92,7 @@
 #' | Deville-Tille            | Required   |             | Required    | Optional    |                  |            | Required |
 #' | Yates-Grundy             |            | Required    |             |             |                  |            |          |
 #' | Horvitz-Thompson         |            | Required    |             |             |                  |            |          |
+#' | Aronow-Samii             |            | Required    |             |             |                  |            |          |
 #' @md
 #' @return The matrix of the quadratic form representing the variance estimator.
 #' @export
@@ -172,11 +179,11 @@ make_quad_form_matrix <- function(variance_estimator = "Yates-Grundy",
                                   cluster_ids = NULL,
                                   strata_ids = NULL,
                                   strata_pop_sizes = NULL,
-                                  sort_order = NULL,
+                                  sort_order = NULL, 
                                   aux_vars = NULL) {
 
   accepted_variance_estimators <- c(
-    "Yates-Grundy", "Horvitz-Thompson",
+    "Yates-Grundy", "Horvitz-Thompson", "Aronow-Samii",
     "Ultimate Cluster", "Stratified Multistage SRS",
     "SD1", "SD2", "Deville-1", "Deville-2", "Deville-Tille"
   )
@@ -190,7 +197,7 @@ make_quad_form_matrix <- function(variance_estimator = "Yates-Grundy",
             variance_estimator) |> stop()
   }
 
-  if (variance_estimator %in% c("Yates-Grundy", "Horvitz-Thompson")) {
+  if (variance_estimator %in% c("Yates-Grundy", "Horvitz-Thompson", "Aronow-Samii")) {
     if (is.null(joint_probs)) {
       sprintf("For `variance_estimator='%s'`, must supply a matrix to the argument `joint_probs`.",
               variance_estimator) |>
@@ -325,6 +332,24 @@ make_quad_form_matrix <- function(variance_estimator = "Yates-Grundy",
     quad_form_matrix[lower.tri(quad_form_matrix)] <- t(quad_form_matrix)[lower.tri(quad_form_matrix)]
   }
 
+  if (variance_estimator == "Aronow-Samii") {
+    n <- number_of_ultimate_units
+    quad_form_matrix <- Matrix::Matrix(0, nrow = n, ncol = n) |> as("symmetricMatrix")
+    diag(quad_form_matrix) <- 1 - diag(joint_probs)
+    for (i in seq_len(n)) {
+      for (j in seq(from = i, to = n, by = 1)[-1]) {
+        if (joint_probs[i,j] > 0) {
+          quad_form_matrix[i,j] <- 1 - (joint_probs[i,i] * joint_probs[j,j])/joint_probs[i,j]
+        } else {
+          quad_form_matrix[i,j] <- 0
+          quad_form_matrix[i,i] <- quad_form_matrix[i,i] + (joint_probs[i,i])
+          quad_form_matrix[j,j] <- quad_form_matrix[j,j] + (joint_probs[j,j])
+        }
+      }
+    }
+    quad_form_matrix[lower.tri(quad_form_matrix)] <- t(quad_form_matrix)[lower.tri(quad_form_matrix)]
+  }
+
   if (variance_estimator == "Yates-Grundy") {
     n <- number_of_ultimate_units
     quad_form_matrix <- -(1 - as(Matrix::tcrossprod(diag(joint_probs)), "symmetricMatrix")/as(joint_probs, "symmetricMatrix"))

diff --git a/R/variance-estimators.R b/R/variance-estimators.R
@@ -141,7 +141,50 @@
 #' and \eqn{\mathbf{z}_k} denotes the vector of auxiliary variables for observation \eqn{k}
 #' included in sample \eqn{S}, with inclusion probability \eqn{\pi_k}. The value \eqn{c_k} is set to \eqn{\frac{n}{n-q}(1-\pi_k)},
 #' where \eqn{n} is the number of observations and \eqn{q} is the number of auxiliary variables.
+#' @section Aronow-Samii:
+#' The Aronow-Samii variance estimator is intended for use in 
+#' non-measurable survey designs, where some pairs of units in the population have zero
+#' pairwise inclusion probabilities (i.e., where \eqn{\pi_{ij} = 0} for some \eqn{i} and \eqn{j}).
+#' An important example is when a design samples only one unit from a stratum
+#' with multiple units. Unbiased variance estimation is not in general possible for such designs.
+#' 
+#' The estimator proposed by Aronow and Samii (2013) is conservative in the sense that
+#' its bias is guaranteed to be weakly greater than zero. Aronow and Samii (2013)
+#' provide a detailed discussion of this estimator's properties, concluding
+#' that the estimator should be effective for designs that sample one unit
+#' per stratum. However, they caution against using the estimator
+#' for systematic samples, as in such cases it may be too upwardly biased to be useful.
+#' 
+#' This estimator requires the joint probabilities for the entire \emph{population},
+#' or at least for each stratum in the population. Thus, it cannot
+#' be used with the functions \code{as_fays_gen_rep_design()} or \code{as_gen_boot_design()}.
+#' Instead, it can only be used with \code{make_quad_form_matrix()}.
+#' 
+#' Let \eqn{I_i} equal 1 if unit \eqn{i} is included in the sample and equal if not,
+#' and let \eqn{I_j} equal 1 if unit \eqn{j} is included in the sample and equal 0 if not.
+#' 
+#' Then the Aronow-Samii variance estimator is written as follows:
+#' \deqn{
+#'   v(\hat{Y}) = T_1 + T_2
+#' }
+#' 
+#' where
+#' \deqn{
+#'   T_1 = \sum_{i \in U}\sum_{j \in \{U: \pi_{ij} > 0\}} I_i I_j (1 - \frac{\pi_i \pi_j}{\pi_{ij}}) \frac{y_i}{\pi_i} \frac{y_j}{\pi_j}
+#' }
+#' 
+#' and
+#' 
+#' \deqn{
+#'   T_2 = \sum_{i \in U}\sum_{j \in \{U: \pi_{ij} = 0\}} I_i \frac{y_i^2}{2 \pi_i} + I_j \frac{y_j^2}{2 \pi_j}
+#' }
+#' 
 #' @references
+#' 
+#' Aronow, P., and Samii, C. (2013). "\emph{Conservative variance estimation 
+#' for sampling designs with zero pairwise inclusion probabilities}."
+#' \strong{Survey Methodology}, Statistics Canada, 39(1), 231-241.
+#' 
 #' Ash, S. (2014). "\emph{Using successive difference replication for estimating variances}."
 #' \strong{Survey Methodology}, Statistics Canada, 40(1), 47–59.
 #'

diff --git a/man/make_quad_form_matrix.Rd b/man/make_quad_form_matrix.Rd
diff --git a/man/variance-estimators.Rd b/man/variance-estimators.Rd
diff --git a/tests/testthat/test-quadratic-forms.R b/tests/testthat/test-quadratic-forms.R
@@ -573,6 +573,36 @@ library_stsys_sample <- library_stsys_sample |>
     )
 
   })
+
+# Check "Aronow-Samii" results ----
+
+  test_that("Correct quadratic form for Aronow-Samii", {
+
+    joint_probs <- matrix(
+      c(1/2, 0,
+        0  , 1/2),
+      nrow = 2, ncol = 2
+    )
+
+    expect_equal(
+      object = make_quad_form_matrix(
+        "Aronow-Samii", 
+        joint_probs = joint_probs
+      ) |> as.matrix(),
+      expected = diag(2)
+    )
+
+    expect_equal(
+      object = make_quad_form_matrix(
+        "Aronow-Samii",
+        joint_probs = election_jointprob
+      ) |> as.matrix(),
+      expected = make_quad_form_matrix(
+        "Horvitz-Thompson",
+        joint_probs = election_jointprob
+      ) |> as.matrix()
+    )
+  })
 
 # Ensure function checks inputs for issues ----