From eaa839fb6c2d9d341261452dc141a2957fd1b802 Mon Sep 17 00:00:00 2001 From: Ben Schneider Date: Sat, 22 Jun 2024 15:08:37 -0400 Subject: [PATCH] Added Aronow-Samii estimator. --- R/quadratic_forms.R | 35 ++++++++++++++++++--- R/variance-estimators.R | 43 ++++++++++++++++++++++++++ man/make_quad_form_matrix.Rd | 11 +++++-- man/variance-estimators.Rd | 44 +++++++++++++++++++++++++++ tests/testthat/test-quadratic-forms.R | 30 ++++++++++++++++++ 5 files changed, 156 insertions(+), 7 deletions(-) diff --git a/R/quadratic_forms.R b/R/quadratic_forms.R index 03f1c547..d970c671 100644 --- a/R/quadratic_forms.R +++ b/R/quadratic_forms.R @@ -13,10 +13,12 @@ #' \itemize{ #' \item \strong{"Yates-Grundy"}: The Yates-Grundy variance estimator based on #' first-order and second-order inclusion probabilities. If this is used, -#' the argument \code{joint_probs} must also be used. +#' the argument \code{joint_probs} must also be used to specify the +#' joint probabilities for the sample. #' \item \strong{"Horvitz-Thompson"}: The Horvitz-Thompson variance estimator based on #' first-order and second-order inclusion probabilities. If this is used, -#' the argument \code{joint_probs} must also be used. +#' the argument \code{joint_probs} must also be used to specify the +#' joint probabilities for the sample. #' \item \strong{"Stratified Multistage SRS"}: The usual stratified multistage variance estimator #' based on estimating the variance of cluster totals within strata at each stage. #' If this option is used, then it is necessary to also use the arguments @@ -41,6 +43,10 @@ #' for variance estimation for systematic sampling. #' \item \strong{"Deville-Tille"}: The estimator of Deville and Tillé (2005), #' developed for balanced sampling using the cube method. +#' \item \strong{"Aronow-Samii"}: The estimator of Aronow and Samii (2013), +#' a conservative variance estimator for designs where some +#' pairwise inclusion probabilities are nonzero. Requires \code{joint_probs}, +#' which should contain the \emph{population} joint probabilities for this estimator. #' } #' @param probs Required if \code{variance_estimator} equals \code{"Deville-1"}, \code{"Deville-2"}, or \code{"Breidt-Chauvet"}. #' This should be a matrix or data frame of sampling probabilities. @@ -86,6 +92,7 @@ #' | Deville-Tille | Required | | Required | Optional | | | Required | #' | Yates-Grundy | | Required | | | | | | #' | Horvitz-Thompson | | Required | | | | | | +#' | Aronow-Samii | | Required | | | | | | #' @md #' @return The matrix of the quadratic form representing the variance estimator. #' @export @@ -172,11 +179,11 @@ make_quad_form_matrix <- function(variance_estimator = "Yates-Grundy", cluster_ids = NULL, strata_ids = NULL, strata_pop_sizes = NULL, - sort_order = NULL, + sort_order = NULL, aux_vars = NULL) { accepted_variance_estimators <- c( - "Yates-Grundy", "Horvitz-Thompson", + "Yates-Grundy", "Horvitz-Thompson", "Aronow-Samii", "Ultimate Cluster", "Stratified Multistage SRS", "SD1", "SD2", "Deville-1", "Deville-2", "Deville-Tille" ) @@ -190,7 +197,7 @@ make_quad_form_matrix <- function(variance_estimator = "Yates-Grundy", variance_estimator) |> stop() } - if (variance_estimator %in% c("Yates-Grundy", "Horvitz-Thompson")) { + if (variance_estimator %in% c("Yates-Grundy", "Horvitz-Thompson", "Aronow-Samii")) { if (is.null(joint_probs)) { sprintf("For `variance_estimator='%s'`, must supply a matrix to the argument `joint_probs`.", variance_estimator) |> @@ -325,6 +332,24 @@ make_quad_form_matrix <- function(variance_estimator = "Yates-Grundy", quad_form_matrix[lower.tri(quad_form_matrix)] <- t(quad_form_matrix)[lower.tri(quad_form_matrix)] } + if (variance_estimator == "Aronow-Samii") { + n <- number_of_ultimate_units + quad_form_matrix <- Matrix::Matrix(0, nrow = n, ncol = n) |> as("symmetricMatrix") + diag(quad_form_matrix) <- 1 - diag(joint_probs) + for (i in seq_len(n)) { + for (j in seq(from = i, to = n, by = 1)[-1]) { + if (joint_probs[i,j] > 0) { + quad_form_matrix[i,j] <- 1 - (joint_probs[i,i] * joint_probs[j,j])/joint_probs[i,j] + } else { + quad_form_matrix[i,j] <- 0 + quad_form_matrix[i,i] <- quad_form_matrix[i,i] + (joint_probs[i,i]) + quad_form_matrix[j,j] <- quad_form_matrix[j,j] + (joint_probs[j,j]) + } + } + } + quad_form_matrix[lower.tri(quad_form_matrix)] <- t(quad_form_matrix)[lower.tri(quad_form_matrix)] + } + if (variance_estimator == "Yates-Grundy") { n <- number_of_ultimate_units quad_form_matrix <- -(1 - as(Matrix::tcrossprod(diag(joint_probs)), "symmetricMatrix")/as(joint_probs, "symmetricMatrix")) diff --git a/R/variance-estimators.R b/R/variance-estimators.R index 05a477e4..873ac2e0 100644 --- a/R/variance-estimators.R +++ b/R/variance-estimators.R @@ -141,7 +141,50 @@ #' and \eqn{\mathbf{z}_k} denotes the vector of auxiliary variables for observation \eqn{k} #' included in sample \eqn{S}, with inclusion probability \eqn{\pi_k}. The value \eqn{c_k} is set to \eqn{\frac{n}{n-q}(1-\pi_k)}, #' where \eqn{n} is the number of observations and \eqn{q} is the number of auxiliary variables. +#' @section Aronow-Samii: +#' The Aronow-Samii variance estimator is intended for use in +#' non-measurable survey designs, where some pairs of units in the population have zero +#' pairwise inclusion probabilities (i.e., where \eqn{\pi_{ij} = 0} for some \eqn{i} and \eqn{j}). +#' An important example is when a design samples only one unit from a stratum +#' with multiple units. Unbiased variance estimation is not in general possible for such designs. +#' +#' The estimator proposed by Aronow and Samii (2013) is conservative in the sense that +#' its bias is guaranteed to be weakly greater than zero. Aronow and Samii (2013) +#' provide a detailed discussion of this estimator's properties, concluding +#' that the estimator should be effective for designs that sample one unit +#' per stratum. However, they caution against using the estimator +#' for systematic samples, as in such cases it may be too upwardly biased to be useful. +#' +#' This estimator requires the joint probabilities for the entire \emph{population}, +#' or at least for each stratum in the population. Thus, it cannot +#' be used with the functions \code{as_fays_gen_rep_design()} or \code{as_gen_boot_design()}. +#' Instead, it can only be used with \code{make_quad_form_matrix()}. +#' +#' Let \eqn{I_i} equal 1 if unit \eqn{i} is included in the sample and equal if not, +#' and let \eqn{I_j} equal 1 if unit \eqn{j} is included in the sample and equal 0 if not. +#' +#' Then the Aronow-Samii variance estimator is written as follows: +#' \deqn{ +#' v(\hat{Y}) = T_1 + T_2 +#' } +#' +#' where +#' \deqn{ +#' T_1 = \sum_{i \in U}\sum_{j \in \{U: \pi_{ij} > 0\}} I_i I_j (1 - \frac{\pi_i \pi_j}{\pi_{ij}}) \frac{y_i}{\pi_i} \frac{y_j}{\pi_j} +#' } +#' +#' and +#' +#' \deqn{ +#' T_2 = \sum_{i \in U}\sum_{j \in \{U: \pi_{ij} = 0\}} I_i \frac{y_i^2}{2 \pi_i} + I_j \frac{y_j^2}{2 \pi_j} +#' } +#' #' @references +#' +#' Aronow, P., and Samii, C. (2013). "\emph{Conservative variance estimation +#' for sampling designs with zero pairwise inclusion probabilities}." +#' \strong{Survey Methodology}, Statistics Canada, 39(1), 231-241. +#' #' Ash, S. (2014). "\emph{Using successive difference replication for estimating variances}." #' \strong{Survey Methodology}, Statistics Canada, 40(1), 47–59. #' diff --git a/man/make_quad_form_matrix.Rd b/man/make_quad_form_matrix.Rd index 118b3a07..087a9add 100644 --- a/man/make_quad_form_matrix.Rd +++ b/man/make_quad_form_matrix.Rd @@ -22,10 +22,12 @@ Options include: \itemize{ \item \strong{"Yates-Grundy"}: The Yates-Grundy variance estimator based on first-order and second-order inclusion probabilities. If this is used, -the argument \code{joint_probs} must also be used. +the argument \code{joint_probs} must also be used to specify the +joint probabilities for the sample. \item \strong{"Horvitz-Thompson"}: The Horvitz-Thompson variance estimator based on first-order and second-order inclusion probabilities. If this is used, -the argument \code{joint_probs} must also be used. +the argument \code{joint_probs} must also be used to specify the +joint probabilities for the sample. \item \strong{"Stratified Multistage SRS"}: The usual stratified multistage variance estimator based on estimating the variance of cluster totals within strata at each stage. If this option is used, then it is necessary to also use the arguments @@ -50,6 +52,10 @@ This estimator is the basis of the "successive-differences replication" estimato for variance estimation for systematic sampling. \item \strong{"Deville-Tille"}: The estimator of Deville and Tillé (2005), developed for balanced sampling using the cube method. +\item \strong{"Aronow-Samii"}: The estimator of Aronow and Samii (2013), +a conservative variance estimator for designs where some +pairwise inclusion probabilities are nonzero. Requires \code{joint_probs}, +which should contain the \emph{population} joint probabilities for this estimator. }} \item{probs}{Required if \code{variance_estimator} equals \code{"Deville-1"}, \code{"Deville-2"}, or \code{"Breidt-Chauvet"}. @@ -118,6 +124,7 @@ Below are the arguments that are required or optional for each variance estimato Deville-Tille \tab Required \tab \tab Required \tab Optional \tab \tab \tab Required \cr Yates-Grundy \tab \tab Required \tab \tab \tab \tab \tab \cr Horvitz-Thompson \tab \tab Required \tab \tab \tab \tab \tab \cr + Aronow-Samii \tab \tab Required \tab \tab \tab \tab \tab \cr } } diff --git a/man/variance-estimators.Rd b/man/variance-estimators.Rd index e9c687fb..a647b01c 100644 --- a/man/variance-estimators.Rd +++ b/man/variance-estimators.Rd @@ -172,7 +172,51 @@ included in sample \eqn{S}, with inclusion probability \eqn{\pi_k}. The value \e where \eqn{n} is the number of observations and \eqn{q} is the number of auxiliary variables. } +\section{Aronow-Samii}{ + +The Aronow-Samii variance estimator is intended for use in +non-measurable survey designs, where some pairs of units in the population have zero +pairwise inclusion probabilities (i.e., where \eqn{\pi_{ij} = 0} for some \eqn{i} and \eqn{j}). +An important example is when a design samples only one unit from a stratum +with multiple units. Unbiased variance estimation is not in general possible for such designs. + +The estimator proposed by Aronow and Samii (2013) is conservative in the sense that +its bias is guaranteed to be weakly greater than zero. Aronow and Samii (2013) +provide a detailed discussion of this estimator's properties, concluding +that the estimator should be effective for designs that sample one unit +per stratum. However, they caution against using the estimator +for systematic samples, as in such cases it may be too upwardly biased to be useful. + +This estimator requires the joint probabilities for the entire \emph{population}, +or at least for each stratum in the population. Thus, it cannot +be used with the functions \code{as_fays_gen_rep_design()} or \code{as_gen_boot_design()}. +Instead, it can only be used with \code{make_quad_form_matrix()}. + +Let \eqn{I_i} equal 1 if unit \eqn{i} is included in the sample and equal if not, +and let \eqn{I_j} equal 1 if unit \eqn{j} is included in the sample and equal 0 if not. + +Then the Aronow-Samii variance estimator is written as follows: +\deqn{ + v(\hat{Y}) = T_1 + T_2 +} + +where +\deqn{ + T_1 = \sum_{i \in U}\sum_{j \in \{U: \pi_{ij} > 0\}} I_i I_j (1 - \frac{\pi_i \pi_j}{\pi_{ij}}) \frac{y_i}{\pi_i} \frac{y_j}{\pi_j} +} + +and + +\deqn{ + T_2 = \sum_{i \in U}\sum_{j \in \{U: \pi_{ij} = 0\}} I_i \frac{y_i^2}{2 \pi_i} + I_j \frac{y_j^2}{2 \pi_j} +} +} + \references{ +Aronow, P., and Samii, C. (2013). "\emph{Conservative variance estimation +for sampling designs with zero pairwise inclusion probabilities}." +\strong{Survey Methodology}, Statistics Canada, 39(1), 231-241. + Ash, S. (2014). "\emph{Using successive difference replication for estimating variances}." \strong{Survey Methodology}, Statistics Canada, 40(1), 47–59. diff --git a/tests/testthat/test-quadratic-forms.R b/tests/testthat/test-quadratic-forms.R index cd33bf8b..b9b88566 100644 --- a/tests/testthat/test-quadratic-forms.R +++ b/tests/testthat/test-quadratic-forms.R @@ -573,6 +573,36 @@ library_stsys_sample <- library_stsys_sample |> ) }) + +# Check "Aronow-Samii" results ---- + + test_that("Correct quadratic form for Aronow-Samii", { + + joint_probs <- matrix( + c(1/2, 0, + 0 , 1/2), + nrow = 2, ncol = 2 + ) + + expect_equal( + object = make_quad_form_matrix( + "Aronow-Samii", + joint_probs = joint_probs + ) |> as.matrix(), + expected = diag(2) + ) + + expect_equal( + object = make_quad_form_matrix( + "Aronow-Samii", + joint_probs = election_jointprob + ) |> as.matrix(), + expected = make_quad_form_matrix( + "Horvitz-Thompson", + joint_probs = election_jointprob + ) |> as.matrix() + ) + }) # Ensure function checks inputs for issues ----