Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions R/quadratic_forms.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
#' \itemize{
#' \item \strong{"Yates-Grundy"}: The Yates-Grundy variance estimator based on
#' first-order and second-order inclusion probabilities. If this is used,
#' the argument \code{joint_probs} must also be used.
#' the argument \code{joint_probs} must also be used to specify the
#' joint probabilities for the sample.
#' \item \strong{"Horvitz-Thompson"}: The Horvitz-Thompson variance estimator based on
#' first-order and second-order inclusion probabilities. If this is used,
#' the argument \code{joint_probs} must also be used.
#' the argument \code{joint_probs} must also be used to specify the
#' joint probabilities for the sample.
#' \item \strong{"Stratified Multistage SRS"}: The usual stratified multistage variance estimator
#' based on estimating the variance of cluster totals within strata at each stage.
#' If this option is used, then it is necessary to also use the arguments
Expand All @@ -41,6 +43,10 @@
#' for variance estimation for systematic sampling.
#' \item \strong{"Deville-Tille"}: The estimator of Deville and Tillé (2005),
#' developed for balanced sampling using the cube method.
#' \item \strong{"Aronow-Samii"}: The estimator of Aronow and Samii (2013),
#' a conservative variance estimator for designs where some
#' pairwise inclusion probabilities are nonzero. Requires \code{joint_probs},
#' which should contain the \emph{population} joint probabilities for this estimator.
#' }
#' @param probs Required if \code{variance_estimator} equals \code{"Deville-1"}, \code{"Deville-2"}, or \code{"Breidt-Chauvet"}.
#' This should be a matrix or data frame of sampling probabilities.
Expand Down Expand Up @@ -86,6 +92,7 @@
#' | Deville-Tille | Required | | Required | Optional | | | Required |
#' | Yates-Grundy | | Required | | | | | |
#' | Horvitz-Thompson | | Required | | | | | |
#' | Aronow-Samii | | Required | | | | | |
#' @md
#' @return The matrix of the quadratic form representing the variance estimator.
#' @export
Expand Down Expand Up @@ -172,11 +179,11 @@ make_quad_form_matrix <- function(variance_estimator = "Yates-Grundy",
cluster_ids = NULL,
strata_ids = NULL,
strata_pop_sizes = NULL,
sort_order = NULL,
sort_order = NULL,
aux_vars = NULL) {

accepted_variance_estimators <- c(
"Yates-Grundy", "Horvitz-Thompson",
"Yates-Grundy", "Horvitz-Thompson", "Aronow-Samii",
"Ultimate Cluster", "Stratified Multistage SRS",
"SD1", "SD2", "Deville-1", "Deville-2", "Deville-Tille"
)
Expand All @@ -190,7 +197,7 @@ make_quad_form_matrix <- function(variance_estimator = "Yates-Grundy",
variance_estimator) |> stop()
}

if (variance_estimator %in% c("Yates-Grundy", "Horvitz-Thompson")) {
if (variance_estimator %in% c("Yates-Grundy", "Horvitz-Thompson", "Aronow-Samii")) {
if (is.null(joint_probs)) {
sprintf("For `variance_estimator='%s'`, must supply a matrix to the argument `joint_probs`.",
variance_estimator) |>
Expand Down Expand Up @@ -325,6 +332,24 @@ make_quad_form_matrix <- function(variance_estimator = "Yates-Grundy",
quad_form_matrix[lower.tri(quad_form_matrix)] <- t(quad_form_matrix)[lower.tri(quad_form_matrix)]
}

if (variance_estimator == "Aronow-Samii") {
n <- number_of_ultimate_units
quad_form_matrix <- Matrix::Matrix(0, nrow = n, ncol = n) |> as("symmetricMatrix")
diag(quad_form_matrix) <- 1 - diag(joint_probs)
for (i in seq_len(n)) {
for (j in seq(from = i, to = n, by = 1)[-1]) {
if (joint_probs[i,j] > 0) {
quad_form_matrix[i,j] <- 1 - (joint_probs[i,i] * joint_probs[j,j])/joint_probs[i,j]
} else {
quad_form_matrix[i,j] <- 0
quad_form_matrix[i,i] <- quad_form_matrix[i,i] + (joint_probs[i,i])
quad_form_matrix[j,j] <- quad_form_matrix[j,j] + (joint_probs[j,j])
}
}
}
quad_form_matrix[lower.tri(quad_form_matrix)] <- t(quad_form_matrix)[lower.tri(quad_form_matrix)]
}

if (variance_estimator == "Yates-Grundy") {
n <- number_of_ultimate_units
quad_form_matrix <- -(1 - as(Matrix::tcrossprod(diag(joint_probs)), "symmetricMatrix")/as(joint_probs, "symmetricMatrix"))
Expand Down
43 changes: 43 additions & 0 deletions R/variance-estimators.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,50 @@
#' and \eqn{\mathbf{z}_k} denotes the vector of auxiliary variables for observation \eqn{k}
#' included in sample \eqn{S}, with inclusion probability \eqn{\pi_k}. The value \eqn{c_k} is set to \eqn{\frac{n}{n-q}(1-\pi_k)},
#' where \eqn{n} is the number of observations and \eqn{q} is the number of auxiliary variables.
#' @section Aronow-Samii:
#' The Aronow-Samii variance estimator is intended for use in
#' non-measurable survey designs, where some pairs of units in the population have zero
#' pairwise inclusion probabilities (i.e., where \eqn{\pi_{ij} = 0} for some \eqn{i} and \eqn{j}).
#' An important example is when a design samples only one unit from a stratum
#' with multiple units. Unbiased variance estimation is not in general possible for such designs.
#'
#' The estimator proposed by Aronow and Samii (2013) is conservative in the sense that
#' its bias is guaranteed to be weakly greater than zero. Aronow and Samii (2013)
#' provide a detailed discussion of this estimator's properties, concluding
#' that the estimator should be effective for designs that sample one unit
#' per stratum. However, they caution against using the estimator
#' for systematic samples, as in such cases it may be too upwardly biased to be useful.
#'
#' This estimator requires the joint probabilities for the entire \emph{population},
#' or at least for each stratum in the population. Thus, it cannot
#' be used with the functions \code{as_fays_gen_rep_design()} or \code{as_gen_boot_design()}.
#' Instead, it can only be used with \code{make_quad_form_matrix()}.
#'
#' Let \eqn{I_i} equal 1 if unit \eqn{i} is included in the sample and equal if not,
#' and let \eqn{I_j} equal 1 if unit \eqn{j} is included in the sample and equal 0 if not.
#'
#' Then the Aronow-Samii variance estimator is written as follows:
#' \deqn{
#' v(\hat{Y}) = T_1 + T_2
#' }
#'
#' where
#' \deqn{
#' T_1 = \sum_{i \in U}\sum_{j \in \{U: \pi_{ij} > 0\}} I_i I_j (1 - \frac{\pi_i \pi_j}{\pi_{ij}}) \frac{y_i}{\pi_i} \frac{y_j}{\pi_j}
#' }
#'
#' and
#'
#' \deqn{
#' T_2 = \sum_{i \in U}\sum_{j \in \{U: \pi_{ij} = 0\}} I_i \frac{y_i^2}{2 \pi_i} + I_j \frac{y_j^2}{2 \pi_j}
#' }
#'
#' @references
#'
#' Aronow, P., and Samii, C. (2013). "\emph{Conservative variance estimation
#' for sampling designs with zero pairwise inclusion probabilities}."
#' \strong{Survey Methodology}, Statistics Canada, 39(1), 231-241.
#'
#' Ash, S. (2014). "\emph{Using successive difference replication for estimating variances}."
#' \strong{Survey Methodology}, Statistics Canada, 40(1), 47–59.
#'
Expand Down
11 changes: 9 additions & 2 deletions man/make_quad_form_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions man/variance-estimators.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions tests/testthat/test-quadratic-forms.R
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,36 @@ library_stsys_sample <- library_stsys_sample |>
)

})

# Check "Aronow-Samii" results ----

test_that("Correct quadratic form for Aronow-Samii", {

joint_probs <- matrix(
c(1/2, 0,
0 , 1/2),
nrow = 2, ncol = 2
)

expect_equal(
object = make_quad_form_matrix(
"Aronow-Samii",
joint_probs = joint_probs
) |> as.matrix(),
expected = diag(2)
)

expect_equal(
object = make_quad_form_matrix(
"Aronow-Samii",
joint_probs = election_jointprob
) |> as.matrix(),
expected = make_quad_form_matrix(
"Horvitz-Thompson",
joint_probs = election_jointprob
) |> as.matrix()
)
})

# Ensure function checks inputs for issues ----

Expand Down