diff --git a/DESCRIPTION b/DESCRIPTION
index 94dc35c..35e75d3 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -55,4 +55,4 @@ LazyData: yes
 LazyDataCompression: xz
 LazyLoad: yes
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
index da53ada..7c64803 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,10 +7,8 @@ export(get_census_data)
 export(predict_race)
 import(PL94171)
 importFrom(Rcpp,evalCpp)
-importFrom(dplyr,coalesce)
 importFrom(dplyr,pull)
 importFrom(furrr,future_map_dfr)
-importFrom(piggyback,pb_download)
 importFrom(purrr,map_dfr)
 importFrom(rlang,"%||%")
 importFrom(stats,rmultinom)
diff --git a/R/merge_names.R b/R/merge_names.R
index c1e3d8e..4f5d7f2 100644
--- a/R/merge_names.R
+++ b/R/merge_names.R
@@ -1,102 +1,149 @@
+# ------------------------------
+# Helper: resolve cache location
+# ------------------------------
+# Resolves the directory used to cache the wru name dictionaries:
+#   * options(wru_data_wd = TRUE)    -> current working directory
+#   * options(wru_data_wd = "path")  -> that directory (created if needed)
+#   * anything else                  -> tempdir() for this session
+wru_cache_dir <- function() {
+  opt <- getOption("wru_data_wd", default = FALSE)
+  if (isTRUE(opt)) {
+    # TRUE means: use current working directory
+    return(getwd())
+  }
+  if (is.character(opt) && length(opt) == 1 && !is.na(opt) && nzchar(opt)) {
+    dir.create(path.expand(opt), recursive = TRUE, showWarnings = FALSE)
+    return(path.expand(opt))
+  }
+  # default: tempdir for this session
+  tempdir()
+}
+
+#' Preflight for name data (persistent, pinned, and verified)
+#' Downloads 4 name dictionaries to a persistent directory if missing.
+#' The destination honours options(wru_data_wd = ...) exactly as
+#' wru_cache_dir() does (TRUE -> getwd(); a path -> that path; else tempdir()).
+wru_data_preflight <- function() {
+  # BUGFIX: resolve the destination via wru_cache_dir() so downloads land in
+  # the same directory merge_names() later reads from. The previous
+  # ifelse(getOption("wru_data_wd", ...), getwd(), tempdir()) errored when the
+  # option was a character path and ignored path-valued settings entirely.
+  dest <- wru_cache_dir()
+  files_needed <- c(
+    "wru-data-first_c.rds",
+    "wru-data-mid_c.rds",
+    "wru-data-census_last_c.rds",
+    "wru-data-last_c.rds"
+  )
+  missing <- files_needed[!file.exists(file.path(dest, files_needed))]
+  if (length(missing) == 0L) return(invisible(TRUE))
+
+  # Use the release that actually has these assets (matches CRAN code).
+  tag_to_use <- "v2.0.0"
+
+  # Try download; piggyback ignores .token when "" (same as CRAN code).
+  tryCatch(
+    {
+      piggyback::pb_download(
+        file = missing,
+        repo = "kosukeimai/wru",
+        tag = tag_to_use,
+        dest = dest,
+        overwrite = TRUE,
+        .token = ""
+      )
+    },
+    error = function(e) {
+      stop(
+        sprintf(
+          "wru_data_preflight() failed to fetch: %s\nReason: %s",
+          paste(missing, collapse = ", "),
+          conditionMessage(e)
+        ),
+        call. = FALSE
+      )
+    }
+  )
+
+  # Verify they arrived; if not, guide the user.
+  still_missing <- files_needed[!file.exists(file.path(dest, files_needed))]
+  if (length(still_missing) > 0L) {
+    stop(
+      paste0(
+        "wru_data_preflight(): some files are still missing after download: ",
+        paste(still_missing, collapse = ", "),
+        "\nCheck network/GitHub access or download these assets from the ",
+        "wru v2.0.0 release and place them in: ", normalizePath(dest),
+        "\nThen re-run."
+      ),
+      call. = FALSE
+    )
+  }
+  invisible(TRUE)
+}
+
 #' Surname probability merging function.
 #'
 #' \code{merge_names} merges names in a user-input dataset with corresponding
-#' race/ethnicity probabilities derived from both the U.S. Census Surname List
-#' and Spanish Surname List and voter files from states in the Southern U.S.
-#'
-#' This function allows users to match names in their dataset with database entries
-#' estimating P(name | ethnicity) for each of the five major racial groups for each
-#' name. The database probabilities are derived from both the U.S. Census Surname List
-#' and Spanish Surname List and voter files from states in the Southern U.S.
-#' -#' By default, the function matches names as follows: -#' 1) Search raw surnames in the database; -#' 2) Remove any punctuation and search again; -#' 3) Remove any spaces and search again; -#' 4) Remove suffixes (e.g., "Jr") and search again (last names only) -#' 5) Split double-barreled names into two parts and search first part of name; -#' 6) Split double-barreled names into two parts and search second part of name; -#' -#' Each step only applies to names not matched in a previous step. -#' Steps 2 through 6 are not applied if \code{clean.surname} is FALSE. -#' -#' Note: Any name appearing only on the Spanish Surname List is assigned a -#' probability of 1 for Hispanics/Latinos and 0 for all other racial groups. +#' race/ethnicity probabilities derived from both the U.S. Census Surname List +#' and Spanish Surname List and voter files from states in the Southern U.S. #' -#' @param voter.file An object of class \code{data.frame}. Must contain a row for each individual being predicted, -#' as well as a field named \code{\var{last}} containing each individual's surname. -#' If first name is also being used for prediction, the file must also contain a field -#' named \code{\var{first}}. If middle name is also being used for prediction, the field -#' must also contain a field named \code{\var{middle}}. -#' @param namesToUse A character vector identifying which names to use for the prediction. -#' The default value is \code{"last"}, indicating that only the last name will be used. -#' Other options are \code{"last, first"}, indicating that both last and first names will be -#' used, and \code{"last, first, middle"}, indicating that last, first, and middle names will all -#' be used. -#' @param census.surname A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, -#' function will call \code{merge_surnames} to merge in Pr(Race | Surname) -#' from U.S. Census Surname List (2000, 2010, or 2020) and Spanish Surname List. 
-#' If \code{FALSE}, user must provide a \code{name.dictionary} (see below). -#' Default is \code{TRUE}. -#' @param table.surnames An object of class \code{data.frame} provided by the -#' users as an alternative surname dictionary. It will consist of a list of -#' U.S. surnames, along with the associated probabilities P(name | ethnicity) -#' for ethnicities: white, Black, Hispanic, Asian, and other. Default is \code{NULL}. -#' (\code{\var{last_name}} for U.S. surnames, \code{\var{p_whi_last}} for White, -#' \code{\var{p_bla_last}} for Black, \code{\var{p_his_last}} for Hispanic, -#' \code{\var{p_asi_last}} for Asian, \code{\var{p_oth_last}} for other). -#' @param table.first See \code{\var{table.surnames}}. -#' @param table.middle See \code{\var{table.surnames}}. -#' @param impute.missing See \code{predict_race}. -#' @param model See \code{predict_race}. -#' @param clean.names A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, -#' any surnames in \code{\var{voter.file}} that cannot initially be matched -#' to the database will be cleaned, according to U.S. Census specifications, -#' in order to increase the chance of finding a match. Default is \code{TRUE}. -#' @return Output will be an object of class \code{data.frame}. It will -#' consist of the original user-input data with additional columns that -#' specify the part of the name matched with Census data (\code{\var{surname.match}}), -#' and the probabilities Pr(Race | Surname) for each racial group -#' (\code{\var{p_whi}} for White, \code{\var{p_bla}} for Black, -#' \code{\var{p_his}} for Hispanic/Latino, -#' \code{\var{p_asi}} for Asian and Pacific Islander, and -#' \code{\var{p_oth}} for Other/Mixed). 
-#' @importFrom dplyr coalesce -#' @examples -#' data(voters) -#' \dontrun{try(merge_names(voters, namesToUse = "surname", census.surname = TRUE))} +#' @param voter.file data.frame with at least a \code{surname} column; \code{first} and \code{middle} if used +#' @param namesToUse "surname", "surname, first", or "surname, first, middle" +#' @param census.surname logical; if TRUE, use Census/Spanish surname list +#' @param table.surnames Optional custom last-name dictionary (schema like shipped RDS) +#' @param table.first Optional custom first-name dictionary +#' @param table.middle Optional custom middle-name dictionary +#' @param clean.names logical; try cleaning/fallback passes +#' @param impute.missing logical; impute c_* NA with column means (else 1) +#' @param model kept for API parity +#' @param return.unmatched logical; add *_matched boolean columns +#' @return data.frame with probabilities (and flags if requested) #' @keywords internal -merge_names <- function(voter.file, namesToUse, census.surname, table.surnames = NULL, table.first = NULL, table.middle = NULL, clean.names = TRUE, impute.missing = FALSE, model = "BISG") { - - # check the names +merge_names <- function( + voter.file, + namesToUse, + census.surname, + table.surnames = NULL, + table.first = NULL, + table.middle = NULL, + clean.names = TRUE, + return.unmatched = TRUE, + impute.missing = FALSE, + model = "BISG" +) { + # --- arg checks ------------------------------------------------------------ if (namesToUse == "surname") { if (!("surname" %in% names(voter.file))) { stop("Voter data frame needs to have a column named 'surname'.") } } else if (namesToUse == "surname, first") { - if (!("surname" %in% names(voter.file)) || !("first" %in% names(voter.file))) { - stop("Voter data frame needs to have a column named 'surname' and a column called 'first'.") + if (!all(c("surname","first") %in% names(voter.file))) { + stop("Voter data frame needs 'surname' and 'first'.") } } else if (namesToUse == 
"surname, first, middle") { - if (!("surname" %in% names(voter.file)) || !("first" %in% names(voter.file)) || - !("middle" %in% names(voter.file))) { - stop("Voter data frame needs to have a column named 'surname', a column called 'first', and a column called 'middle'.") + if (!all(c("surname","first","middle") %in% names(voter.file))) { + stop("Voter data frame needs 'surname', 'first', and 'middle'.") } } - + + # --- ensure data present (downloads only if missing) ----------------------- wru_data_preflight() - - path <- ifelse(getOption("wru_data_wd", default = FALSE), getwd(), tempdir()) - - first_c <- readRDS(paste0(path, "/wru-data-first_c.rds")) - mid_c <- readRDS(paste0(path, "/wru-data-mid_c.rds")) - if(census.surname){ - last_c <- readRDS(paste0(path, "/wru-data-census_last_c.rds")) - } else { - last_c <- readRDS(paste0(path, "/wru-data-last_c.rds")) + + path <- wru_cache_dir() + first_c <- readRDS(file.path(path, "wru-data-first_c.rds")) + mid_c <- readRDS(file.path(path, "wru-data-mid_c.rds")) + last_c <- readRDS(file.path( + path, + if (isTRUE(census.surname)) "wru-data-census_last_c.rds" else "wru-data-last_c.rds" + )) + + if (!is.data.frame(first_c) || !is.data.frame(mid_c) || !is.data.frame(last_c)) { + stop("wru cached name dictionaries are not data.frames; clear cache and re-run wru_data_preflight().") } p_eth <- c("c_whi", "c_bla", "c_his", "c_asi", "c_oth") + + # optional custom dictionaries; align names to shipped schema if (is.null(table.surnames)) { lastNameDict <- last_c } else { @@ -108,26 +147,22 @@ merge_names <- function(voter.file, namesToUse, census.surname, table.surnames = firstNameDict <- first_c } else { firstNameDict <- table.first - firstNameDict[is.na(firstNameDict)] <- 0 names(firstNameDict) <- names(first_c) + firstNameDict[is.na(firstNameDict)] <- 0 } if (is.null(table.middle)) { middleNameDict <- mid_c } else { middleNameDict <- table.middle - middleNameDict[is.na(middleNameDict)] <- 0 names(middleNameDict) <- names(mid_c) + 
middleNameDict[is.na(middleNameDict)] <- 0 } - - nameDict <- list( - "first" = firstNameDict, - "middle" = middleNameDict, - "last" = lastNameDict - ) - - ## Convert names in voter file to upper case + + nameDict <- list(first = firstNameDict, middle = middleNameDict, last = lastNameDict) + + # --- initial normalization & merges --------------------------------------- df <- voter.file - + df$lastname.match <- df$lastname.upper <- toupper(as.character(df$surname)) if (grepl("first", namesToUse)) { df$firstname.match <- df$firstname.upper <- toupper(as.character(df$first)) @@ -136,8 +171,8 @@ merge_names <- function(voter.file, namesToUse, census.surname, table.surnames = df$middlename.match <- df$middlename.upper <- toupper(as.character(df$middle)) df$middlename.match[is.na(df$middlename.match)] <- "" } - - ## Merge Surnames with Census List (No Cleaning Yet) + + # first pass merges (raw) df <- merge(df, lastNameDict, by.x = "lastname.match", by.y = "last_name", all.x = TRUE, sort = FALSE) if (grepl("first", namesToUse)) { df <- merge(df, firstNameDict, by.x = "firstname.match", by.y = "first_name", all.x = TRUE, sort = FALSE) @@ -145,201 +180,255 @@ merge_names <- function(voter.file, namesToUse, census.surname, table.surnames = if (grepl("middle", namesToUse)) { df <- merge(df, middleNameDict, by.x = "middlename.match", by.y = "middle_name", all.x = TRUE, sort = FALSE) } - - if (namesToUse == "surname" && sum(!(df$lastname.upper %in% lastNameDict$last_name)) == 0) { - return(df[, c(names(voter.file), "lastname.match", paste0(p_eth, "_last"))]) + + # compute flags now so early-returns also include them + if (isTRUE(return.unmatched)) { + if ("c_whi_last" %in% names(df)) df$last_matched <- !is.na(df$c_whi_last) + if ("c_whi_first" %in% names(df)) df$first_matched <- !is.na(df$c_whi_first) + if ("c_whi_middle" %in% names(df)) df$middle_matched <- !is.na(df$c_whi_middle) } - if (namesToUse == "surname, first" && sum(!(df$lastname.match %in% lastNameDict$last_name)) == 
0 && - sum(!(df$firstname.upper %in% firstNameDict$first_name)) == 0) { - return(df[, c(names(voter.file), "lastname.match", "firstname.match", paste0(p_eth, "_last"), paste0(p_eth, "_first"))]) + + # unified early-return helper + fast_return <- function() { + prob_cols <- c(paste0(p_eth, "_last")) + name_cols <- c("lastname.match") + flag_cols <- character(0) + if (isTRUE(return.unmatched) && "last_matched" %in% names(df)) flag_cols <- c(flag_cols, "last_matched") + if (grepl("first", namesToUse)) { + prob_cols <- c(prob_cols, paste0(p_eth, "_first")) + name_cols <- c(name_cols, "firstname.match") + if (isTRUE(return.unmatched) && "first_matched" %in% names(df)) flag_cols <- c(flag_cols, "first_matched") + } + if (grepl("middle", namesToUse)) { + prob_cols <- c(prob_cols, paste0(p_eth, "_middle")) + name_cols <- c(name_cols, "middlename.match") + if (isTRUE(return.unmatched) && "middle_matched" %in% names(df)) flag_cols <- c(flag_cols, "middle_matched") + } + cols <- c(names(voter.file), name_cols, flag_cols, prob_cols) + df[, cols, drop = FALSE] } - if (namesToUse == "surname, first, middle" && sum(!(df$lastname.match %in% lastNameDict$last_name)) == 0 && - sum(!(df$firstname.upper %in% firstNameDict$first_name)) == 0 && sum(!(df$middlename.upper %in% middleNameDict$middle_name)) == 0) { - return(df[, c(names(voter.file), "lastname.match", "firstname.match", "middlename.match", paste0(p_eth, "_last"), paste0(p_eth, "_first"), paste0(p_eth, "_middle"))]) + + if (namesToUse == "surname" && + sum(!(df$lastname.upper %in% lastNameDict$last_name)) == 0) { + return(fast_return()) } - - ## Clean names (if specified by user) + if (namesToUse == "surname, first" && + sum(!(df$lastname.match %in% lastNameDict$last_name)) == 0 && + sum(!(df$firstname.upper %in% firstNameDict$first_name)) == 0) { + return(fast_return()) + } + if (namesToUse == "surname, first, middle" && + sum(!(df$lastname.match %in% lastNameDict$last_name)) == 0 && + sum(!(df$firstname.upper %in% 
firstNameDict$first_name)) == 0 && + sum(!(df$middlename.upper %in% middleNameDict$middle_name))== 0) { + return(fast_return()) + } + + # --- cleaning / fallback passes (dimension-safe) -------------------------- if (clean.names) { - for (nameType in strsplit(namesToUse, ", ")[[1]]) { - if(nameType=="surname"){ - nameType <- "last" - } - df1 <- df[!is.na(df[, paste("c_whi_", nameType, sep = "")]), ] # Matched names - df2 <- df[is.na(df[, paste("c_whi_", nameType, sep = "")]), ] # Unmatched names - - ## Remove All Punctuation and Try Merge Again + for (nameType0 in strsplit(namesToUse, ", ")[[1]]) { + nameType <- if (nameType0 == "surname") "last" else nameType0 + + col_prob <- paste0("c_whi_", nameType) + col_match <- paste0(nameType, "name.match") + col_upper <- paste0(nameType, "name.upper") + + df1 <- df[!is.na(df[, col_prob]), , drop = FALSE] + df2 <- df[ is.na(df[, col_prob]), , drop = FALSE] + + ## 1) strip punctuation if (nrow(df2) > 0) { - df2[, paste(nameType, "name.match", sep = "")] <- gsub("[^[:alnum:] ]", "", df2[, paste(nameType, "name.upper", sep = "")]) - - df2 <- merge(df2[, !grepl(paste("_", nameType, sep = ""), names(df2))], nameDict[[nameType]], + df2[, col_match] <- gsub("[^[:alnum:] ]", "", df2[, col_upper]) + df2 <- merge( + df2[, !grepl(paste0("_", nameType), names(df2))], + nameDict[[nameType]], all.x = TRUE, - by.x = paste(nameType, "name.match", sep = ""), by.y = paste(nameType, "name", sep = "_"), + by.x = col_match, by.y = paste0(nameType, "_name"), sort = FALSE ) - df2 <- df2[, names(df1)] # reorder the columns - - if (sum(!is.na(df2[, paste("c_whi_", nameType, sep = ""), ])) > 0) { - df1 <- rbind(df1, df2[!is.na(df2[, paste("c_whi_", nameType, sep = ""), ]), ]) - df2 <- df2[is.na(df2[, paste("c_whi_", nameType, sep = "")]), ] + df2 <- df2[, names(df1), drop = FALSE] + + keep <- !is.na(df2[, col_prob]) + if (any(keep)) { + df1 <- rbind(df1, df2[keep, , drop = FALSE]) + df2 <- df2[!keep, , drop = FALSE] } } - - ## Remove All Spaces and 
Try Merge Again + + ## 2) strip spaces if (nrow(df2) > 0) { - df2[, paste(nameType, "name.match", sep = "")] <- gsub(" ", "", df2[, paste(nameType, "name.match", sep = "")]) - df2 <- merge(df2[, !grepl(paste("_", nameType, sep = ""), names(df2))], nameDict[[nameType]], + df2[, col_match] <- gsub(" ", "", df2[, col_match]) + df2 <- merge( + df2[, !grepl(paste0("_", nameType), names(df2))], + nameDict[[nameType]], all.x = TRUE, - by.x = paste(nameType, "name.match", sep = ""), by.y = paste(nameType, "name", sep = "_"), + by.x = col_match, by.y = paste0(nameType, "_name"), sort = FALSE ) - df2 <- df2[, names(df1)] # reorder the columns - - if (sum(!is.na(df2[, paste("c_whi_", nameType, sep = ""), ])) > 0) { - df1 <- rbind(df1, df2[!is.na(df2[, paste("c_whi_", nameType, sep = ""), ]), ]) - df2 <- df2[is.na(df2[, paste("c_whi_", nameType, sep = "")]), ] + df2 <- df2[, names(df1), drop = FALSE] + + keep <- !is.na(df2[, col_prob]) + if (any(keep)) { + df1 <- rbind(df1, df2[keep, , drop = FALSE]) + df2 <- df2[!keep, , drop = FALSE] } } - - # Edits specific to common issues with last names - if (nameType == "last" & nrow(df2) > 0) { - - ## Remove Jr/Sr/III Suffixes for last names - suffix <- c("JUNIOR", "SENIOR", "THIRD", "III", "JR", " II", " J R", " S R", " IV") - for (i in 1:length(suffix)) { - df2$lastname.match <- ifelse(substr(df2$lastname.match, nchar(df2$lastname.match) - (nchar(suffix)[i] - 1), nchar(df2$lastname.match)) == suffix[i], - substr(df2$lastname.match, 1, nchar(df2$lastname.match) - nchar(suffix)[i]), + + ## 3) last-name suffix cleanup + if (nameType == "last" && nrow(df2) > 0) { + suffix <- c("JUNIOR","SENIOR","THIRD","III","JR"," II"," J R"," S R"," IV") + for (suf in suffix) { + df2$lastname.match <- ifelse( + substr(df2$lastname.match, + nchar(df2$lastname.match) - (nchar(suf) - 1), + nchar(df2$lastname.match)) == suf, + substr(df2$lastname.match, 1, nchar(df2$lastname.match) - nchar(suf)), df2$lastname.match ) } - df2$lastname.match <- 
ifelse(nchar(df2$lastname.match) >= 7, - ifelse(substr(df2$lastname.match, nchar(df2$lastname.match) - 1, nchar(df2$lastname.match)) == "SR", - substr(df2$lastname.match, 1, nchar(df2$lastname.match) - 2), - df2$lastname.match - ), + df2$lastname.match <- ifelse( + nchar(df2$lastname.match) >= 7 & + substr(df2$lastname.match, nchar(df2$lastname.match) - 1, nchar(df2$lastname.match)) == "SR", + substr(df2$lastname.match, 1, nchar(df2$lastname.match) - 2), df2$lastname.match - ) # Remove "SR" only if name has at least 7 characters - + ) + df2 <- merge( - df2[, !grepl(paste("_", nameType, sep = ""), names(df2))], - lastNameDict, by.x = "lastname.match", by.y = "last_name", - all.x = TRUE, sort = FALSE) - df2 <- df2[, names(df1)] # reorder the columns - - if (sum(!is.na(df2[, paste("c_whi_", nameType, sep = ""), ])) > 0) { - df1 <- rbind(df1, df2[!is.na(df2[, paste("c_whi_", nameType, sep = ""), ]), ]) - df2 <- df2[is.na(df2[, paste("c_whi_", nameType, sep = "")]), ] + df2[, !grepl(paste0("_", nameType), names(df2))], + lastNameDict, + by.x = "lastname.match", by.y = "last_name", + all.x = TRUE, sort = FALSE + ) + df2 <- df2[, names(df1), drop = FALSE] + + keep <- !is.na(df2[, col_prob]) + if (any(keep)) { + df1 <- rbind(df1, df2[keep, , drop = FALSE]) + df2 <- df2[!keep, , drop = FALSE] } } - - - ## Names with Hyphens or Spaces, e.g. 
Double-Barreled Names + + ## 4) double-barreled (first half then second half) if (nrow(df2) > 0) { df2$name2 <- df2$name1 <- NA - df2$name1[grep("-", df2[, paste(nameType, "name.upper", sep = "")])] <- sapply(strsplit(grep("-", df2[, paste(nameType, "name.upper", sep = "")], value = T), "-"), "[", 1) - df2$name2[grep("-", df2[, paste(nameType, "name.upper", sep = "")])] <- sapply(strsplit(grep("-", df2[, paste(nameType, "name.upper", sep = "")], value = T), "-"), "[", 2) - df2$name1[grep(" ", df2[, paste(nameType, "name.upper", sep = "")])] <- sapply(strsplit(grep(" ", df2[, paste(nameType, "name.upper", sep = "")], value = T), " "), "[", 1) - df2$name2[grep(" ", df2[, paste(nameType, "name.upper", sep = "")])] <- sapply(strsplit(grep(" ", df2[, paste(nameType, "name.upper", sep = "")], value = T), " "), "[", 2) - - ## Use first half of name to merge in priors - df2[, paste(nameType, "name.match", sep = "")] <- as.character(df2$name1) - df2 <- merge(df2[, !grepl(paste("_", nameType, sep = ""), names(df2))], nameDict[[nameType]], + + # hyphen split + idx <- grep("-", df2[, col_upper]) + if (length(idx)) { + parts <- strsplit(df2[idx, col_upper], "-") + df2$name1[idx] <- vapply(parts, `[`, character(1), 1) + df2$name2[idx] <- vapply(parts, `[`, character(1), 2) + } + # space split (fill only NAs left by hyphen step) + idx <- grep(" ", df2[, col_upper]) + if (length(idx)) { + parts <- strsplit(df2[idx, col_upper], " ") + fill1 <- vapply(parts, `[`, character(1), 1) + fill2 <- vapply(parts, `[`, character(1), 2) + df2$name1[idx] <- ifelse(is.na(df2$name1[idx]), fill1, df2$name1[idx]) + df2$name2[idx] <- ifelse(is.na(df2$name2[idx]), fill2, df2$name2[idx]) + } + + # first half + df2[, col_match] <- as.character(df2$name1) + df2 <- merge( + df2[, !grepl(paste0("_", nameType), names(df2))], + nameDict[[nameType]], all.x = TRUE, - by.x = paste(nameType, "name.match", sep = ""), by.y = paste(nameType, "name", sep = "_"), + by.x = col_match, by.y = paste0(nameType, "_name"), 
sort = FALSE ) - df2 <- df2[, c(names(df1), "name1", "name2")] # reorder the columns - - if (sum(!is.na(df2[, paste("c_whi_", nameType, sep = ""), ])) > 0) { - df1 <- rbind(df1, df2[!is.na(df2[, paste("c_whi_", nameType, sep = "")]), !(names(df2) %in% c("name1", "name2"))]) - df2 <- df2[is.na(df2[, paste("c_whi_", nameType, sep = "")]), ] + df2 <- df2[, c(names(df1), "name1", "name2"), drop = FALSE] + + keep <- !is.na(df2[, col_prob]) + if (any(keep)) { + df1 <- rbind(df1, df2[keep, !(names(df2) %in% c("name1","name2")), drop = FALSE]) + df2 <- df2[!keep, , drop = FALSE] } } - - ## Use second half of name to merge in priors for rest + if (nrow(df2) > 0) { - df2[, paste(nameType, "name.match", sep = "")] <- as.character(df2$name2) - df2 <- merge(df2[, !grepl(paste("_", nameType, sep = ""), names(df2))], nameDict[[nameType]], + # second half + df2[, col_match] <- as.character(df2$name2) + df2 <- merge( + df2[, !grepl(paste0("_", nameType), names(df2))], + nameDict[[nameType]], all.x = TRUE, - by.x = paste(nameType, "name.match", sep = ""), by.y = paste(nameType, "name", sep = "_"), + by.x = col_match, by.y = paste0(nameType, "_name"), sort = FALSE ) - df2 <- df2[, c(names(df1), "name1", "name2")] # reorder the columns - - if (sum(!is.na(df2[, paste("c_whi_", nameType, sep = ""), ])) > 0) { - df1 <- rbind(df1, df2[!is.na(df2[, paste("c_whi_", nameType, sep = "")]), !(names(df2) %in% c("name1", "name2"))]) - df2 <- df2[is.na(df2[, paste("c_whi_", nameType, sep = "")]), ] + df2 <- df2[, c(names(df1), "name1", "name2"), drop = FALSE] + + keep <- !is.na(df2[, col_prob]) + if (any(keep)) { + df1 <- rbind(df1, df2[keep, !(names(df2) %in% c("name1","name2")), drop = FALSE]) + df2 <- df2[!keep, , drop = FALSE] } } - + + # stitch back if (nrow(df2) > 0) { - df <- rbind(df1, df2[, !(names(df2) %in% c("name1", "name2"))]) + df <- rbind(df1, df2[, !(names(df2) %in% c("name1","name2")), drop = FALSE]) } else { df <- df1 } } } - - - ## For unmatched names, just fill with an column 
mean if impute is true, or with constant if false + + # --- reporting ------------------------------------------------------------- c_miss_last <- mean(is.na(df$c_whi_last)) if (c_miss_last > 0) { - message(paste(paste(sum(is.na(df$c_whi_last)), " (", round(100 * mean(is.na(df$c_whi_last)), 1), "%) individuals' last names were not matched.", sep = ""))) + message(paste0(sum(is.na(df$c_whi_last)), " (", round(100 * c_miss_last, 1), "%) individuals' last names were not matched.")) } if (grepl("first", namesToUse)) { c_miss_first <- mean(is.na(df$c_whi_first)) if (c_miss_first > 0) { - message(paste(paste(sum(is.na(df$c_whi_first)), " (", round(100 * mean(is.na(df$c_whi_first)), 1), "%) individuals' first names were not matched.", sep = ""))) + message(paste0(sum(is.na(df$c_whi_first)), " (", round(100 * c_miss_first, 1), "%) individuals' first names were not matched.")) } } if (grepl("middle", namesToUse)) { c_miss_mid <- mean(is.na(df$c_whi_middle)) if (c_miss_mid > 0) { - message(paste(paste(sum(is.na(df$c_whi_middle)), " (", round(100 * mean(is.na(df$c_whi_middle)), 1), "%) individuals' middle names were not matched.", sep = ""))) + message(paste0(sum(is.na(df$c_whi_middle)), " (", round(100 * c_miss_mid, 1), "%) individuals' middle names were not matched.")) } } - + + # flags BEFORE imputation + if (isTRUE(return.unmatched)) { + if ("c_whi_last" %in% names(df)) df$last_matched <- !is.na(df$c_whi_last) + if ("c_whi_first" %in% names(df)) df$first_matched <- !is.na(df$c_whi_first) + if ("c_whi_middle" %in% names(df)) df$middle_matched <- !is.na(df$c_whi_middle) + } + + # --- imputation ------------------------------------------------------------ if (impute.missing) { - impute.vec <- colMeans(df[, grep("c_", names(df), value = TRUE)], na.rm = TRUE) - for (i in grep("c_", names(df), value = TRUE)) { + impute.vec <- colMeans(df[, grep("^c_", names(df), value = TRUE), drop = FALSE], na.rm = TRUE) + for (i in grep("^c_", names(df), value = TRUE)) { df[, i] <- 
dplyr::coalesce(df[, i], impute.vec[i]) } } else { - for (i in grep("c_", names(df), value = TRUE)) { + for (i in grep("^c_", names(df), value = TRUE)) { df[, i] <- dplyr::coalesce(df[, i], 1) } } - - # return the data - if (namesToUse == "surname") { - return(df[, c(names(voter.file), "lastname.match", paste(p_eth, "last", sep = "_"))]) - } else if (namesToUse == "surname, first") { - return(df[, c( - names(voter.file), "lastname.match", "firstname.match", - paste(p_eth, "last", sep = "_"), paste(p_eth, "first", sep = "_") - )]) - } else if (namesToUse == "surname, first, middle") { - return(df[, c( - names(voter.file), "lastname.match", "firstname.match", "middlename.match", - paste(p_eth, "last", sep = "_"), paste(p_eth, "first", sep = "_"), paste(p_eth, "middle", sep = "_") - )]) + + # --- unified return -------------------------------------------------------- + prob_cols <- c(paste0(p_eth, "_last")) + name_cols <- c("lastname.match") + flag_cols <- character(0) + if (isTRUE(return.unmatched) && "last_matched" %in% names(df)) flag_cols <- c("last_matched", flag_cols) + + if (grepl("first", namesToUse)) { + prob_cols <- c(prob_cols, paste0(p_eth, "_first")) + name_cols <- c(name_cols, "firstname.match") + if (isTRUE(return.unmatched) && "first_matched" %in% names(df)) flag_cols <- c(flag_cols, "first_matched") } -} - - -#' Preflight for name data -#' -#' Checks if namedata is available in the current working directory, if not -#' downloads it from github using piggyback. By default, wru will download the -#' data to a temporary directory that lasts as long as your session does. -#' However, you may wish to set the \code{wru_data_wd} option to save the -#' downloaded data to your current working directory for more permanence. -#' -#' @importFrom piggyback pb_download -wru_data_preflight <- function() { - dest <- ifelse(getOption("wru_data_wd", default = FALSE), getwd(), tempdir()) - tryCatch( - # Oddity of conditions for .token. 
Ignores token if is "" - piggyback::pb_download(repo = "kosukeimai/wru", dest = dest, .token = "", tag = "v2.0.0"), - error = function(e) message("There was an error retrieving data: ", e$message) - ) + if (grepl("middle", namesToUse)) { + prob_cols <- c(prob_cols, paste0(p_eth, "_middle")) + name_cols <- c(name_cols, "middlename.match") + if (isTRUE(return.unmatched) && "middle_matched" %in% names(df)) flag_cols <- c(flag_cols, "middle_matched") + } + + cols <- c(names(voter.file), name_cols, flag_cols, prob_cols) + df[, cols, drop = FALSE] } diff --git a/R/predict_race.R b/R/predict_race.R index 1f31b7c..eddb1b1 100644 --- a/R/predict_race.R +++ b/R/predict_race.R @@ -74,6 +74,8 @@ #' Whatever the name of the party registration field in \code{\var{voter.file}}, #' it should be coded as 1 for Democrat, 2 for Republican, and 0 for Other. #' @param retry The number of retries at the census website if network interruption occurs. +#' @param return.unmatched Generates Boolean columns for each name reporting +#' whether a match was made. Default is set to \code{TRUE}. #' @param impute.missing Logical, defaults to TRUE. Should missing be imputed? #' @param skip_bad_geos Logical. Option to have the function skip any geolocations that are not present #' in the census data, returning a partial data set. 
Default is set to \code{FALSE}, in which case it @@ -152,6 +154,7 @@ predict_race <- function( year = "2020", party = NULL, retry = 3, + return.unmatched = TRUE, impute.missing = TRUE, skip_bad_geos = FALSE, use.counties = FALSE, @@ -223,6 +226,7 @@ predict_race <- function( surname.only=surname.only, census.data = census.data, retry = retry, + return.unmatched = return.unmatched, impute.missing = impute.missing, skip_bad_geos = skip_bad_geos, census.surname = census.surname, @@ -253,6 +257,7 @@ predict_race <- function( surname.only=surname.only, census.data = census.data, retry = retry, + return.unmatched = TRUE, impute.missing = TRUE, skip_bad_geos = skip_bad_geos, census.surname = census.surname, diff --git a/R/race_prediction_funs.R b/R/race_prediction_funs.R index 77ff933..7fb09e5 100644 --- a/R/race_prediction_funs.R +++ b/R/race_prediction_funs.R @@ -25,6 +25,7 @@ #' @param year See documentation in \code{race_predict}. #' @param party See documentation in \code{race_predict}. #' @param retry See documentation in \code{race_predict}. +#' @param return.unmatched See documentation in \code{race_predict}. #' @param impute.missing See documentation in \code{race_predict}. #' @param skip_bad_geos See documentation in \code{race_predict}. #' @param names.to.use See documentation in \code{race_predict}. 
@@ -60,6 +61,7 @@ NULL year = "2020", party, retry = 3, + return.unmatched = TRUE, impute.missing = TRUE, use.counties = FALSE ) { @@ -282,6 +284,7 @@ predict_race_new <- function( surname.only=FALSE, census.data = NULL, retry = 0, + return.unmatched = TRUE, impute.missing = TRUE, skip_bad_geos = FALSE, census.surname = FALSE, @@ -397,6 +400,7 @@ predict_race_new <- function( table.first=name.dictionaries[["first"]], table.middle=name.dictionaries[["middle"]], clean.names = TRUE, + return.unmatched = return.unmatched, impute.missing = impute.missing, model = 'BISG') @@ -425,7 +429,12 @@ predict_race_new <- function( ## Revert to national Pr(Race) for missing predictions colnames(preds) <- paste("pred", eth, sep = ".") - return(data.frame(cbind(voter.file[c(vars.orig)], preds))) + if(return.unmatched == TRUE) { + return(data.frame(cbind(voter.file, preds)) |> + dplyr::select(dplyr::all_of(vars.orig), dplyr::any_of(c("last", "first", "middle")), dplyr::ends_with("_matched"), -dplyr::starts_with("c_"),-dplyr::ends_with(".match"), dplyr::starts_with("pred."))) + } else { + return(data.frame(cbind(voter.file[c(vars.orig)], preds))) + } } @@ -448,6 +457,7 @@ predict_race_me <- function( surname.only = FALSE, census.data = NULL, retry = 0, + return_missing = TRUE, impute.missing = TRUE, census.surname = FALSE, use.counties = FALSE, diff --git a/README.Rmd b/README.Rmd index ccf678a..1cc5a1e 100644 --- a/README.Rmd +++ b/README.Rmd @@ -43,10 +43,15 @@ pak::pkg_install("kosukeimai/wru") Here is a simple example that predicts the race/ethnicity of voters based only on their surnames. +Note: The sample data `voters` is based on the 2010 Census, so we set `year = 2010` +in these examples; the default is the latest Census release year (2020). See +`get_census_data()` to retrieve Census data matching your own data set. If you get an +error with the default settings, explicitly setting `year` is the likely fix. 
+ ``` r library(wru) future::plan(future::multisession) -predict_race(voter.file = voters, surname.only = TRUE) +predict_race(voter.file = voters, surname.only = TRUE, year = 2010) ``` The above produces the following output, where the last five columns are probabilistic race/ethnicity predictions (e.g., `pred.his` is the probability of being Hispanic/Latino): @@ -86,7 +91,7 @@ Note that a valid API key must be stored in a `CENSUS_API_KEY` environment varia ``` r library(wru) -predict_race(voter.file = voters, census.geo = "tract", party = "PID") +predict_race(voter.file = voters, year = 2010, census.geo = "tract", party = "PID") ``` ``` VoterID surname state CD county tract block age sex party PID place pred.whi pred.bla pred.his pred.asi pred.oth @@ -106,7 +111,7 @@ In `predict_race()`, the `census.geo` options are "county", "tract", "block" and Here is an example of prediction based on census statistics collected at the level of "place": ``` r -predict_race(voter.file = voters, census.geo = "place", party = "PID") +predict_race(voter.file = voters, year = 2010, census.geo = "place", party = "PID") ``` ``` VoterID surname state CD county tract block age sex party PID place pred.whi pred.bla pred.his pred.asi pred.oth @@ -137,7 +142,7 @@ In this case, predictions are conditioned on age but not sex, so `age = TRUE` an library(wru) voters.dc.nj <- voters[voters$state %in% c("DC", "NJ"), ] census.dc.nj <- get_census_data(state = c("DC", "NJ"), age = TRUE, sex = FALSE) -predict_race(voter.file = voters.dc.nj, census.geo = "block", census.data = census.dc.nj, age = TRUE, sex = FALSE, party = "PID") +predict_race(voter.file = voters.dc.nj, year = 2010, census.geo = "block", census.data = census.dc.nj, age = TRUE, sex = FALSE, party = "PID") ``` This produces the same result as the following statement, which downloads census data during evaluation rather than using pre-downloaded data: diff --git a/man/merge_names.Rd b/man/merge_names.Rd index ab98bee..1019c6a 100644 --- 
a/man/merge_names.Rd +++ b/man/merge_names.Rd @@ -12,89 +12,38 @@ merge_names( table.first = NULL, table.middle = NULL, clean.names = TRUE, + return.unmatched = TRUE, impute.missing = FALSE, model = "BISG" ) } \arguments{ -\item{voter.file}{An object of class \code{data.frame}. Must contain a row for each individual being predicted, -as well as a field named \code{\var{last}} containing each individual's surname. -If first name is also being used for prediction, the file must also contain a field -named \code{\var{first}}. If middle name is also being used for prediction, the field -must also contain a field named \code{\var{middle}}.} +\item{voter.file}{data.frame with at least a \code{surname} column; \code{first} and \code{middle} if used} -\item{namesToUse}{A character vector identifying which names to use for the prediction. -The default value is \code{"last"}, indicating that only the last name will be used. -Other options are \code{"last, first"}, indicating that both last and first names will be -used, and \code{"last, first, middle"}, indicating that last, first, and middle names will all -be used.} +\item{namesToUse}{"surname", "surname, first", or "surname, first, middle"} -\item{census.surname}{A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, -function will call \code{merge_surnames} to merge in Pr(Race | Surname) -from U.S. Census Surname List (2000, 2010, or 2020) and Spanish Surname List. -If \code{FALSE}, user must provide a \code{name.dictionary} (see below). -Default is \code{TRUE}.} +\item{census.surname}{logical; if TRUE, use Census/Spanish surname list} -\item{table.surnames}{An object of class \code{data.frame} provided by the -users as an alternative surname dictionary. It will consist of a list of -U.S. surnames, along with the associated probabilities P(name | ethnicity) -for ethnicities: white, Black, Hispanic, Asian, and other. Default is \code{NULL}. -(\code{\var{last_name}} for U.S. 
surnames, \code{\var{p_whi_last}} for White, -\code{\var{p_bla_last}} for Black, \code{\var{p_his_last}} for Hispanic, -\code{\var{p_asi_last}} for Asian, \code{\var{p_oth_last}} for other).} +\item{table.surnames}{Optional custom last-name dictionary (schema like shipped RDS)} -\item{table.first}{See \code{\var{table.surnames}}.} +\item{table.first}{Optional custom first-name dictionary} -\item{table.middle}{See \code{\var{table.surnames}}.} +\item{table.middle}{Optional custom middle-name dictionary} -\item{clean.names}{A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, -any surnames in \code{\var{voter.file}} that cannot initially be matched -to the database will be cleaned, according to U.S. Census specifications, -in order to increase the chance of finding a match. Default is \code{TRUE}.} +\item{clean.names}{logical; try cleaning/fallback passes} -\item{impute.missing}{See \code{predict_race}.} +\item{return.unmatched}{logical; add *_matched boolean columns} -\item{model}{See \code{predict_race}.} +\item{impute.missing}{logical; impute c_* NA with column means (else 1)} + +\item{model}{kept for API parity} } \value{ -Output will be an object of class \code{data.frame}. It will -consist of the original user-input data with additional columns that -specify the part of the name matched with Census data (\code{\var{surname.match}}), -and the probabilities Pr(Race | Surname) for each racial group -(\code{\var{p_whi}} for White, \code{\var{p_bla}} for Black, -\code{\var{p_his}} for Hispanic/Latino, -\code{\var{p_asi}} for Asian and Pacific Islander, and -\code{\var{p_oth}} for Other/Mixed). +data.frame with probabilities (and flags if requested) } \description{ \code{merge_names} merges names in a user-input dataset with corresponding race/ethnicity probabilities derived from both the U.S. Census Surname List and Spanish Surname List and voter files from states in the Southern U.S. 
} -\details{ -This function allows users to match names in their dataset with database entries -estimating P(name | ethnicity) for each of the five major racial groups for each -name. The database probabilities are derived from both the U.S. Census Surname List -and Spanish Surname List and voter files from states in the Southern U.S. - -By default, the function matches names as follows: -\enumerate{ -\item Search raw surnames in the database; -\item Remove any punctuation and search again; -\item Remove any spaces and search again; -\item Remove suffixes (e.g., "Jr") and search again (last names only) -\item Split double-barreled names into two parts and search first part of name; -\item Split double-barreled names into two parts and search second part of name; -} - -Each step only applies to names not matched in a previous step. -Steps 2 through 6 are not applied if \code{clean.surname} is FALSE. - -Note: Any name appearing only on the Spanish Surname List is assigned a -probability of 1 for Hispanics/Latinos and 0 for all other racial groups. 
-} -\examples{ -data(voters) -\dontrun{try(merge_names(voters, namesToUse = "surname", census.surname = TRUE))} -} \keyword{internal} diff --git a/man/modfuns.Rd b/man/modfuns.Rd index 71481a8..e3f5b30 100644 --- a/man/modfuns.Rd +++ b/man/modfuns.Rd @@ -21,6 +21,7 @@ year = "2020", party, retry = 3, + return.unmatched = TRUE, impute.missing = TRUE, use.counties = FALSE ) @@ -37,6 +38,7 @@ predict_race_new( surname.only = FALSE, census.data = NULL, retry = 0, + return.unmatched = TRUE, impute.missing = TRUE, skip_bad_geos = FALSE, census.surname = FALSE, @@ -55,6 +57,7 @@ predict_race_me( surname.only = FALSE, census.data = NULL, retry = 0, + return_missing = TRUE, impute.missing = TRUE, census.surname = FALSE, use.counties = FALSE, @@ -94,6 +97,8 @@ If \code{\link{NULL}}, the default, attempts to find a census key stored in an \item{retry}{See documentation in \code{race_predict}.} +\item{return.unmatched}{See documentation in \code{race_predict}.} + \item{impute.missing}{See documentation in \code{race_predict}.} \item{use.counties}{A logical, defaulting to FALSE. Should census data be filtered by counties available in \var{census.data}?} diff --git a/man/predict_race.Rd b/man/predict_race.Rd index f3bbcd7..6b997fd 100644 --- a/man/predict_race.Rd +++ b/man/predict_race.Rd @@ -16,6 +16,7 @@ predict_race( year = "2020", party = NULL, retry = 3, + return.unmatched = TRUE, impute.missing = TRUE, skip_bad_geos = FALSE, use.counties = FALSE, @@ -105,6 +106,9 @@ it should be coded as 1 for Democrat, 2 for Republican, and 0 for Other.} \item{retry}{The number of retries at the census website if network interruption occurs.} +\item{return.unmatched}{Generates Boolean columns for each name reporting +whether a match was made. Default is set to \code{TRUE}.} + \item{impute.missing}{Logical, defaults to TRUE. Should missing be imputed?} \item{skip_bad_geos}{Logical. 
Option to have the function skip any geolocations that are not present diff --git a/man/wru_data_preflight.Rd b/man/wru_data_preflight.Rd index 7da9e58..8fd6849 100644 --- a/man/wru_data_preflight.Rd +++ b/man/wru_data_preflight.Rd @@ -2,14 +2,14 @@ % Please edit documentation in R/merge_names.R \name{wru_data_preflight} \alias{wru_data_preflight} -\title{Preflight for name data} +\title{Preflight for name data (persistent, pinned, and verified)} \usage{ wru_data_preflight() } \description{ -Checks if namedata is available in the current working directory, if not -downloads it from github using piggyback. By default, wru will download the -data to a temporary directory that lasts as long as your session does. -However, you may wish to set the \code{wru_data_wd} option to save the -downloaded data to your current working directory for more permanence. +Downloads the four name dictionaries to a persistent directory if they are +missing. By default, data are cached in the session's \code{tempdir()}; set +\code{options(wru_data_wd = TRUE)} to cache them in \code{getwd()} instead, +keeping the downloaded files available across sessions without having to +re-download them each time. 
} diff --git a/tests/testthat/test-census_helper_v2.R b/tests/testthat/test-census_helper_v2.R index 3ea900d..04d75c7 100644 --- a/tests/testthat/test-census_helper_v2.R +++ b/tests/testthat/test-census_helper_v2.R @@ -47,10 +47,10 @@ test_that("helper returns verified census tract data",{ use.counties = FALSE, skip_bad_geos = FALSE ) - expect_equal(x[x$surname == "Lopez", "r_whi"], 0.767197, tolerance = 0.000001) - expect_equal(x[x$surname == "Khanna", "r_whi"], 0.708026, tolerance = 0.000001) - expect_equal(x[x$surname == "Lopez", "r_bla"], 0.09522743, tolerance = 0.000001) - expect_equal(x[x$surname == "Khanna", "r_bla"], 0.09544469, tolerance = 0.000001) + expect_equal(x[x$surname == "Lopez", "r_whi"], 0.5288848, tolerance = 0.000001) + expect_equal(x[x$surname == "Khanna", "r_whi"], 0.5740415, tolerance = 0.000001) + expect_equal(x[x$surname == "Lopez", "r_bla"], 0.06564720, tolerance = 0.000001) + expect_equal(x[x$surname == "Khanna", "r_bla"], 0.07738305, tolerance = 0.000001) }) skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY"))) @@ -89,3 +89,4 @@ test_that("New tables and legacy tables return equal race predictions",{ expect_equal(x$r_asi, y$r_asi, tolerance = .01) # expect_equal(x$r_oth, y$r_oth, tolerance = .01) }) + diff --git a/tests/testthat/test-predict_race_2010.R b/tests/testthat/test-predict_race_2010.R index a48031c..c92777d 100644 --- a/tests/testthat/test-predict_race_2010.R +++ b/tests/testthat/test-predict_race_2010.R @@ -14,7 +14,7 @@ test_that("Tests surname only predictions", { year = 2010, surname.only = TRUE)) # Test and confirm prediction output is as expected - expect_equal(dim(x), c(10, 20)) + expect_equal(dim(x), c(10, 21)) expect_equal(sum(is.na(x)), 0) expect_equal(round(x[x$surname == "Khanna", "pred.whi"], 4), 0.045, tolerance = 0.01) expect_equal(round(x[x$surname == "Johnson", "pred.his"], 4), 0.0272, tolerance = 0.01) @@ -33,7 +33,7 @@ test_that("Test BISG NJ at county level", { )) expect_equal(as.character(x$VoterID), 
as.character(c(1, 2, 4, 5, 6, 8, 9))) - expect_equal(dim(x), c(7, 20)) + expect_equal(dim(x), c(7, 21)) expect_equal(sum(is.na(x)), 0L) expect_equal(sum(x$surname == "Johnson"), 0) expect_equal(round(x[x$surname == "Khanna", "pred.whi"], 4), 0.0314, tolerance = 0.01) @@ -82,7 +82,7 @@ test_that("BISG NJ at block level", { use.counties = TRUE) ) - expect_equal(dim(x), c(7, 20)) + expect_equal(dim(x), c(7, 21)) expect_equal(sum(is.na(x$pred.asi)), 0L) expect_true(!any(duplicated(x$surname))) expect_equal(x[x$surname == "Khanna", "pred.asi"], 0.7640, tolerance = 0.01) @@ -108,7 +108,7 @@ test_that("BISG NJ at block_group level", { use.counties = TRUE) ) - expect_equal(dim(x), c(7, 21)) + expect_equal(dim(x), c(7, 22)) expect_equal(sum(is.na(x$pred.asi)), 0) expect_true(!any(duplicated(x$surname))) expect_equal(x[x$surname == "Khanna", "pred.asi"], 0.9183, tolerance = 0.01) @@ -173,7 +173,7 @@ test_that("Handles zero-pop. geolocations", { census.data = census, use.counties = TRUE) ) - expect_equal(dim(x), c(7, 20)) + expect_equal(dim(x), c(7, 21)) expect_equal(sum(is.na(x$pred.asi)), 0) expect_true(!any(duplicated(x$surname))) expect_equal(x[x$surname == "Khanna", "pred.asi"], 0.91, tolerance = 0.01) diff --git a/tests/testthat/test-predict_race_2020.R b/tests/testthat/test-predict_race_2020.R index 89f8962..bb3ef19 100644 --- a/tests/testthat/test-predict_race_2020.R +++ b/tests/testthat/test-predict_race_2020.R @@ -25,7 +25,7 @@ test_that("Tests surname only predictions", { voter.file = voters, surname.only = TRUE)) # Test and confirm prediction output is as expected - expect_equal(dim(x), c(10, 20)) + expect_equal(dim(x), c(10, 21)) expect_equal(sum(is.na(x)), 0) expect_equal(round(x[x$surname == "Khanna", "pred.whi"], 4), 0.045, tolerance = 0.01) expect_equal(round(x[x$surname == "Johnson", "pred.his"], 4), 0.0272, tolerance = 0.01) @@ -43,7 +43,7 @@ test_that("Test BISG NJ at county level", { )) expect_equal(as.character(x$VoterID), as.character(c(1, 2, 4, 5, 6, 
8, 9))) - expect_equal(dim(x), c(7, 20)) + expect_equal(dim(x), c(7, 21)) expect_equal(sum(is.na(x)), 0L) expect_equal(sum(x$surname == "Johnson"), 0) expect_equal(round(x[x$surname == "Khanna", "pred.whi"], 4), 0.0181, tolerance = 0.01) @@ -90,7 +90,7 @@ test_that("BISG NJ at block level", { use.counties = TRUE) ) - expect_equal(dim(x), c(7, 20)) + expect_equal(dim(x), c(7, 21)) expect_equal(sum(is.na(x$pred.asi)), 0L) expect_true(!any(duplicated(x$surname))) expect_equal(x[x$surname == "Khanna", "pred.asi"], 0.8078, tolerance = 0.01) @@ -115,7 +115,7 @@ test_that("BISG NJ at block_group level", { use.counties = TRUE) ) - expect_equal(dim(x), c(7, 21)) + expect_equal(dim(x), c(7, 22)) expect_equal(sum(is.na(x$pred.asi)), 0) expect_true(!any(duplicated(x$surname))) expect_equal(x[x$surname == "Khanna", "pred.asi"], 0.9374, tolerance = 0.01) @@ -177,7 +177,7 @@ test_that("Handles zero-pop. geolocations", { census.data = census, use.counties = TRUE) ) - expect_equal(dim(x), c(7, 20)) + expect_equal(dim(x), c(7, 21)) expect_equal(sum(is.na(x$pred.asi)), 0) expect_true(!any(duplicated(x$surname))) expect_equal(x[x$surname == "Khanna", "pred.asi"], 0.9444, tolerance = 0.01) @@ -205,3 +205,4 @@ test_that("Fixes for issue #68 work as expected", { expect_equal(three$pred.whi[2], 0.8397254) expect_equal(three$pred.whi[3], 0.8397254) }) + diff --git a/tests/testthat/wru-data-census_last_c.rds b/tests/testthat/wru-data-census_last_c.rds new file mode 100644 index 0000000..b926aaf Binary files /dev/null and b/tests/testthat/wru-data-census_last_c.rds differ diff --git a/tests/testthat/wru-data-first_c.rds b/tests/testthat/wru-data-first_c.rds new file mode 100644 index 0000000..3b364a7 Binary files /dev/null and b/tests/testthat/wru-data-first_c.rds differ diff --git a/tests/testthat/wru-data-last_c.rds b/tests/testthat/wru-data-last_c.rds new file mode 100644 index 0000000..57e7bce Binary files /dev/null and b/tests/testthat/wru-data-last_c.rds differ diff --git 
a/tests/testthat/wru-data-mid_c.rds b/tests/testthat/wru-data-mid_c.rds new file mode 100644 index 0000000..0228974 Binary files /dev/null and b/tests/testthat/wru-data-mid_c.rds differ diff --git a/wru-data-census_last_c.rds b/wru-data-census_last_c.rds new file mode 100644 index 0000000..b926aaf Binary files /dev/null and b/wru-data-census_last_c.rds differ diff --git a/wru-data-first_c.rds b/wru-data-first_c.rds new file mode 100644 index 0000000..3b364a7 Binary files /dev/null and b/wru-data-first_c.rds differ diff --git a/wru-data-last_c.rds b/wru-data-last_c.rds new file mode 100644 index 0000000..57e7bce Binary files /dev/null and b/wru-data-last_c.rds differ diff --git a/wru-data-mid_c.rds b/wru-data-mid_c.rds new file mode 100644 index 0000000..0228974 Binary files /dev/null and b/wru-data-mid_c.rds differ