Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata

*.Rproj

data/*
58 changes: 52 additions & 6 deletions 00_Data_Processing2.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ library(tidyverse)
options(scipen = 999) # turn off scientific notation for all variables

#Specify Drive Path
drive_path <- "//Internal_Path/"
input_path <- paste0(drive_path, "Input_Data/Surveys/")
drive_path <- "./data/"
input_path <- paste0(drive_path, "MNSO-Data/")
output_path <- paste0(drive_path, "/Output_Data/")
shapefile_path <- paste0(drive_path, "Input_Data/Shapefiles/")
shapefile_path <- paste0(drive_path, "Shapefiles/")

#Load datasets
mphc_2018 <- read_dta(paste0(input_path, "mphc2018Data_AllRegions.dta"))
#mphc_structures_2018 <- read_dta(paste0(input_path, "mphc2018Data_structures.dta"))
ICT_data <- read_dta(paste0(input_path, "ICT Listing WorldPop.dta"))
IHS6_data <- read_dta(paste0(input_path, "IHS6 Listing WorldPop.dta"))
Naca_data <- read_dta(paste0(input_path, "Naca Listing WorldPop.dta"))
ea <- st_read(file.path(shapefile_path, "2018_MPHC_EAs_Final_for_Use_Corrected.shp"))
ea <- st_read(file.path(shapefile_path, "2018_MPHC_EAs_Final_for_Use.shp")) # replaces "2018_MPHC_EAs_Final_for_Use_Corrected.shp"
#mphc_structures_2018 <- st_read(paste0(output_path, "mphc_structures_points.gpkg"))
dhs_data <- read_dta(paste0(input_path, "MDHS_2024_NoDZLK_anonymized.dta"))
dhs_listing <- read_dta(paste0(input_path, "FINAL MDHS LISTING DATA_Annon.dta"))
Expand All @@ -41,7 +41,7 @@ mphc_2018 <- mphc_2018 %>%
mphc_2018_no_gps <- mphc_2018 %>%
filter(is.na(hh_longitude) | is.na(hh_latitude))

#Add additonal digits to EA and TA code
#Add additional digits to EA and TA code

mphc_2018_no_gps <- mphc_2018_no_gps %>%
mutate(new_ta = str_pad(ta, width = 2, pad = 0),
Expand All @@ -67,7 +67,7 @@ mphc_pop_no_gps <- mphc_2018_no_gps %>%
female_count = sum(p03 == 2, na.rm = TRUE))


#Create a bin for each age catgeory
#Create a bin for each age category
age_summary_no_gps <- mphc_2018_no_gps %>%
mutate(age_group = case_when(
p05 < 1 ~ "age_group_01_less", #less than 1
Expand Down Expand Up @@ -159,6 +159,52 @@ mphc_2018_df <- mphc_2018_df %>%
#check the summary of gps accuracy
summary(mphc_2018_df$hh_gps_accuracy)


# #Fix corrupt geometries
# st_make_valid(ea)

# #Turn off s2 spherical geometry (use planar geometry operations)
# sf::sf_use_s2(FALSE)

# #transform
# mphc_2018_sf <- st_transform(mphc_2018_sf, crs = st_crs(ea))

# # EA Nearest Neighbor Assignment
# nearest_indices <- st_nearest_feature(mphc_2018_sf, ea)

# # Extract the EA_CODE of the nearest polygons
# nearest_ids <- ea$EA_CODE[nearest_indices]

# # Add the EA_CODE to data
# mphc_2018_sf$EA_CODE <- nearest_ids

# #Write to file
# st_write(mphc_2018_sf ,
# dsn = file.path(output_path, "mphc_2018_sf_ea.gpkg"),
# driver = "GPKG",
# delete_layer = TRUE
# )

# #load dataset
# #mphc_2018_sf <- st_read(paste0(output_path, "mphc_2018_sf_ea.gpkg"))

# #convert to dataframe
# mphc_2018_df <- mphc_2018_sf %>%
# as_tibble()

# #check the summary of gps accuracy
# summary(mphc_2018_df$hh_gps_accuracy)

# # NOTE: ONS CHANGE: Adding a 'hh_count' column of 1 per row. This is a required
# # column below but is not in the source data.
# # This change assumes that each row corresponds to a single resident of Malawi in
# # the census records.
# # Justification: total rows in source data is 17,563,749, matching the published
# # population count for the census. Also this replicates the logic of the no_gps
# # processing above.
# mphc_2018_df <- mphc_2018_df %>%
# mutate(hh_count = 1) # Individual observation

# Summarize data based on their spatial location

mphc_2018_pop_spatial <- mphc_2018_df %>%
Expand Down
28 changes: 18 additions & 10 deletions 01_Raster_Mosaicking_Buildings_2018.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,34 @@
#Script to mosaic rasters using neighboring countries
# Script to mosaic rasters using neighboring countries

library(terra)
library(sf)
library(tictoc)

source("utils.R")

# Specify data path
drive_path <- "//Working/MALAWI/Ortis/"
base_path <- paste0(drive_path, "Input_Data/") ## Base path where the folders are located
shp_path <- paste0(drive_path, "Input_Data/Shapefiles/") ## Shapefile path
result_path <- paste0(drive_path, "Input_Data/Mosaic_Buildings_2018/") # Result path
building_path <- paste0(drive_path, "Input_Data/Malawi_Covs/2018_Buildings/")
drive_path <- "./data"
base_path <- paste0(drive_path) ## Base path where the folders are located
shp_path <- paste0(drive_path, "/Shapefiles/") ## Shapefile path
result_path <- paste0(drive_path, "/Mosaic_Buildings/") # Result path
building_path <- paste0(drive_path, "/covariate_data/")

#Load data
boundary <- st_read(paste0(shp_path, "Country_Shapefile_Buffer_10km.shp"))
r1 <- rast(paste0(building_path, "mwi_buildings_count_2018_glv2_5_t0_5_C_100m_v1.tif"))
boundary_data_filename <- "Country_Shapefile_Buffer_10km.shp"
if(file.exists(paste0(shp_path, boundary_data_filename))) {
boundary <- st_read(paste0(shp_path, boundary_data_filename))
} else {
boundary <- generate_buffered_country_boundary(shp_path, boundary_data_filename)
}

r1 <- rast(paste0(building_path, "mwi_buildings_count_BCB_ms_100m_v1_1.tif")) # replacing: "mwi_buildings_count_2018_glv2_5_t0_5_C_100m_v1.tif"

#Reproject boundary to r1
boundary <- st_transform(boundary, crs = st_crs(r1))

# Define folder names
folders <- c("Malawi_Covs/2018_Buildings","Mozambique_Covs/2018_Buildings",
"Tanzania_Covs/2018_Buildings", "Zambia_Covs/2018_Buildings")
folders <- c("Malawi_Covs/Buildings", "Tanzania_Covs/Buildings",
"Mozambique_Covs/Buildings", "Zambia_Covs/Buildings")


# Initialize a list to store raster file names
Expand Down
9 changes: 8 additions & 1 deletion 01_Raster_Mosaicking_Buildings_2024.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(terra)
library(sf)
library(tictoc)

source("utils.R")

# Specify data path
drive_path <- "//Working/MALAWI/Ortis/"
base_path <- paste0(drive_path, "Input_Data/") ## Base path where the folders are located
Expand All @@ -10,7 +12,12 @@ result_path <- paste0(drive_path, "Input_Data/Mosaic_Buildings_2024/") # Result
building_path <- paste0(drive_path, "Input_Data/Malawi_Covs/2024_Buildings/")

#Load data
boundary <- st_read(paste0(shp_path, "Country_Shapefile_Buffer_10km.shp"))
boundary_data_filename <- "Country_Shapefile_Buffer_10km.shp"
if(file.exists(paste0(shp_path, boundary_data_filename))) {
boundary <- st_read(paste0(shp_path, boundary_data_filename))
} else {
boundary <- generate_buffered_country_boundary(shp_path, boundary_data_filename)
}
r1 <- rast(paste0(building_path, "mwi_buildings_count_2023_glv2_5_t0_5_C_100m_v1.tif"))

#Reproject boundary to r1
Expand Down
12 changes: 10 additions & 2 deletions 01_Raster_Mosaicking_Workflow_2018.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,23 @@ library(terra)
library(sf)
library(tictoc)

source("utils.R")

# Specify data path
drive_path <- "//Working/MALAWI/Ortis/"
drive_path <- "./data/"
base_path <- paste0(drive_path, "Input_Data/") ## Base path where the folders are located
shp_path <- paste0(drive_path, "Input_Data/Shapefiles/") ## Shapefile path
result_path <- paste0(drive_path, "Input_Data/Mosaic_Covariates_2018/") # Result path
building_path <- paste0(drive_path, "Input_Data/Malawi_Covs/2024_Buildings/")

#Load data
boundary <- st_read(paste0(shp_path, "Country_Shapefile_Buffer_10km.shp"))
boundary_data_filename <- "Country_Shapefile_Buffer_10km.shp"
if(file.exists(paste0(shp_path, boundary_data_filename))) {
boundary <- st_read(paste0(shp_path, boundary_data_filename))
} else {
boundary <- generate_buffered_country_boundary(shp_path, boundary_data_filename)
}

r1 <- rast(paste0(building_path, "mwi_buildings_count_2023_glv2_5_t0_5_C_100m_v1.tif"))

#Reproject boundary to r1
Expand Down
12 changes: 10 additions & 2 deletions 01_Raster_Mosaicking_Workflow_2024.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,23 @@ library(terra)
library(sf)
library(tictoc)

source("utils.R")

# Specify data path
drive_path <- "//Working/MALAWI/Ortis/"
drive_path <- "./data/"
base_path <- paste0(drive_path, "Input_Data/") ## Base path where the folders are located
shp_path <- paste0(drive_path, "Input_Data/Shapefiles/") ## Shapefile path
result_path <- paste0(drive_path, "Input_Data/Mosaic_Covariates_2024/") # Result path
building_path <- paste0(drive_path, "Input_Data/Malawi_Covs/2024_Buildings/")

#Load data
boundary <- st_read(paste0(shp_path, "Country_Shapefile_Buffer_10km.shp"))
boundary_data_filename <- "Country_Shapefile_Buffer_10km.shp"
if(file.exists(paste0(shp_path, boundary_data_filename))) {
boundary <- st_read(paste0(shp_path, boundary_data_filename))
} else {
boundary <- generate_buffered_country_boundary(shp_path, boundary_data_filename)
}

r1 <- rast(paste0(building_path, "mwi_buildings_count_2023_glv2_5_t0_5_C_100m_v1.tif"))

#Reproject boundary to r1
Expand Down
6 changes: 3 additions & 3 deletions 02_Covariates_Extraction.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ library(terra)
library(exactextractr)

#Specify Drive Path
drive_path <- "//Working/MALAWI/Ortis/"
drive_path <- "./data/"
input_path <- paste0(drive_path, "Output_Data/")
shapefile_path <- paste0(drive_path, "Input_Data/Shapefiles/")
shapefile_path <- paste0(drive_path, "Shapefiles/")
covs_path_2018 <- paste0(drive_path, "Input_Data/Mosaic_Covariates_2018/")
covs_path_2024 <- paste0(drive_path, "Input_Data/Mosaic_Covariates_2024/")
bcount_path_2018 <- paste0(drive_path, "Input_Data/Mosaic_Buildings_2018/")
Expand All @@ -20,7 +20,7 @@ output_path <- paste0(drive_path, "Output_Data/")


# Load dataset ------------------------------------------------------------
ea <- st_read(file.path(shapefile_path, "EA_Shapefile.shp"))
ea <- st_read(file.path(shapefile_path, "2018_MPHC_EAs_Final_for_Use.shp")) # replaces "EA_Shapefile.shp"
pop_data <- read.csv(file.path (input_path, "summarized_survey_data.csv"))
r1 <- rast(file.path(bcount_path_2024, "MOS_MLW_buildings_count_2023_glv2_5_t0_5_C_100m_v1.tif"))

Expand Down
2 changes: 1 addition & 1 deletion 03_HH_Model_Workflow_2024c.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ options(scipen = 999) # turn off scientific notation for all variables
#options(digits = 3)

#Specify Drive Path
drive_path <- "//Working/MALAWI/Ortis/"
drive_path <- "./data/"
input_path <- paste0(drive_path, "Output_Data/")
shapefile_path <- paste0(drive_path, "Input_Data/Shapefiles/")
output_path <- paste0(drive_path, "Output_Data/")
Expand Down
4 changes: 2 additions & 2 deletions 03_Pop_Model_Workflow_2018.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ options(scipen = 999) # turn off scientific notation for all variables
#options(digits = 3)

#Specify Drive Path
drive_path <- "//Working/MALAWI/Ortis/"
drive_path <- "./data/"
input_path <- paste0(drive_path, "Output_Data/")
shapefile_path <- paste0(drive_path, "Input_Data/Shapefiles/")
shapefile_path <- paste0(drive_path, "Shapefiles/")
output_path <- paste0(drive_path, "Output_Data/")
output_path1 <- paste0(drive_path, "Output_Data/Pop_Rasters/")

Expand Down
6 changes: 3 additions & 3 deletions 04_Covs_Stack_Raster_cropping.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ library(feather)
library(sf)

#Specify Drive Path
drive_path <- "//Working/MALAWI/Ortis/"
drive_path <- "./data/"
covs_path_2024 <- paste0(drive_path, "Input_Data/Mosaic_Covariates_2024/")
output_path <- paste0(drive_path, "Output_Data/")
shapefile_path <- paste0(drive_path, "Input_Data/Shapefiles/")
shapefile_path <- paste0(drive_path, "Shapefiles/")
bcount_path_2024 <- paste0(drive_path, "Input_Data/Mosaic_Buildings_2024/")


Expand Down Expand Up @@ -143,7 +143,7 @@ prediction_covs <- prediction_covs |>

#Read EA shapefiles and join to data

ea <- st_read(file.path(shapefile_path, "EA_Shapefile.shp"))
ea <- st_read(file.path(shapefile_path, "2018_MPHC_EAs_Final_for_Use.shp")) # replaces "EA_Shapefile.shp"

#create unique id for each district
district <- ea |>
Expand Down
6 changes: 3 additions & 3 deletions 04_Rasterize.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ library(raster)


#Specify Drive Path
drive_path <- "//Working/MALAWI/Ortis/"
drive_path <- "./data/"
output_path <- paste0(drive_path, "Output_Data/")
shapefile_path <- paste0(drive_path, "Input_Data/Shapefiles/")
shapefile_path <- paste0(drive_path, "Shapefiles/")
bcount_path_2024 <- paste0(drive_path, "Input_Data/Mosaic_Buildings_2024/")

#Load datasets
ea <- st_read(file.path(shapefile_path, "EA_Shapefile.shp"))
ea <- st_read(file.path(shapefile_path, "2018_MPHC_EAs_Final_for_Use.shp")) # replaces "EA_Shapefile.shp"
bcount <- rast(file.path(bcount_path_2024, "MOS_MLW_buildings_count_2023_glv2_5_t0_5_C_100m_v1.tif"))
country <- st_read(file.path(shapefile_path, "Country_Shapefile.shp"))
hh_size <- read.csv(paste0(output_path, "summarized_survey_data.csv"))
Expand Down
41 changes: 41 additions & 0 deletions load_required_libraries.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#' Check if named library installed, if so load, if not install and load
#'
#' @param library_name (character). The name of the library to be
#'   installed/loaded. Must be a single, non-empty string.
#' @return Invisibly, whatever `library()` returns after attaching the package.
load_library <- function(library_name) {
  # Fail early with a clear error instead of an obscure one from `if`/`library`.
  stopifnot(
    is.character(library_name),
    length(library_name) == 1L,
    !is.na(library_name),
    nzchar(library_name)
  )

  # system.file() returns "" when the package cannot be found. This avoids
  # building the full installed.packages() matrix just to test one name.
  is_installed <- nzchar(system.file(package = library_name))

  if (!is_installed) {
    # Status output goes through message() so it can be suppressed/redirected.
    message(
      "The required library '", library_name,
      "' is not installed, installing now."
    )
    install.packages(library_name, dependencies = TRUE)
  }

  # Attach in both cases; errors here if the installation above failed.
  library(library_name, character.only = TRUE)
}

#' Apply the load_library function across a vector of library names.
#'
#' Libraries are processed one at a time, in the order given.
#'
#' @param library_names (vector). A vector of libraries to be loaded (or installed).
#' @return `library_names`, invisibly, so a top-level call prints nothing.
load_libraries <- function(library_names) {
  # Called purely for side effects, so a plain loop replaces sapply(), whose
  # return type varies with the input and would be auto-printed at top level.
  for (library_name in library_names) {
    load_library(library_name)
  }
  invisible(library_names)
}

# Add any required packages to this list.
# Each entry is passed to load_libraries() below and handled in the order listed.
required_libraries <- c(
"sf", # package for handling spatial data
"tidyverse", # tidyverse collection of packages
"haven", # package for importing foreign statistical formats
"terra", # package for spatial data analysis
"tictoc", # package for timing R scripts
"exactextractr", # zonal statistics of polygons
"gstat", # Spatial and Spatio-Temporal Geostatistical Modelling, Prediction and Simulation
"spdep", # spatial dependence and weights
"car", # Companion to Applied Regression
"caret", # Classification And REgression Training
"kableExtra", # for html tables
"inlabru", # package for Bayesian spatial modelling
"feather" # reading and writing feather files
)

# Execute code to load (and install, where missing) the libraries listed above
load_libraries(required_libraries)
Loading