From 379b49de357626991414b1000a64cdac701cc32a Mon Sep 17 00:00:00 2001 From: skitchen19 <32199480+skitchen19@users.noreply.github.com> Date: Mon, 22 Nov 2021 16:03:00 -0800 Subject: [PATCH] Update to Genotype Table Generation Changed how coral_mlg_rep_sample_id is sorted and filtered: all samples that share a coral_mlg_clonal_id now share one coral_mlg_rep_sample_id (removing the NAs). Also removed the drop_na() call that ran prior to writing out the genotype.tabular table. --- .../coral_multilocus_genotype.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/corals/coral_multilocus_genotype/coral_multilocus_genotype.R b/tools/corals/coral_multilocus_genotype/coral_multilocus_genotype.R index b600dbd..cf6ee1e 100644 --- a/tools/corals/coral_multilocus_genotype/coral_multilocus_genotype.R +++ b/tools/corals/coral_multilocus_genotype/coral_multilocus_genotype.R @@ -776,7 +776,9 @@ prep_genotype_tibble <- id_data_table %>% mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id.x),coral_mlg_rep_sample_id.y,coral_mlg_rep_sample_id.x)) %>% ungroup() %>% dplyr::select(-coral_mlg_rep_sample_id.x,-coral_mlg_rep_sample_id.y, -group.x,-group.y) %>% - distinct(); + group_by(coral_mlg_clonal_id) %>% + arrange(coral_mlg_rep_sample_id) %>% + slice(1); # Confirm that the representative mlg is the same between runs. 
uniques2 <- unique(prep_genotype_tibble[c("group", "coral_mlg_rep_sample_id")]); @@ -796,8 +798,7 @@ prep_genotype_tibble$coral_mlg_rep_sample_id[na.mlg3] <- uniques2$coral_mlg_rep_ representative_mlg_tibble <- prep_genotype_tibble %>% mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id) & (db_match =="no_match"), affy_id, coral_mlg_rep_sample_id)) %>% ungroup() %>% - select(-group)%>% - distinct(); + select(-group); # prep_genotype_table_tibble looks like this: # affy_id coral_mlg_clonal_id user_specimen_id db_match # a550962...CEL HG0120 1090 match @@ -806,8 +807,8 @@ representative_mlg_tibble <- prep_genotype_tibble %>% prep_genotype_table_tibble <- stag_db_report %>% select("affy_id", "coral_mlg_clonal_id", "user_specimen_id", "db_match", "genetic_coral_species_call") %>% left_join(representative_mlg_tibble %>% - select("affy_id", "coral_mlg_rep_sample_id"), - by='affy_id'); + select("coral_mlg_rep_sample_id", "coral_mlg_clonal_id"), + by='coral_mlg_clonal_id'); # genotype_table_tibble looks like this: # affy_id coral_mlg_clonal_id user_specimen_id db_match # a550962-436.CEL HG0120 1090 match @@ -816,8 +817,7 @@ prep_genotype_table_tibble <- stag_db_report %>% genotype_table_tibble <- prep_genotype_table_tibble %>% left_join(affy_metadata_data_frame %>% select("user_specimen_id", "bcoral_genet_id"), - by='user_specimen_id') %>% - drop_na(coral_mlg_rep_sample_id); + by='user_specimen_id'); write_data_frame(output_data_dir, "genotype.tabular", genotype_table_tibble); # Output the file needed for populating the person table.