From 743597e4c4025988a12db007fc398a28c4d38b32 Mon Sep 17 00:00:00 2001
From: Seng Chan You
Date: Tue, 2 Oct 2018 17:45:33 +0900
Subject: [PATCH 1/3] adding language argument

Signed-off-by: Seng Chan You
---
Review notes (not part of the commit message):
- ExtractorFromDictionary() gains a `language` argument and selects the
  dictionary itself instead of calling diction(); any value other than
  "Korean" stops with an error.
- After this patch alone, diction() and wordToCovariate() both read
  `language` without declaring it as a parameter. Both signatures are
  corrected in PATCH 3/3.
- Indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this series; confirm them against the
  target files before applying.

 R/ExtractorFromDictionary.R  |  8 ++++++--
 R/diction.R                  |  7 ++++++-
 R/getTopicFromNoteSettings.R | 10 +++++-----
 R/wordToCovariate.R          |  2 +-
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/R/ExtractorFromDictionary.R b/R/ExtractorFromDictionary.R
index 7389246..e4c1eb6 100644
--- a/R/ExtractorFromDictionary.R
+++ b/R/ExtractorFromDictionary.R
@@ -6,9 +6,13 @@
 #' @export
 #' @examples
 #' ExtractorFromDictionary()
-ExtractorFromDictionary <- function(doc.df){
+ExtractorFromDictionary <- function(doc.df,language){
 
-  dictionary <- diction()
+  if(language=="Korean") {
+    dictionary = kor_dictionary_db
+  } else {
+    stop("Currently only Korean is available")
+  }
   colnames(dictionary) <- c('word')
 
   kor_tmp_word <- c()
diff --git a/R/diction.R b/R/diction.R
index 9396cd3..9814b4e 100644
--- a/R/diction.R
+++ b/R/diction.R
@@ -7,7 +7,12 @@
 #' @examples
 #' diction()
 diction <- function(){
-  return(kor_dictionary_db)
+  if(language=="Korean") {
+    dicDb = kor_dictionary_db
+  } else {
+    stop("Currently only Korean is available")
+  }
+  return(dicDb)
 }
 
 
diff --git a/R/getTopicFromNoteSettings.R b/R/getTopicFromNoteSettings.R
index a85be4a..998ca25 100644
--- a/R/getTopicFromNoteSettings.R
+++ b/R/getTopicFromNoteSettings.R
@@ -17,9 +17,9 @@
 if(!require(rJava)) {
   install.packages('rJava')
 }
-if(!require(KoNLP)) {
-  install.packages('KoNLP')
-}
+# if(!require(KoNLP)) {
+#   install.packages('KoNLP')
+# }
 if(!require(devtools)) {
   install.packages('devtools')
 }
@@ -52,7 +52,7 @@ library(caret)
 library(dplyr)
 library(text2vec)
 library(e1071)
-useSejongDic()
+# useSejongDic()
 
 
 
@@ -104,7 +104,7 @@ getTopicFromNoteSettings <- function(connection,
 
   row_id <- rawCovariates$row_id
   covariates_value <- rawCovariates$covariate_id
-  covariates <- wordToCovariate(row_id,covariates_value,useDictionary)
+  covariates <- wordToCovariate(row_id,covariates_value,useDictionary,covariateSettings$language)
 
   # Convert colum names to camelCase:
   colnames(covariates) <- SqlRender::snakeCaseToCamelCase(colnames(covariates))
diff --git a/R/wordToCovariate.R b/R/wordToCovariate.R
index 9f77ae4..6923d79 100644
--- a/R/wordToCovariate.R
+++ b/R/wordToCovariate.R
@@ -12,7 +12,7 @@ wordToCovariate <- function(rowid,covariatesvalue,useDictionary){
 
   doc.df <- LanguagePreProcessingFunction(result_xml_df)
 
-  df <- ExtractorFromDictionary(doc.df)
+  df <- ExtractorFromDictionary(doc.df,language)
   df <- cbind(df,rep(1,nrow(df)))
   colnames(df) <- c('row_id','covariate_id','covariate_value')
 

From 2d93ddf80842f79d348d2a588ac3ffc58918122a Mon Sep 17 00:00:00 2001
From: Seng Chan You
Date: Tue, 2 Oct 2018 17:51:37 +0900
Subject: [PATCH 2/3] adding argument for language

Signed-off-by: Seng Chan You
---
Review notes (not part of the commit message):
- Adds `language = "Korean"` to createTopicFromNoteSettings(); this is
  the value getTopicFromNoteSettings() reads as
  covariateSettings$language.

 R/createTopicFromNoteSettings.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/createTopicFromNoteSettings.R b/R/createTopicFromNoteSettings.R
index beaa2e1..bb8500d 100644
--- a/R/createTopicFromNoteSettings.R
+++ b/R/createTopicFromNoteSettings.R
@@ -16,6 +16,7 @@ createTopicFromNoteSettings <- function(useTopicFromNote = TRUE,
                                         LatentDimensionForGlove = 100L,
                                         useAutoencoder=FALSE,
                                         LatentDimensionForAutoEncoder = 100L,
+                                        language="Korean",
                                         sampleSize=-1){
     if(sum(useDictionary) == 0){
         stop('Not implemented.')

From c419f28bda11062936c118bb61495749b5f3ccea Mon Sep 17 00:00:00 2001
From: Seng Chan You
Date: Tue, 2 Oct 2018 17:54:48 +0900
Subject: [PATCH 3/3] fix bugs for language arguments

Signed-off-by: Seng Chan You
---
Review notes (not part of the commit message):
- wordToCovariate() now declares the `language` parameter it already
  forwards to ExtractorFromDictionary(), and the caller passes it by
  name.
- Added during review: the R/diction.R hunk below. PATCH 1/3 made
  diction() test `language` without declaring it, so every call failed
  with "object 'language' not found". The parameter defaults to
  "Korean" so existing diction() calls keep working. The hunk carries
  no `index` line because the resulting blob hash was not computed.
- TODO(review): the new `language` arguments have no roxygen @param
  entries yet.

 R/diction.R                  | 2 +-
 R/getTopicFromNoteSettings.R | 2 +-
 R/wordToCovariate.R          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/diction.R b/R/diction.R
--- a/R/diction.R
+++ b/R/diction.R
@@ -7,6 +7,6 @@
 #' @examples
 #' diction()
-diction <- function(){
+diction <- function(language = "Korean"){
   if(language=="Korean") {
     dicDb = kor_dictionary_db
   } else {
diff --git a/R/getTopicFromNoteSettings.R b/R/getTopicFromNoteSettings.R
index 998ca25..8a2bc15 100644
--- a/R/getTopicFromNoteSettings.R
+++ b/R/getTopicFromNoteSettings.R
@@ -104,7 +104,7 @@ getTopicFromNoteSettings <- function(connection,
 
   row_id <- rawCovariates$row_id
   covariates_value <- rawCovariates$covariate_id
-  covariates <- wordToCovariate(row_id,covariates_value,useDictionary,covariateSettings$language)
+  covariates <- wordToCovariate(row_id,covariates_value,useDictionary,language=covariateSettings$language)
 
   # Convert colum names to camelCase:
   colnames(covariates) <- SqlRender::snakeCaseToCamelCase(colnames(covariates))
diff --git a/R/wordToCovariate.R b/R/wordToCovariate.R
index 6923d79..848fc8f 100644
--- a/R/wordToCovariate.R
+++ b/R/wordToCovariate.R
@@ -6,7 +6,7 @@
 #' @export
 #' @examples
 #' wordToCovariate()
-wordToCovariate <- function(rowid,covariatesvalue,useDictionary){
+wordToCovariate <- function(rowid,covariatesvalue,useDictionary,language){
 
   result_xml_df <- NoteXmlParser(rowid,covariatesvalue)
 