# Patch summary
# -------------
# Threads a `language` argument through the dictionary-based covariate
# extraction:
#   * createTopicFromNoteSettings() gains `language="Korean"`.
#   * getTopicFromNoteSettings() passes covariateSettings$language on to
#     wordToCovariate(), which passes it on to ExtractorFromDictionary().
#   * ExtractorFromDictionary() and diction() select kor_dictionary_db when
#     language is "Korean" and otherwise stop with
#     "Currently only Korean is available".
#   * The KoNLP install/require block and the useSejongDic() call in
#     getTopicFromNoteSettings.R are commented out.
#
# Review notes
# ------------
# - diction() tested `language` without declaring it, so the documented
#   example call diction() would fail on an undefined object. It now takes
#   `language = "Korean"`, which keeps zero-argument callers working.
#   The blob hash on the diction.R `index` line was NOT recomputed.
# - getTopicFromNoteSettings() reads covariateSettings$language; the hunk for
#   createTopicFromNoteSettings() shows only the new argument, not where it is
#   stored. TODO confirm it is copied into the returned settings object.
# - Line breaks were restored from the hunk headers. Indentation and the exact
#   position of blank context lines are reconstructed; verify them against the
#   target files before applying.
diff --git a/R/ExtractorFromDictionary.R b/R/ExtractorFromDictionary.R
index 7389246..e4c1eb6 100644
--- a/R/ExtractorFromDictionary.R
+++ b/R/ExtractorFromDictionary.R
@@ -6,9 +6,13 @@
 #' @export
 #' @examples
 #' ExtractorFromDictionary()
-ExtractorFromDictionary <- function(doc.df){
+ExtractorFromDictionary <- function(doc.df,language){
 
-    dictionary <- diction()
+    if(language=="Korean") {
+        dictionary = kor_dictionary_db
+    } else {
+        stop("Currently only Korean is available")
+    }
     colnames(dictionary) <- c('word')
 
     kor_tmp_word <- c()
diff --git a/R/createTopicFromNoteSettings.R b/R/createTopicFromNoteSettings.R
index beaa2e1..bb8500d 100644
--- a/R/createTopicFromNoteSettings.R
+++ b/R/createTopicFromNoteSettings.R
@@ -16,6 +16,7 @@ createTopicFromNoteSettings <- function(useTopicFromNote = TRUE,
                                         LatentDimensionForGlove = 100L,
                                         useAutoencoder=FALSE,
                                         LatentDimensionForAutoEncoder = 100L,
+                                        language="Korean",
                                         sampleSize=-1){
     if(sum(useDictionary) == 0){
         stop('Not implemented.')
diff --git a/R/diction.R b/R/diction.R
index 9396cd3..9814b4e 100644
--- a/R/diction.R
+++ b/R/diction.R
@@ -7,7 +7,12 @@
 #' @examples
 #' diction()
-diction <- function(){
-    return(kor_dictionary_db)
+diction <- function(language = "Korean"){
+    if(language=="Korean") {
+        dicDb = kor_dictionary_db
+    } else {
+        stop("Currently only Korean is available")
+    }
+    return(dicDb)
 }
 
 
diff --git a/R/getTopicFromNoteSettings.R b/R/getTopicFromNoteSettings.R
index a85be4a..8a2bc15 100644
--- a/R/getTopicFromNoteSettings.R
+++ b/R/getTopicFromNoteSettings.R
@@ -17,9 +17,9 @@
 if(!require(rJava)) {
     install.packages('rJava')
 }
-if(!require(KoNLP)) {
-    install.packages('KoNLP')
-}
+# if(!require(KoNLP)) {
+#     install.packages('KoNLP')
+# }
 if(!require(devtools)) {
     install.packages('devtools')
 }
@@ -52,7 +52,7 @@ library(caret)
 library(dplyr)
 library(text2vec)
 library(e1071)
-useSejongDic()
+# useSejongDic()
 
 
 
@@ -104,7 +104,7 @@ getTopicFromNoteSettings <- function(connection,
     row_id <- rawCovariates$row_id
     covariates_value <- rawCovariates$covariate_id
 
-    covariates <- wordToCovariate(row_id,covariates_value,useDictionary)
+    covariates <- wordToCovariate(row_id,covariates_value,useDictionary,language=covariateSettings$language)
 
     # Convert colum names to camelCase:
     colnames(covariates) <- SqlRender::snakeCaseToCamelCase(colnames(covariates))
diff --git a/R/wordToCovariate.R b/R/wordToCovariate.R
index 9f77ae4..848fc8f 100644
--- a/R/wordToCovariate.R
+++ b/R/wordToCovariate.R
@@ -6,13 +6,13 @@
 #' @export
 #' @examples
 #' wordToCovariate()
-wordToCovariate <- function(rowid,covariatesvalue,useDictionary){
+wordToCovariate <- function(rowid,covariatesvalue,useDictionary,language){
 
     result_xml_df <- NoteXmlParser(rowid,covariatesvalue)
 
     doc.df <- LanguagePreProcessingFunction(result_xml_df)
 
-    df <- ExtractorFromDictionary(doc.df)
+    df <- ExtractorFromDictionary(doc.df,language)
 
     df <- cbind(df,rep(1,nrow(df)))
     colnames(df) <- c('row_id','covariate_id','covariate_value')