Skip to content

Commit 6ba37da

Browse files
committed
add melt-based data.table implementations
1 parent 8a79193 commit 6ba37da

35 files changed

+223
-149
lines changed

DESCRIPTION

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ Package: cdata
22
Type: Package
33
Title: Fluid Data Transformations
44
Version: 0.7.2
5-
Date: 2018-06-25
5+
Date: 2018-07-07
66
Authors@R: c(
77
person("John", "Mount", email = "jmount@win-vector.com", role = c("aut", "cre")),
88
person("Nina", "Zumel", email = "nzumel@win-vector.com", role = c("aut")),
@@ -21,13 +21,16 @@ RoxygenNote: 6.0.1
2121
Depends:
2222
R (>= 3.2.1)
2323
Imports:
24-
wrapr (>= 1.5.0)
24+
wrapr (>= 1.5.0),
25+
stats
2526
Suggests:
2627
DBI,
2728
RSQLite,
2829
testthat,
2930
knitr,
30-
rmarkdown
31+
rmarkdown,
32+
data.table (>= 1.11.4),
33+
reshape2
3134
VignetteBuilder: knitr
3235
ByteCompile: true
3336

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ export(qlook)
2626
export(rowrecs_to_blocks)
2727
export(rowrecs_to_blocks_q)
2828
export(unpivot_to_blocks)
29+
importFrom(stats,as.formula)
2930
importFrom(stats,complete.cases)
3031
importFrom(wrapr,"%.>%")
3132
importFrom(wrapr,"%:=%")

NEWS.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

2-
# cdata 0.7.2 2018/06/25
2+
# cdata 0.7.2 2018/07/07
33

4+
* switch local ops to data.table implementation.
45
* re-export more of wrapr
56
* move db fns to rquery.
67

R/LocalOps.R

Lines changed: 110 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
# in-memory direct functionality
55

66

7-
7+
#' @importFrom stats as.formula
8+
NULL
89

910

1011
#' Build a blocks_to_rowrecs()/rowrecs_to_blocks() control table that specifies a pivot from a \code{data.frame}.
@@ -87,6 +88,36 @@ build_unpivot_control <- function(nameForNewKeyColumn,
8788
}
8889

8990

91+
# unpack control table into maps
92+
build_transform_maps <- function(controlTable) {
93+
cCheck <- checkControlTable(controlTable, FALSE)
94+
if(!is.null(cCheck)) {
95+
stop(paste("cdata:::build_transform_maps", cCheck))
96+
}
97+
# use control table to get into a triple-form (only one data column, all others keys).
98+
cells <- as.character(unlist(unlist(controlTable[, -1])))
99+
cells_to_row_labels <- controlTable
100+
for(i in 1:nrow(controlTable)) {
101+
cells_to_row_labels[i, ] <- cells_to_row_labels[i, 1]
102+
}
103+
cells_to_row_labels <- as.character(unlist(cells_to_row_labels[, -1]))
104+
names(cells_to_row_labels) <- cells
105+
cells_to_col_labels <- controlTable
106+
for(j in 2:ncol(controlTable)) {
107+
cells_to_col_labels[, j] <- colnames(controlTable)[[j]]
108+
}
109+
cells_to_col_labels <- as.character(unlist(cells_to_col_labels[, -1]))
110+
names(cells_to_col_labels) <- cells
111+
rows_cols_to_cells <- cells
112+
names(rows_cols_to_cells) <- paste(cells_to_row_labels, ",", cells_to_col_labels)
113+
list(
114+
cells = cells,
115+
cells_to_row_labels = cells_to_row_labels,
116+
cells_to_col_labels = cells_to_col_labels,
117+
rows_cols_to_cells = rows_cols_to_cells
118+
)
119+
}
120+
90121

91122
#' Map a set of columns to rows (takes a \code{data.frame}).
92123
#'
@@ -122,6 +153,7 @@ build_unpivot_control <- function(nameForNewKeyColumn,
122153
#' @param columnsToCopy character array of column names to copy
123154
#' @param checkNames logical, if TRUE check names
124155
#' @param strict logical, if TRUE check control table name forms
156+
#' @param use_data_table logical if TRUE try to use data.table for the pivots.
125157
#' @return long table built by mapping wideTable to one row per group
126158
#'
127159
#' @seealso \code{\link{build_unpivot_control}}, \code{\link{blocks_to_rowrecs_q}}
@@ -143,14 +175,16 @@ rowrecs_to_blocks <- function(wideTable,
143175
...,
144176
checkNames = TRUE,
145177
strict = FALSE,
146-
columnsToCopy = NULL) {
178+
columnsToCopy = NULL,
179+
use_data_table = TRUE) {
147180
wrapr::stop_if_dot_args(substitute(list(...)), "cdata::rowrecs_to_blocks")
148181
if(!is.data.frame(wideTable)) {
149182
stop("cdata::rowrecs_to_blocks wideTable shoud be a data.frame")
150183
}
151184
if(!is.data.frame(controlTable)) {
152185
stop("cdata::rowrecs_to_blocks controlTable shoud be a data.frame")
153186
}
187+
rownames(wideTable) <- NULL
154188
cCheck <- checkControlTable(controlTable, strict)
155189
if(!is.null(cCheck)) {
156190
stop(paste("cdata::rowrecs_to_blocks", cCheck))
@@ -165,6 +199,35 @@ rowrecs_to_blocks <- function(wideTable,
165199
paste(badCells, collapse = ', ')))
166200
}
167201
}
202+
203+
if( use_data_table &&
204+
requireNamespace("data.table", quietly = TRUE) &&
205+
requireNamespace("reshape2", quietly = TRUE) ) {
206+
maps <- build_transform_maps(controlTable)
207+
208+
# from rowrec to one value per row form (triple-like)
209+
d_thin_r <- data.table::melt(data.table::as.data.table(wideTable),
210+
variable.name = "cdata_cell_label",
211+
value.name = "cdata_cell_value",
212+
id.vars = columnsToCopy,
213+
measure.vars = maps$cells)
214+
d_thin_r$cdata_row_label <- maps$cells_to_row_labels[d_thin_r$cdata_cell_label]
215+
d_thin_r$cdata_col_label <- maps$cells_to_col_labels[d_thin_r$cdata_cell_label]
216+
217+
# cast to block form
218+
f <- paste0(paste(c(columnsToCopy, "cdata_row_label"), collapse = " + "), " ~ ", "cdata_col_label")
219+
r <- data.table::dcast(d_thin_r, as.formula(f), value.var = "cdata_cell_value")
220+
colnames(r)[which(colnames(r)=="cdata_row_label")] <- colnames(controlTable)[[1]]
221+
rownames(r) <- NULL
222+
return(as.data.frame(r))
223+
}
224+
225+
if( use_data_table ) {
226+
warning("cdata::rowrecs_to_blocks use_data_table==TRUE requires data.table and reshape2 packages")
227+
}
228+
229+
# fall back to local impl
230+
168231
n_row_in <- nrow(wideTable)
169232
n_rep <- nrow(controlTable)
170233
n_row_res <- n_rep*n_row_in
@@ -194,6 +257,7 @@ rowrecs_to_blocks <- function(wideTable,
194257
res[[cn]][indxs] <- wideTable[[col]]
195258
}
196259
}
260+
rownames(res) <- NULL
197261
res
198262
}
199263

@@ -233,6 +297,7 @@ rowrecs_to_blocks <- function(wideTable,
233297
#' @param columnsToCopy character, extra columns to copy (aribrary which row per group).
234298
#' @param checkNames logical, if TRUE check names
235299
#' @param strict logical, if TRUE check control table name forms
300+
#' @param use_data_table logical if TRUE try to use data.table for the pivots.
236301
#' @return wide table built by mapping key-grouped tallTable rows to one row per group
237302
#'
238303
#' @seealso \code{\link{build_pivot_control}}, \code{\link{rowrecs_to_blocks_q}}
@@ -258,14 +323,23 @@ blocks_to_rowrecs <- function(tallTable,
258323
...,
259324
columnsToCopy = NULL,
260325
checkNames = TRUE,
261-
strict = FALSE) {
326+
strict = FALSE,
327+
use_data_table = TRUE) {
262328
wrapr::stop_if_dot_args(substitute(list(...)), "cdata::blocks_to_rowrecs")
263329
if(!is.data.frame(tallTable)) {
264330
stop("cdata::blocks_to_rowrecs tallTable shoud be a data.frame")
265331
}
266332
if(!is.data.frame(controlTable)) {
267333
stop("cdata::blocks_to_rowrecs controlTable shoud be a data.frame")
268334
}
335+
rownames(tallTable) <- NULL
336+
clear_key_column <- FALSE
337+
if(length(keyColumns)<=0) {
338+
# avoid no-keys case
339+
tallTable$cdata_key_column <- 1
340+
keyColumns <- "cdata_key_column"
341+
clear_key_column <- TRUE
342+
}
269343
cCheck <- checkControlTable(controlTable, strict)
270344
if(!is.null(cCheck)) {
271345
stop(paste("cdata::blocks_to_rowrecs", cCheck))
@@ -278,6 +352,38 @@ blocks_to_rowrecs <- function(tallTable,
278352
paste(badCells, collapse = ', ')))
279353
}
280354
}
355+
356+
if( use_data_table &&
357+
requireNamespace("data.table", quietly = TRUE) &&
358+
requireNamespace("reshape2", quietly = TRUE) ) {
359+
maps <- build_transform_maps(controlTable)
360+
361+
# from block form to one value per row form (triple-like)
362+
d_thin_b <- data.table::melt(data.table::as.data.table(tallTable),
363+
variable.name = "cdata_col_label",
364+
value.name = "cdata_cell_value",
365+
id.vars = c(keyColumns, colnames(controlTable)[[1]]),
366+
measure.vars = colnames(controlTable)[-1])
367+
d_thin_b$cdata_row_label <- d_thin_b[[colnames(controlTable)[[1]]]]
368+
d_thin_b[[colnames(controlTable)[[1]]]] <- NULL
369+
d_thin_b$cdata_cell_label <- maps$rows_cols_to_cells[paste(d_thin_b$cdata_row_label, ",", d_thin_b$cdata_col_label)]
370+
371+
# cast to rowrec form
372+
f <- paste0(paste(keyColumns, collapse = " + "), " ~ ", "cdata_cell_label")
373+
r <- data.table::dcast(d_thin_b, as.formula(f), value.var = "cdata_cell_value")
374+
if(clear_key_column) {
375+
r$cdata_key_column <- NULL
376+
}
377+
rownames(r) <- NULL
378+
return(as.data.frame(r))
379+
}
380+
381+
if( use_data_table ) {
382+
warning("cdata::blocks_to_rowrecs use_data_table==TRUE requires data.table and reshape2 packages")
383+
}
384+
385+
# fall back to local impl
386+
281387
# make simple grouping keys
282388
tallTable$cdata_group_key_col <- 1
283389
if(length(keyColumns)>=1) {
@@ -312,6 +418,7 @@ blocks_to_rowrecs <- function(tallTable,
312418
}
313419
}
314420
res$cdata_group_key_col <- NULL
421+
rownames(res) <- NULL
315422
res
316423
}
317424

R/RowsColsQ.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ checkControlTable <- function(controlTable, strict) {
1616
if(nrow(controlTable)<1) {
1717
return("control table must have at least 1 row")
1818
}
19-
if(ncol(controlTable)<1) {
20-
return("control table must have at least 1 column")
19+
if(ncol(controlTable)<2) {
20+
return("control table must have at least 2 columns")
2121
}
2222
classes <- vapply(controlTable, class, character(1))
2323
if(!all(classes=='character')) {

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ tab <- blocks_to_rowrecs_q('d',
4848
qlook(my_db, tab)
4949
```
5050

51-
## table `mvtcq_89137749253854889914_0000000001` spark_connection spark_shell_connection DBIConnection
51+
## table `mvtcq_87375781106602338323_0000000001` spark_connection spark_shell_connection DBIConnection
5252
## nrow: 1
5353
## 'data.frame': 1 obs. of 2 variables:
5454
## $ AUC: num 0.6

cran-comments.md

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,16 @@
66
* using R version 3.5.0 (2018-04-23)
77
* using platform: x86_64-apple-darwin15.6.0 (64-bit)
88

9-
109
* win-builder
11-
* using R Under development (unstable) (2018-06-15 r74904)
10+
* using R Under development (unstable) (2018-07-01 r74950)
1211
* using platform: x86_64-w64-mingw32 (64-bit)
13-
14-
## R CMD check --as-cran cdata_0.7.1.tar.gz
1512

16-
* using session charset: UTF-8
13+
## R CMD check --as-cran cdata_0.7.2.tar.gz
14+
1715
* using option ‘--as-cran’
1816
* checking for file ‘cdata/DESCRIPTION’ ... OK
1917
* checking extension type ... Package
20-
* this is package ‘cdata’ version ‘0.7.1
18+
* this is package ‘cdata’ version ‘0.7.2
2119
* package encoding: UTF-8
2220
* checking CRAN incoming feasibility ... Note_to_CRAN_maintainers
2321
Maintainer: ‘John Mount <jmount@win-vector.com>
@@ -32,3 +30,4 @@ Checked all declared dependencies:
3230

3331
Checking 1 packages: WVPlots
3432
Checked WVPlots: 0 errors | 0 warnings | 0 notes
33+

docs/articles/FrameTools.html

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/articles/cdata.html

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/index.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)