Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e2d9eff
Implement left join of two datasets with working test case.
JosephTLockwood Jun 30, 2023
e5d2488
Add description for leftJoin
JosephTLockwood Jun 30, 2023
a2a4991
Change type of columnIndex to Int
JosephTLockwood Jun 30, 2023
881d584
Set temp value
JosephTLockwood Jun 30, 2023
7c8d081
Clean up
JosephTLockwood Jun 30, 2023
9577aa9
Clean up
JosephTLockwood Jun 30, 2023
51f3c0f
Add splitter of dataset
JosephTLockwood Jul 5, 2023
080eb6c
Update withClue
JosephTLockwood Jul 5, 2023
f443427
Get rid of illegal reflective access warning
JosephTLockwood Jul 5, 2023
9009d16
Merge branch 'IgnitionModuleDevelopmentCommunity:main' into left-join
JosephTLockwood Jul 11, 2023
a91c538
Get rid of illegal reflective access warning
JosephTLockwood Jul 11, 2023
8199b0e
Remove unnecessary lambda
paul-griffith Jul 12, 2023
fdc341c
Update inductiveautomation/ignition Docker tag to v8.1.29
renovate[bot] Jul 7, 2023
bdb8087
Implement left join of two datasets with working test case.
JosephTLockwood Jun 30, 2023
df77faf
Add description for leftJoin
JosephTLockwood Jun 30, 2023
8b64dbe
Change type of columnIndex to Int
JosephTLockwood Jun 30, 2023
9920015
Set temp value
JosephTLockwood Jun 30, 2023
ea28eb3
Clean up
JosephTLockwood Jun 30, 2023
8d32fad
Clean up
JosephTLockwood Jun 30, 2023
9aa819a
Add splitter of dataset
JosephTLockwood Jul 5, 2023
6608026
Merge remote-tracking branch 'origin/left-join' into left-join
JosephTLockwood Jul 19, 2023
7ec8fa7
Add description for leftJoin
JosephTLockwood Jun 30, 2023
edea280
Add splitter of dataset
JosephTLockwood Jul 5, 2023
1082b0d
Merge remote-tracking branch 'origin/left-join' into left-join
JosephTLockwood Jul 19, 2023
f98b789
Convert to PyObject
JosephTLockwood Jul 20, 2023
9ab8ad2
Clean up
JosephTLockwood Jul 20, 2023
d06742f
Add right, left, inner, outer to joiner function.
JosephTLockwood Jul 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions common/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,7 @@ dependencies {
tasks {
withType<Test> {
useJUnitPlatform()

// Opens the java.io package of java.base to unnamed modules in the test JVM.
// NOTE(review): per the commit history this was added to silence an
// "illegal reflective access" warning during tests — confirm which library needs it.
jvmArgs = listOf("--add-opens", "java.base/java.io=ALL-UNNAMED")
}
}
285 changes: 285 additions & 0 deletions common/src/main/kotlin/org/imdc/extensions/common/DatasetExtensions.kt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,291 @@ object DatasetExtensions {
return builder.build()
}

/**
 * Script entry point that joins two datasets using a caller-supplied predicate.
 *
 * Arguments (positional or keyword): `leftDataset`, `rightDataset`, `joinType` and `joinOn`.
 * `joinOn` is called with the values of one left row and the values of one right row; its
 * result is converted to a boolean to decide whether the two rows match.
 *
 * Raises a Python `TypeError` when `joinOn` is not a Python function, and a Python
 * `ValueError` when `joinType` is not one of `left`, `right`, `inner` or `outer`.
 *
 * @param args positional and keyword argument values as supplied by the script caller.
 * @param keywords names of the trailing keyword arguments in [args].
 * @return the dataset produced by the selected join.
 */
@Suppress("unused")
@ScriptFunction(docBundlePrefix = "DatasetExtensions")
@KeywordArgs(
    names = ["leftDataset", "rightDataset", "joinType", "joinOn"],
    types = [Dataset::class, Dataset::class, String::class, PyObject::class],
)
fun joiner(args: Array<PyObject>, keywords: Array<String>): Dataset? {
    val parsedArgs = PyArgParser.parseArgs(
        args,
        keywords,
        arrayOf("leftDataset", "rightDataset", "joinType", "joinOn"),
        arrayOf(Dataset::class.java, Dataset::class.java, String::class.java, PyObject::class.java),
        "joiner",
    )
    val leftDataset = parsedArgs.requirePyObject("leftDataset").toJava<Dataset>()
    val rightDataset = parsedArgs.requirePyObject("rightDataset").toJava<Dataset>()
    val joinType = parsedArgs.requirePyObject("joinType").toJava<String>()
    // Safe cast: a non-function argument should surface as a Python TypeError in the
    // calling script rather than as a JVM ClassCastException.
    val joinOn = parsedArgs.requirePyObject("joinOn") as? PyFunction
        ?: throw Py.TypeError("joinOn must be a function")

    return when (joinType) {
        "left" -> leftJoin(leftDataset, rightDataset, joinOn)
        "right" -> rightJoin(leftDataset, rightDataset, joinOn)
        "inner" -> innerJoin(leftDataset, rightDataset, joinOn)
        "outer" -> outerJoin(leftDataset, rightDataset, joinOn)
        else -> throw Py.ValueError("joinType must be one of 'left', 'right', 'inner', or 'outer'")
    }
}

/**
 * Left join: produces exactly one output row for every row of [leftDataset].
 *
 * Each left row is paired with the first row of [rightDataset] (in row order) for which
 * [joinOn] returns a truthy value. When no right row is accepted, the right-hand columns
 * of the output row are filled with nulls.
 *
 * @param joinOn predicate called with (left row values, right row values).
 * @return a dataset whose columns are the left columns followed by the right columns.
 */
@Suppress("unused")
private fun leftJoin(leftDataset: Dataset, rightDataset: Dataset, joinOn: PyFunction): Dataset? {
    val leftWidth = leftDataset.columnCount
    val rightWidth = rightDataset.columnCount

    val output = DatasetBuilder.newBuilder()
        .colNames(leftDataset.columnNames.toList() + rightDataset.columnNames.toList())
        .colTypes(leftDataset.columnTypes.toList() + rightDataset.columnTypes.toList())

    for (leftIndex in leftDataset.rowIndices) {
        val leftValues = Array(leftWidth) { col -> leftDataset[leftIndex, col] }

        // Scan right rows in order and remember the index of the first accepted one.
        var firstMatch = -1
        for (rightIndex in rightDataset.rowIndices) {
            val candidate = Array(rightWidth) { col -> rightDataset[rightIndex, col] }
            val accepted: Boolean = joinOn.__call__(Py.java2py(leftValues), Py.java2py(candidate)).toJava()
            if (accepted) {
                firstMatch = rightIndex
                break
            }
        }

        // No accepted right row: pad the right-hand side with nulls.
        val rightValues = if (firstMatch == -1) {
            Array<Any?>(rightWidth) { null }
        } else {
            Array(rightWidth) { col -> rightDataset[firstMatch, col] }
        }

        output.addRow(*leftValues, *rightValues)
    }

    return output.build()
}

/**
 * Right join: produces exactly one output row for every row of [rightDataset].
 *
 * Each right row is paired with the first row of [leftDataset] (in row order) for which
 * [joinOn] returns a truthy value. When no left row is accepted, the left-hand columns of
 * the output row are filled with nulls.
 *
 * @param joinOn predicate called with (left row values, right row values).
 * @return a dataset whose columns are the left columns followed by the right columns.
 */
private fun rightJoin(leftDataset: Dataset, rightDataset: Dataset, joinOn: PyFunction): Dataset? {
    val leftColumnCount = leftDataset.columnCount
    val rightColumnCount = rightDataset.columnCount

    val leftColumnNames = leftDataset.columnNames.toList()
    val rightColumnNames = rightDataset.columnNames.toList()
    val combinedColumnNames = leftColumnNames + rightColumnNames

    val leftColumnTypes = leftDataset.columnTypes.toList()
    val rightColumnTypes = rightDataset.columnTypes.toList()
    val combinedColumnTypes = leftColumnTypes + rightColumnTypes

    val builder = DatasetBuilder.newBuilder()
        .colNames(combinedColumnNames)
        .colTypes(combinedColumnTypes)

    for (rightRow in rightDataset.rowIndices) {
        var matchingLeftRow = -1
        val rightRowValues = Array(rightColumnCount) { col ->
            rightDataset[rightRow, col]
        }

        for (leftRow in leftDataset.rowIndices) {
            val leftRowValues = Array(leftColumnCount) { col ->
                leftDataset[leftRow, col]
            }

            if (joinOn.__call__(Py.java2py(leftRowValues), Py.java2py(rightRowValues)).toJava()) {
                // Remember the LEFT row index: it is used below to read from leftDataset.
                // Storing the right row index here would read the wrong left row (or run
                // past the end of leftDataset when the right dataset has more rows).
                matchingLeftRow = leftRow
                break
            }
        }

        val leftRowValues = if (matchingLeftRow != -1) {
            Array(leftColumnCount) { col ->
                leftDataset[matchingLeftRow, col]
            }
        } else {
            // No accepted left row: pad the left-hand side with nulls.
            Array<Any?>(leftColumnCount) { null }
        }

        val totalArray = leftRowValues.toMutableList() + rightRowValues.toMutableList()
        builder.addRow(*totalArray.toTypedArray())
    }

    return builder.build()
}

/**
 * Inner join: produces one output row for every (left row, right row) pair for which
 * [joinOn] returns a truthy value. Pairs are visited left row by left row, and within a
 * left row in right row order. Rows without any accepted partner do not appear.
 *
 * @param joinOn predicate called with (left row values, right row values).
 * @return a dataset whose columns are the left columns followed by the right columns.
 */
private fun innerJoin(leftDataset: Dataset, rightDataset: Dataset, joinOn: PyFunction): Dataset? {
    val leftWidth = leftDataset.columnCount
    val rightWidth = rightDataset.columnCount

    val output = DatasetBuilder.newBuilder()
        .colNames(leftDataset.columnNames.toList() + rightDataset.columnNames.toList())
        .colTypes(leftDataset.columnTypes.toList() + rightDataset.columnTypes.toList())

    for (leftIndex in leftDataset.rowIndices) {
        val leftValues = Array(leftWidth) { col -> leftDataset[leftIndex, col] }

        for (rightIndex in rightDataset.rowIndices) {
            val rightValues = Array(rightWidth) { col -> rightDataset[rightIndex, col] }

            val accepted: Boolean = joinOn.__call__(Py.java2py(leftValues), Py.java2py(rightValues)).toJava()
            if (!accepted) continue

            output.addRow(*leftValues, *rightValues)
        }
    }

    return output.build()
}

/**
 * Full outer join of [leftDataset] and [rightDataset].
 *
 * Output rows, in order:
 * 1. For each left row, one row per right row for which [joinOn] returns a truthy value,
 *    or a single row padded with nulls on the right when no right row is accepted.
 * 2. For each right row that was not accepted for any left row, a row padded with nulls
 *    on the left.
 *
 * The predicate is evaluated once per (left row, right row) pair; the right rows that
 * matched are recorded during that pass so the predicate is not called a second time.
 *
 * @param joinOn predicate called with (left row values, right row values).
 * @return a dataset whose columns are the left columns followed by the right columns.
 */
private fun outerJoin(leftDataset: Dataset, rightDataset: Dataset, joinOn: PyFunction): Dataset? {
    val leftColumnCount = leftDataset.columnCount
    val rightColumnCount = rightDataset.columnCount

    val combinedColumnNames = leftDataset.columnNames.toList() + rightDataset.columnNames.toList()
    val combinedColumnTypes = leftDataset.columnTypes.toList() + rightDataset.columnTypes.toList()

    val builder = DatasetBuilder.newBuilder()
        .colNames(combinedColumnNames)
        .colTypes(combinedColumnTypes)

    // Indices of right rows that were accepted for at least one left row.
    val matchedRightRows = mutableSetOf<Int>()

    for (leftRow in leftDataset.rowIndices) {
        val leftRowValues = Array(leftColumnCount) { col ->
            leftDataset[leftRow, col]
        }

        var matched = false

        for (rightRow in rightDataset.rowIndices) {
            val rightRowValues = Array(rightColumnCount) { col ->
                rightDataset[rightRow, col]
            }

            if (joinOn.__call__(Py.java2py(leftRowValues), Py.java2py(rightRowValues)).toJava()) {
                matched = true
                matchedRightRows += rightRow
                builder.addRow(*leftRowValues, *rightRowValues)
            }
        }

        if (!matched) {
            // No match for this left row: pad the right dataset columns with nulls.
            val rightPadding = Array<Any?>(rightColumnCount) { null }
            builder.addRow(*leftRowValues, *rightPadding)
        }
    }

    // Append right rows that never matched, padding the left dataset columns with nulls.
    for (rightRow in rightDataset.rowIndices) {
        if (rightRow in matchedRightRows) continue

        val rightRowValues = Array(rightColumnCount) { col ->
            rightDataset[rightRow, col]
        }
        val leftPadding = Array<Any?>(leftColumnCount) { null }
        builder.addRow(*leftPadding, *rightRowValues)
    }

    return builder.build()
}

@Suppress("unused")
@ScriptFunction(docBundlePrefix = "DatasetExtensions")
@KeywordArgs(
names = ["dataset", "columnsToSplit"],
types = [Dataset::class, Array<Array<Int>>::class],
)
fun splitter(args: Array<PyObject>, keywords: Array<String>): Array<Dataset?> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returning as an array is a little weird - a plain list would be more typical for Python/Java/Ignition

val parsedArgs = PyArgParser.parseArgs(
args,
keywords,
arrayOf("dataset", "columnsToSplit"),
arrayOf(Dataset::class.java, PyObject::class.java),
"splitter",
)
val dataset = parsedArgs.requirePyObject("dataset").toJava<Dataset>()
val columnsToSplit = parsedArgs.requirePyObject("columnsToSplit").toJava<Array<Array<Int>>>()
val datasetSplit = Array<Dataset?>(columnsToSplit.size) { null }

for ((currentDataset, newDataSets) in columnsToSplit.withIndex()) {
val columnNames = mutableListOf<String>()
val columnTypes = mutableListOf<Class<*>>()

newDataSets.forEachIndexed { _, column ->
columnNames.add(dataset.columnNames[column])
columnTypes.add(dataset.columnTypes[column])
}

val builder = DatasetBuilder.newBuilder()
.colNames(columnNames)
.colTypes(columnTypes)

for (row in dataset.rowIndices) {
val listToAppend = newDataSets.map { dataset[row, it] }.toTypedArray()
builder.addRow(*listToAppend)
}

datasetSplit[currentDataset] = builder.build()
}

return datasetSplit
}

@Suppress("unused")
@ScriptFunction(docBundlePrefix = "DatasetExtensions")
@KeywordArgs(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ map.param.mapper=A callable reference to invoke for each row. Will receive each
map.param.preserveColumnTypes=True if the types of the output dataset should match the input. Otherwise, the output dataset will lose type information.
map.returns=A modified dataset.

joiner.desc=Joins two datasets together, using a function that decides whether a row of the left dataset matches a row of the right dataset.
joiner.param.leftDataset=The left dataset. Must not be null.
joiner.param.rightDataset=The right dataset. Must not be null.
joiner.param.joinType=The type of join to perform. Must be one of "inner", "left", "right", or "outer". Must not be null.
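joiner.param.joinOn=A function called with two arguments: the values of a left row and the values of a right row, each accessed by column index. Return True if the two rows should be joined. Must not be null.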
joiner.returns=The joined dataset.

filter.desc=Runs a filtering function on each row in a dataset, returning a truncated dataset.
filter.param.dataset=The dataset to filter. Must not be null.
filter.param.filter=A function to run on each row. Will be called with keyword arguments matching column names. The first argument will be named 'row' and is the row index. Return True to keep the row in the output dataset.
Expand Down Expand Up @@ -36,6 +43,11 @@ equals.param.dataset1=The first dataset. Must not be null.
equals.param.dataset2=The second dataset. Must not be null.
equals.returns=True if the two datasets have the same number of columns, with the same types, in the same order, with the same data in each row.

splitter.desc=Splits a dataset into any number of datasets, based on a list of columns.
splitter.param.dataset=Dataset to split. Must not be null.
splitter.param.columnsToSplit=A list of lists of column indexes. Each inner list selects the columns of one output dataset. Must not be null.
splitter.returns=List of datasets

valuesEqual.desc=Compares two datasets' content.
valuesEqual.param.dataset1=The first dataset. Must not be null.
valuesEqual.param.dataset2=The second dataset. Must not be null.
Expand Down
Loading