diff --git a/common/build.gradle.kts b/common/build.gradle.kts index 856193a..8b2ea26 100644 --- a/common/build.gradle.kts +++ b/common/build.gradle.kts @@ -17,5 +17,7 @@ dependencies { tasks { withType { useJUnitPlatform() + + jvmArgs = listOf("--add-opens", "java.base/java.io=ALL-UNNAMED") } } diff --git a/common/src/main/kotlin/org/imdc/extensions/common/DatasetExtensions.kt b/common/src/main/kotlin/org/imdc/extensions/common/DatasetExtensions.kt index 5b31d60..0101176 100644 --- a/common/src/main/kotlin/org/imdc/extensions/common/DatasetExtensions.kt +++ b/common/src/main/kotlin/org/imdc/extensions/common/DatasetExtensions.kt @@ -73,6 +73,291 @@ object DatasetExtensions { return builder.build() } + /** Script entry point that joins two datasets. Parses the arguments leftDataset, rightDataset, joinType and joinOn, then dispatches on joinType ("left", "right", "inner" or "outer") to leftJoin, rightJoin, innerJoin or outerJoin. joinOn is cast to PyFunction, so it has to be a Python function. Any other joinType raises a Python ValueError. */ @Suppress("unused") + @ScriptFunction(docBundlePrefix = "DatasetExtensions") + @KeywordArgs( + names = ["leftDataset", "rightDataset", "joinType", "joinOn"], + types = [Dataset::class, Dataset::class, String::class, PyObject::class], + ) + fun joiner(args: Array, keywords: Array): Dataset? 
{ + val parsedArgs = PyArgParser.parseArgs( + args, + keywords, + arrayOf("leftDataset", "rightDataset", "joinType", "joinOn"), + arrayOf(Dataset::class.java, Dataset::class.java, String::class.java, PyObject::class.java), + "joiner", + ) + val leftDataset = parsedArgs.requirePyObject("leftDataset").toJava() + val rightDataset = parsedArgs.requirePyObject("rightDataset").toJava() + val joinType = parsedArgs.requirePyObject("joinType").toJava() + val joinOn = parsedArgs.requirePyObject("joinOn") as PyFunction /* a non-function argument fails this cast */ + return when (joinType) { + "left" -> { + leftJoin(leftDataset, rightDataset, joinOn) + } + "right" -> { + rightJoin(leftDataset, rightDataset, joinOn) + } + "inner" -> { + innerJoin(leftDataset, rightDataset, joinOn) + } + "outer" -> { + outerJoin(leftDataset, rightDataset, joinOn) + } + else -> { + throw Py.ValueError("joinType must be one of 'left', 'right', 'inner', or 'outer'") + } + } + } + + @Suppress("unused") + private fun leftJoin(leftDataset: Dataset, rightDataset: Dataset, joinOn: PyFunction): Dataset? 
{ /* Left join: emits exactly one output row per left row, left columns first and right columns last. Each candidate pair is handed to joinOn as two value arrays converted with Py.java2py. When no right row matches, the right-hand columns are filled with null. NOTE(review): only the first matching right row is kept, whereas innerJoin and outerJoin emit a row for every match; confirm this is intended. */ + val leftColumnCount = leftDataset.columnCount + val rightColumnCount = rightDataset.columnCount + + val leftColumnNames = leftDataset.columnNames.toList() + val rightColumnNames = rightDataset.columnNames.toList() + val combinedColumnNames = leftColumnNames + rightColumnNames + + val leftColumnTypes = leftDataset.columnTypes.toList() + val rightColumnTypes = rightDataset.columnTypes.toList() + val combinedColumnTypes = leftColumnTypes + rightColumnTypes + + val builder = DatasetBuilder.newBuilder() + .colNames(combinedColumnNames) + .colTypes(combinedColumnTypes) + + for (leftRow in leftDataset.rowIndices) { + var matchingRow = -1 /* -1 means no right row matched */ + val leftRowValues = Array(leftColumnCount) { col -> + leftDataset[leftRow, col] + } + + for (rightRow in rightDataset.rowIndices) { + val rightRowValues = Array(rightColumnCount) { col -> + rightDataset[rightRow, col] + } + + if (joinOn.__call__(Py.java2py(leftRowValues), Py.java2py(rightRowValues)).toJava()) { + matchingRow = rightRow + break + } + } + + val rightRowValues = if (matchingRow != -1) { + Array(rightColumnCount) { col -> + rightDataset[matchingRow, col] + } + } else { + Array(rightColumnCount) { null } + } + + val totalArray = leftRowValues.toMutableList() + rightRowValues.toMutableList() + builder.addRow(*totalArray.toTypedArray()) + } + + return builder.build() + } + + private fun rightJoin(leftDataset: Dataset, rightDataset: Dataset, joinOn: PyFunction): Dataset? 
{ /* Right join: emits exactly one output row per right row, left columns first and right columns last. Each candidate pair is handed to joinOn as two value arrays converted with Py.java2py. Only the first left row for which joinOn returns true is used; when none matches, the left-hand columns are filled with null. */ + val leftColumnCount = leftDataset.columnCount + val rightColumnCount = rightDataset.columnCount + + val leftColumnNames = leftDataset.columnNames.toList() + val rightColumnNames = rightDataset.columnNames.toList() + val combinedColumnNames = leftColumnNames + rightColumnNames + + val leftColumnTypes = leftDataset.columnTypes.toList() + val rightColumnTypes = rightDataset.columnTypes.toList() + val combinedColumnTypes = leftColumnTypes + rightColumnTypes + + val builder = DatasetBuilder.newBuilder() + .colNames(combinedColumnNames) + .colTypes(combinedColumnTypes) + + for (rightRow in rightDataset.rowIndices) { + var matchingRow = -1 /* index of the matching LEFT row, -1 when none matched */ + val rightRowValues = Array(rightColumnCount) { col -> + rightDataset[rightRow, col] + } + + for (leftRow in leftDataset.rowIndices) { + val leftRowValues = Array(leftColumnCount) { col -> + leftDataset[leftRow, col] + } + + if (joinOn.__call__(Py.java2py(leftRowValues), Py.java2py(rightRowValues)).toJava()) { + matchingRow = leftRow /* must be the left index: it is used to read leftDataset below */ + break + } + } + + val leftRowValues = if (matchingRow != -1) { + Array(leftColumnCount) { col -> + leftDataset[matchingRow, col] + } + } else { + Array(leftColumnCount) { null } + } + + val totalArray = leftRowValues.toMutableList() + rightRowValues.toMutableList() + builder.addRow(*totalArray.toTypedArray()) + } + + return builder.build() + } + + private fun innerJoin(leftDataset: Dataset, rightDataset: Dataset, joinOn: PyFunction): Dataset? 
{ /* Inner join: compares every left row with every right row and emits one output row (left columns followed by right columns) for each pair where joinOn returns true. Rows without any match on the other side produce no output. */ + val leftColumnCount = leftDataset.columnCount + val rightColumnCount = rightDataset.columnCount + + val leftColumnNames = leftDataset.columnNames.toList() + val rightColumnNames = rightDataset.columnNames.toList() + val combinedColumnNames = leftColumnNames + rightColumnNames + + val leftColumnTypes = leftDataset.columnTypes.toList() + val rightColumnTypes = rightDataset.columnTypes.toList() + val combinedColumnTypes = leftColumnTypes + rightColumnTypes + + val builder = DatasetBuilder.newBuilder() + .colNames(combinedColumnNames) + .colTypes(combinedColumnTypes) + + for (leftRow in leftDataset.rowIndices) { + val leftRowValues = Array(leftColumnCount) { col -> + leftDataset[leftRow, col] + } + + for (rightRow in rightDataset.rowIndices) { + val rightRowValues = Array(rightColumnCount) { col -> + rightDataset[rightRow, col] + } + + if (joinOn.__call__(Py.java2py(leftRowValues), Py.java2py(rightRowValues)).toJava()) { /* no break: every matching pair is emitted */ + val totalArray = leftRowValues.toMutableList() + rightRowValues.toMutableList() + builder.addRow(*totalArray.toTypedArray()) + } + } + } + + return builder.build() + } + + private fun outerJoin(leftDataset: Dataset, rightDataset: Dataset, joinOn: PyFunction): Dataset? 
{ /* Full outer join in two passes. Pass 1: each left row is paired with every right row for which joinOn returns true, or with nulls in the right-hand columns when nothing matches. Pass 2: each right row that matches no left row is appended with nulls in the left-hand columns. Output columns are the left columns followed by the right columns. */ + val leftColumnCount = leftDataset.columnCount + val rightColumnCount = rightDataset.columnCount + + val leftColumnNames = leftDataset.columnNames.toList() + val rightColumnNames = rightDataset.columnNames.toList() + val combinedColumnNames = leftColumnNames + rightColumnNames + + val leftColumnTypes = leftDataset.columnTypes.toList() + val rightColumnTypes = rightDataset.columnTypes.toList() + val combinedColumnTypes = leftColumnTypes + rightColumnTypes + + val builder = DatasetBuilder.newBuilder() + .colNames(combinedColumnNames) + .colTypes(combinedColumnTypes) + + for (leftRow in leftDataset.rowIndices) { + val leftRowValues = Array(leftColumnCount) { col -> + leftDataset[leftRow, col] + } + + var matched = false + + for (rightRow in rightDataset.rowIndices) { + val rightRowValues = Array(rightColumnCount) { col -> + rightDataset[rightRow, col] + } + + if (joinOn.__call__(Py.java2py(leftRowValues), Py.java2py(rightRowValues)).toJava()) { + matched = true + val totalArray = leftRowValues.toMutableList() + rightRowValues.toMutableList() + builder.addRow(*totalArray.toTypedArray()) + } + } + + if (!matched) { + // If no match found for the left row, add null values for the right dataset columns + val rightRowValues = Array(rightColumnCount) { null } + val totalArray = leftRowValues.toMutableList() + rightRowValues.toMutableList() + builder.addRow(*totalArray.toTypedArray()) + } + } + + // Add rows from the right dataset that don't have a match in the left dataset + for (rightRow in rightDataset.rowIndices) { + val rightRowValues = Array(rightColumnCount) { col -> + rightDataset[rightRow, col] + } + + var matched = false + + for (leftRow in leftDataset.rowIndices) { + val leftRowValues = Array(leftColumnCount) { col -> + leftDataset[leftRow, col] + } + + if (joinOn.__call__(Py.java2py(leftRowValues), Py.java2py(rightRowValues)).toJava()) { + matched = true + break + } + } + + if (!matched) { + // If no match found for the right row, add null values for the left 
dataset columns + val leftRowValues = Array(leftColumnCount) { null } + val totalArray = leftRowValues.toMutableList() + rightRowValues.toMutableList() + builder.addRow(*totalArray.toTypedArray()) + } + } + + return builder.build() + } + + /** Script entry point that splits one dataset into several. Parses the arguments dataset and columnsToSplit; columnsToSplit is iterated as a sequence of column-index groups. For each group a new dataset is built with those columns (names and types looked up in the source dataset) and one row per source row. The results are returned in the same order as the groups. */ @Suppress("unused") + @ScriptFunction(docBundlePrefix = "DatasetExtensions") + @KeywordArgs( + names = ["dataset", "columnsToSplit"], + types = [Dataset::class, Array>::class], + ) + fun splitter(args: Array, keywords: Array): Array { + val parsedArgs = PyArgParser.parseArgs( + args, + keywords, + arrayOf("dataset", "columnsToSplit"), + arrayOf(Dataset::class.java, PyObject::class.java), + "splitter", + ) + val dataset = parsedArgs.requirePyObject("dataset").toJava() + val columnsToSplit = parsedArgs.requirePyObject("columnsToSplit").toJava>>() + val datasetSplit = Array(columnsToSplit.size) { null } /* one slot per column group, filled in below */ + + for ((currentDataset, newDataSets) in columnsToSplit.withIndex()) { + val columnNames = mutableListOf() + val columnTypes = mutableListOf>() + + newDataSets.forEachIndexed { _, column -> + columnNames.add(dataset.columnNames[column]) + columnTypes.add(dataset.columnTypes[column]) + } + + val builder = DatasetBuilder.newBuilder() + .colNames(columnNames) + .colTypes(columnTypes) + + for (row in dataset.rowIndices) { + val listToAppend = newDataSets.map { dataset[row, it] }.toTypedArray() + builder.addRow(*listToAppend) + } + + datasetSplit[currentDataset] = builder.build() + } + + return datasetSplit + } + @Suppress("unused") @ScriptFunction(docBundlePrefix = "DatasetExtensions") @KeywordArgs( diff --git a/common/src/main/resources/org/imdc/extensions/common/DatasetExtensions.properties b/common/src/main/resources/org/imdc/extensions/common/DatasetExtensions.properties index b7a5530..7499a28 100644 --- a/common/src/main/resources/org/imdc/extensions/common/DatasetExtensions.properties +++ b/common/src/main/resources/org/imdc/extensions/common/DatasetExtensions.properties @@ -4,6 +4,13 @@ map.param.mapper=A callable reference to 
invoke for each row. Will receive each map.param.preserveColumnTypes=True if the types of the output dataset should match the input. Otherwise, the output dataset will lose type information. map.returns=A modified dataset. +joiner.desc=Joins two datasets together, matching rows with a caller-supplied function. +joiner.param.leftDataset=The left dataset. Must not be null. +joiner.param.rightDataset=The right dataset. Must not be null. +joiner.param.joinType=The type of join to perform. Must be one of "inner", "left", "right", or "outer". Must not be null. +joiner.param.joinOn=A function called with a left row and a right row, each a sequence of values indexed by column position. Return True to join the two rows. Must not be null. +joiner.returns=The joined dataset, with the left dataset's columns followed by the right dataset's columns. + filter.desc=Runs a filtering function on each row in a dataset, returning a truncated dataset. filter.param.dataset=The dataset to filter. Must not be null. filter.param.filter=A function to run on each row. Will be called with keyword arguments matching column names. The first argument will be named 'row' and is the row index. Return True to keep the row in the output dataset. @@ -36,6 +43,11 @@ equals.param.dataset1=The first dataset. Must not be null. equals.param.dataset2=The second dataset. Must not be null. equals.returns=True if the two datasets have the same number of columns, with the same types, in the same order, with the same data in each row. +splitter.desc=Splits a dataset into any number of datasets, based on lists of column indexes. +splitter.param.dataset=The dataset to split. Must not be null. +splitter.param.columnsToSplit=A list of lists of column indexes. Each inner list selects the columns of one output dataset. Must not be null. +splitter.returns=An array of datasets, one per entry in columnsToSplit, in the same order. + valuesEqual.desc=Compares two datasets' content. valuesEqual.param.dataset1=The first dataset. Must not be null. valuesEqual.param.dataset2=The second dataset. Must not be null. 
diff --git a/common/src/test/kotlin/org/imdc/extensions/common/DatasetExtensionsTests.kt b/common/src/test/kotlin/org/imdc/extensions/common/DatasetExtensionsTests.kt index b7b2dce..cdd7af3 100644 --- a/common/src/test/kotlin/org/imdc/extensions/common/DatasetExtensionsTests.kt +++ b/common/src/test/kotlin/org/imdc/extensions/common/DatasetExtensionsTests.kt @@ -3,6 +3,7 @@ package org.imdc.extensions.common import com.inductiveautomation.ignition.common.BasicDataset import com.inductiveautomation.ignition.common.Dataset import com.inductiveautomation.ignition.common.util.DatasetBuilder +import io.kotest.assertions.asClue import io.kotest.assertions.withClue import io.kotest.engine.spec.tempfile import io.kotest.matchers.shouldBe @@ -16,12 +17,50 @@ class DatasetExtensionsTests : JythonTest( { globals -> globals["utils"] = DatasetExtensions globals["builder"] = DatasetBuilder.newBuilder() + globals["outerJoin1"] = DatasetBuilder.newBuilder() + .colNames("EmpID", "EmpName", "City", "Designation") + .colTypes(Int::class.javaObjectType, String::class.java, String::class.java, String::class.java) + .addRow(1, "Charlotte Robinson", "Chicago", "Consultant") + .addRow(2, "Madison Phillips", "Dallas", "Senior Analyst") + .addRow(3, "Emma Hernandez", "Phoenix", "Senior Analyst") + .addRow(4, "Samantha Sanchez", "San Diego", "Principal Conultant") + .addRow(5, "Sadie Ward", "San Antonio", "Consultant") + .addRow(6, "Savannah Perez", "New York", "Principal Conultant") + .addRow(7, "Victoria Gray", "Los Angeles", "Assistant") + .addRow(8, "Alyssa Lewis", "Houston", "Consultant") + .addRow(9, "Anna Lee", "San Jose", "Principal Conultant") + .addRow(10, "Riley Hall", "Philadelphia", "Senior Analyst") + .build() + globals["outerJoin2"] = DatasetBuilder.newBuilder() + .colNames("EmpID", "Department_ID", "DepartmentName") + .colTypes(Int::class.javaObjectType, Int::class.javaObjectType, String::class.java) + .addRow(1, 0, "Executive") + .addRow(2, 1, "Document Control") + 
.addRow(3, 2, "Finance") + .addRow(4, 3, "Engineering") + .addRow(5, 4, "Facilities and Maintenance") + .addRow(6, 2, "Finance") + .addRow(10, 4, "Facilities and Maintenance") + .build() + globals["dataset"] = DatasetBuilder.newBuilder() .colNames("a", "b", "c") .colTypes(Int::class.javaObjectType, Double::class.javaObjectType, String::class.java) .addRow(1, 3.14, "pi") .addRow(2, 6.28, "tau") .build() + globals["dataset2"] = DatasetBuilder.newBuilder() + .colNames("a", "b2", "c2") + .colTypes(Int::class.javaObjectType, Double::class.javaObjectType, String::class.java) + .addRow(1, 3.1415, "pi2") + .addRow(2, 56, "tau2") + .build() + + val tempArray: Array> = arrayOf( + arrayOf(0, 1, 2), + arrayOf(3), + ) + globals["splitAt"] = tempArray val excelSample = DatasetExtensionsTests::class.java.getResourceAsStream("sample.xlsx")!!.readAllBytes() @@ -56,10 +95,12 @@ class DatasetExtensionsTests : JythonTest( ) { private fun Dataset.asClue(assertions: (Dataset) -> Unit) { withClue( - lazy { - buildString { - printDataset(this, this@asClue, true) - } + { + lazy { + buildString { + printDataset(this, this@asClue, true) + } + }.value }, ) { assertions(this) @@ -103,6 +144,106 @@ class DatasetExtensionsTests : JythonTest( } } + context("Left Join test") { + test("Left Join") { + eval("utils.joiner(dataset, dataset2, 'left', lambda d1, d2: (d1[0] == d2[0]))").asClue { + it.columnNames shouldBe listOf("a", "b", "c", "a", "b2", "c2") + it.columnTypes shouldBe listOf( + Int::class.javaObjectType, + Double::class.javaObjectType, + String::class.java, + Int::class.javaObjectType, + Double::class.javaObjectType, + String::class.java, + ) + it.rowCount shouldBe 2 + it.getColumnAsList(0) shouldBe listOf(1, 2) + it.getColumnAsList(1) shouldBe listOf(3.14, 6.28) + it.getColumnAsList(2) shouldBe listOf("pi", "tau") + it.getColumnAsList(3) shouldBe listOf(1, 2) + it.getColumnAsList(4) shouldBe listOf(3.1415, 56.0) + it.getColumnAsList(5) shouldBe listOf("pi2", "tau2") + } + } + } + + 
context("Right Join test") { + test("Right Join") { + eval("utils.joiner(outerJoin1, outerJoin2, 'right', lambda d1, d2: (d1[0] == d2[0]))").asClue { + it.columnNames shouldBe listOf("EmpID", "EmpName", "City", "Designation", "EmpID", "Department_ID", "DepartmentName") + it.columnTypes shouldBe listOf( + Int::class.javaObjectType, + String::class.java, + String::class.java, + String::class.java, + Int::class.javaObjectType, + Int::class.javaObjectType, + String::class.java, + ) + it.getColumnAsList(5) shouldBe listOf(0, 1, 2, 3, 4, 2, 4) + it.rowCount shouldBe 7 + } + } + } + + // https://www.sqlshack.com/sql-outer-join-overview-and-examples/ + context("Outer Join test") { + test("Outer Join") { + eval("utils.joiner(outerJoin1, outerJoin2, 'outer', lambda d1, d2: (d1[0] == d2[0]))").asClue { + it.columnNames shouldBe listOf("EmpID", "EmpName", "City", "Designation", "EmpID", "Department_ID", "DepartmentName") + it.columnTypes shouldBe listOf( + Int::class.javaObjectType, + String::class.java, + String::class.java, + String::class.java, + Int::class.javaObjectType, + Int::class.javaObjectType, + String::class.java, + ) + it.getColumnAsList(6) shouldBe listOf("Executive", "Document Control", "Finance", "Engineering", "Facilities and Maintenance", "Finance", null, null, null, "Facilities and Maintenance") + it.rowCount shouldBe 10 + } + } + } + + context("Inner Join test") { + test("Inner Join") { + eval("utils.joiner(outerJoin1, outerJoin2, 'inner', lambda d1, d2: (d1[0] == d2[0]))").asClue { + it.columnNames shouldBe listOf("EmpID", "EmpName", "City", "Designation", "EmpID", "Department_ID", "DepartmentName") + it.columnTypes shouldBe listOf( + Int::class.javaObjectType, + String::class.java, + String::class.java, + String::class.java, + Int::class.javaObjectType, + Int::class.javaObjectType, + String::class.java, + ) + it.getColumnAsList(5) shouldBe listOf(0, 1, 2, 3, 4, 2, 4) + it.rowCount shouldBe 7 + } + } + } + + context("Dataset Splitter") { + test("Split 
Dataset") { + eval>("utils.splitter(outerJoin1, splitAt)").asClue { + it.get(0).columnNames shouldBe listOf("EmpID", "EmpName", "City") + it.get(0).columnTypes shouldBe listOf( + Int::class.javaObjectType, + String::class.java, + String::class.java, + ) + it.get(0).rowCount shouldBe 10 + it.get(1).columnNames shouldBe listOf("Designation") + it.get(1).columnTypes shouldBe listOf( + String::class.java, + ) + it.get(1).rowCount shouldBe 10 + } + } + } + context("Filter tests") { test("Constant filter") { eval("utils.filter(dataset, lambda **kwargs: False)").asClue { diff --git a/docker-compose.yml b/docker-compose.yml index bec0d09..a3e3567 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ services: gateway: - image: inductiveautomation/ignition:8.1.28 + image: inductiveautomation/ignition:8.1.29 ports: - 18088:8088 - 18000:8000