From c6548dd1f21eb1967666ffdfddc5687aecb33856 Mon Sep 17 00:00:00 2001 From: Ruslan Iushchenko Date: Thu, 20 Feb 2025 08:53:55 +0100 Subject: [PATCH 1/4] #744 Add the ability to specify default record length for record length mapping. --- .../raw/FixedWithRecordLengthExprRawRecordExtractor.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedWithRecordLengthExprRawRecordExtractor.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedWithRecordLengthExprRawRecordExtractor.scala index ba57a6718..ef1a9c00c 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedWithRecordLengthExprRawRecordExtractor.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedWithRecordLengthExprRawRecordExtractor.scala @@ -32,6 +32,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext, final private val lengthField = recordLengthField.map(_.field) final private val lengthMap = recordLengthField.map(_.valueMap).getOrElse(Map.empty) + final private val defaultRecordLength = lengthMap.get("_") final private val isLengthMapEmpty = lengthMap.isEmpty type RawRecord = (String, Array[Byte]) @@ -131,8 +132,8 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext, case i: Int => getRecordLengthFromMapping(i.toString) case l: Long => getRecordLengthFromMapping(l.toString) case s: String => getRecordLengthFromMapping(s) - case null => throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)}).") - case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.") + case null => defaultRecordLength.getOrElse(throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)}).")) + case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.") } } length + recordLengthAdjustment @@ -141,7 +142,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext, final private def getRecordLengthFromMapping(v: String): Int = { lengthMap.get(v) match { case Some(len) => len - case None => throw new IllegalStateException(s"Record length value '$v' is not mapped to a record length.") + case None => defaultRecordLength.getOrElse(throw new IllegalStateException(s"Record length value '$v' is not mapped to a record length.")) } } From 642006cbf7a40a306c58d053187e72187455e91c Mon Sep 17 00:00:00 2001 From: Ruslan Iushchenko Date: Thu, 20 Feb 2025 08:54:52 +0100 Subject: [PATCH 2/4] Remove POM RAT plugin superseded by sbt's licence plugin. --- pom.xml | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/pom.xml b/pom.xml index 4a118ae6e..ec3d371eb 100644 --- a/pom.xml +++ b/pom.xml @@ -367,46 +367,6 @@ - - org.apache.rat - apache-rat-plugin - ${maven.rat.plugin.version} - - - verify - - check - - - - - - **/*.sbt - **/*.properties - **/*.json - **/*.csv - **/*.txt - **/*.bin - **/*.md - **/*.iml - **/*.csv - **/*.cob - **/*.cpy - **/*.svg - **/*.plot - **/*.yml - **/*.interp - **/*.tokens - **/_* - **/dependency-reduced-pom.xml - **/.idea/** - **/target/** - **/org.apache.spark.sql.sources.DataSourceRegister - dependency-reduced-pom.xml - .github/CODEOWNERS - - - From c0fc92ab18eec1387851aabb2b1993e0cb7a4456 Mon Sep 17 00:00:00 2001 From: Ruslan Iushchenko Date: Fri, 21 Feb 2025 07:43:00 +0100 Subject: [PATCH 3/4] #744 Update README of the new feature. --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 87a1de241..754d68ce4 100644 --- a/README.md +++ b/README.md @@ -485,6 +485,13 @@ If the record field contains a string that can be mapped to a record size, you c .option("record_length_map", """{"SEG1":100,"SEG2":200}""") ``` +You can specify the default record size by defining the key "_": +``` +.option("record_format", "F") +.option("record_length_field", "FIELD_STR") +.option("record_length_map", """{"SEG1":100,"SEG2":200,"_":100}""") +``` + ### Use cases for various variable length formats In order to understand the file format it is often sufficient to look at the first 4 bytes of the file (un case of RDW only files), From 085d26d992056b98c93e41e7fe9dad47c84a3291 Mon Sep 17 00:00:00 2001 From: Ruslan Iushchenko Date: Mon, 24 Feb 2025 11:40:26 +0100 Subject: [PATCH 4/4] #744 Add a unit test for the fixed record length extractor with record length mapping with default record length. --- .../reader/iterator/VRLRecordReaderSpec.scala | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/iterator/VRLRecordReaderSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/iterator/VRLRecordReaderSpec.scala index 4992c2c2b..c7bc2d522 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/iterator/VRLRecordReaderSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/iterator/VRLRecordReaderSpec.scala @@ -225,6 +225,58 @@ class VRLRecordReaderSpec extends AnyWordSpec { assert(ex.getMessage == "The record length field LEN must be an integral type or a value mapping must be specified.") } + + "the length mapping with default record length" in { + val copybookWithLenbgthMap = + """ 01 RECORD. + 05 LEN_SPEC PIC X(1). + 05 N PIC 9(2). + 05 A PIC X(3). + """ + + val records = Array( + 0xC1, 0xF1, 0xF2, 0xC1, + 0xC2, 0xF3, 0xF4, 0xC2, 0xC3, + 0xC3, 0xF5, 0xF6, 0xC4, 0xC5, 0xC6 + ).map(_.toByte) + + val streamH = new ByteStreamMock(records) + val streamD = new ByteStreamMock(records) + val context = RawRecordContext(0, streamH, streamD, CopybookParser.parseSimple(copybookWithLenbgthMap), null, null, "") + + val readerParameters = ReaderParameters( + lengthFieldExpression = Some("LEN_SPEC"), + lengthFieldMap = Map("A" -> 4, "B" -> 5, "_" -> 6)) + + val reader = getUseCase( + copybook = copybookWithLenbgthMap, + records = records, + lengthFieldExpression = Some("LEN_SPEC"), + recordExtractor = Some(new FixedWithRecordLengthExprRawRecordExtractor(context, readerParameters))) + + assert(reader.hasNext) + val (segment1, record1) = reader.next() + assert(reader.hasNext) + val (segment2, record2) = reader.next() + assert(reader.hasNext) + val (segment3, record3) = reader.next() + assert(!reader.hasNext) + + assert(segment1.isEmpty) + assert(segment2.isEmpty) + assert(segment3.isEmpty) + assert(record1.length == 4) + assert(record2.length == 5) + assert(record3.length == 6) + assert(record1(0) == 0xC1.toByte) + assert(record1(1) == 0xF1.toByte) + assert(record1(2) == 0xF2.toByte) + assert(record1(3) == 0xC1.toByte) + assert(record2(0) == 0xC2.toByte) + assert(record2(1) == 0xF3.toByte) + assert(record3(0) == 0xC3.toByte) + assert(record3(1) == 0xF5.toByte) + } } "work with record length expressions" in {