From 3a30c48b72f86541fe45949f5eb806bd4eae9a4a Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Thu, 1 Jun 2023 10:34:00 +0200 Subject: [PATCH 1/6] Merged CLI and CLIParser classes since they are closely linked anyway. Added two new functional commands: digest and unchunk. More options and tweaks to come. --- pom.xml | 2 +- src/main/java/org/jwat/tools/JWATTools.java | 8 +- .../org/jwat/tools/tasks/UnpackTaskCLI.java | 22 --- .../tools/tasks/arc2warc/Arc2WarcTaskCLI.java | 88 +++++++++- .../tasks/arc2warc/Arc2WarcTaskCLIParser.java | 96 ---------- .../org/jwat/tools/tasks/cdx/CDXTaskCLI.java | 70 +++++++- .../tools/tasks/cdx/CDXTaskCLIParser.java | 78 --------- .../tools/tasks/changed/ChangedTaskCLI.java | 50 +++++- .../tasks/changed/ChangedTaskCLIParser.java | 58 ------- .../tools/tasks/compress/CompressTaskCLI.java | 156 ++++++++++++++++- .../tasks/compress/CompressTaskCLIParser.java | 164 ------------------ .../tasks/containermd/ContainerMDTaskCLI.java | 86 ++++++++- .../containermd/ContainerMDTaskCLIParser.java | 94 ---------- .../tasks/decompress/DecompressTaskCLI.java | 50 +++++- .../decompress/DecompressTaskCLIParser.java | 58 ------- .../tools/tasks/delete/DeleteTaskCLI.java | 58 ++++++- .../tasks/delete/DeleteTaskCLIParser.java | 66 ------- .../tools/tasks/digest/DigestOptions.java | 9 + .../jwat/tools/tasks/digest/DigestTask.java | 70 ++++++++ .../tools/tasks/digest/DigestTaskCLI.java | 66 +++++++ .../tools/tasks/extract/ExtractTaskCLI.java | 58 ++++++- .../tasks/extract/ExtractTaskCLIParser.java | 66 ------- .../tasks/headers2cdx/Headers2CDXTaskCLI.java | 70 +++++++- .../headers2cdx/Headers2CDXTaskCLIParser.java | 78 --------- .../tools/tasks/interval/IntervalTaskCLI.java | 90 +++++++++- .../tasks/interval/IntervalTaskCLIParser.java | 96 ---------- .../tasks/pathindex/PathIndexTaskCLI.java | 49 +++++- .../pathindex/PathIndexTaskCLIParser.java | 57 ------ .../jwat/tools/tasks/test/TestTaskCLI.java | 116 ++++++++++++- .../tools/tasks/test/TestTaskCLIParser.java | 124 ------------- .../tools/tasks/unchunk/UnchunkOptions.java | 9 + .../jwat/tools/tasks/unchunk/UnchunkTask.java | 56 ++++++ .../tools/tasks/unchunk/UnchunkTaskCLI.java | 66 +++++++ .../tasks/{ => unpack}/UnpackOptions.java | 2 +- .../tools/tasks/{ => unpack}/UnpackTask.java | 3 +- .../UnpackTaskCLI.java} | 24 ++- ...LIParser.java => TestArc2WarcTaskCLI.java} | 14 +- .../tools/tasks/cdx/TestCDXTaskCLIParser.java | 12 +- ...CLIParser.java => TestChangedTaskCLI.java} | 10 +- ...LIParser.java => TestCompressTaskCLI.java} | 14 +- ...Parser.java => TestDecompressTaskCLI.java} | 14 +- ...kCLIParser.java => TestDeleteTaskCLI.java} | 10 +- ...CLIParser.java => TestExtractTaskCLI.java} | 14 +- ...LIParser.java => TestIntervalTaskCLI.java} | 18 +- ...IParser.java => TestPathIndexTaskCLI.java} | 10 +- ...askCLIParser.java => TestTestTaskCLI.java} | 16 +- 46 files changed, 1299 insertions(+), 1146 deletions(-) delete mode 100644 src/main/java/org/jwat/tools/tasks/UnpackTaskCLI.java delete mode 100644 src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLIParser.java delete mode 100644 src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLIParser.java delete mode 100644 src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLIParser.java delete mode 100644 src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLIParser.java delete mode 100644 src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLIParser.java delete mode 100644 src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLIParser.java delete mode 100644 src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLIParser.java create mode 100644 src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java create mode 100644 src/main/java/org/jwat/tools/tasks/digest/DigestTask.java create mode 100644 src/main/java/org/jwat/tools/tasks/digest/DigestTaskCLI.java delete mode 100644 src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLIParser.java delete mode 100644 src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLIParser.java delete mode 100644 src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLIParser.java delete mode 100644 src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLIParser.java delete mode 100644 src/main/java/org/jwat/tools/tasks/test/TestTaskCLIParser.java create mode 100644 src/main/java/org/jwat/tools/tasks/unchunk/UnchunkOptions.java create mode 100644 src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java create mode 100644 src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java rename src/main/java/org/jwat/tools/tasks/{ => unpack}/UnpackOptions.java (70%) rename src/main/java/org/jwat/tools/tasks/{ => unpack}/UnpackTask.java (93%) rename src/main/java/org/jwat/tools/tasks/{UnpackTaskCLIParser.java => unpack/UnpackTaskCLI.java} (57%) rename src/test/java/org/jwat/tools/tasks/arc2warc/{TestArc2WarcTaskCLIParser.java => TestArc2WarcTaskCLI.java} (89%) rename src/test/java/org/jwat/tools/tasks/changed/{TestChangedTaskCLIParser.java => TestChangedTaskCLI.java} (87%) rename src/test/java/org/jwat/tools/tasks/compress/{TestCompressTaskCLIParser.java => TestCompressTaskCLI.java} (92%) rename src/test/java/org/jwat/tools/tasks/decompress/{TestDecompressTaskCLIParser.java => TestDecompressTaskCLI.java} (84%) rename src/test/java/org/jwat/tools/tasks/delete/{TestDeleteTaskCLIParser.java => TestDeleteTaskCLI.java} (89%) rename src/test/java/org/jwat/tools/tasks/extract/{TestExtractTaskCLIParser.java => TestExtractTaskCLI.java} (86%) rename src/test/java/org/jwat/tools/tasks/interval/{TestIntervalTaskCLIParser.java => TestIntervalTaskCLI.java} (85%) rename src/test/java/org/jwat/tools/tasks/pathindex/{TestPathIndexTaskCLIParser.java => TestPathIndexTaskCLI.java} (88%) rename src/test/java/org/jwat/tools/tasks/test/{TestTestTaskCLIParser.java => TestTestTaskCLI.java} (91%) diff --git a/pom.xml b/pom.xml index 7c21013..88243c3 100644 --- a/pom.xml +++ b/pom.xml @@ -79,7 +79,7 @@ 1.8 - 1.2.1 + 1.3.0-SNAPSHOT 3.2.7 0.2.0 0.7.0 diff --git a/src/main/java/org/jwat/tools/JWATTools.java b/src/main/java/org/jwat/tools/JWATTools.java index afa7563..7b9c924 100644 --- a/src/main/java/org/jwat/tools/JWATTools.java +++ b/src/main/java/org/jwat/tools/JWATTools.java @@ -9,7 +9,6 @@ import java.util.Map; import org.jwat.tools.tasks.TaskCLI; -import org.jwat.tools.tasks.UnpackTaskCLI; import org.jwat.tools.tasks.arc2warc.Arc2WarcTaskCLI; import org.jwat.tools.tasks.cdx.CDXTaskCLI; import org.jwat.tools.tasks.changed.ChangedTaskCLI; @@ -17,11 +16,14 @@ import org.jwat.tools.tasks.containermd.ContainerMDTaskCLI; import org.jwat.tools.tasks.decompress.DecompressTaskCLI; import org.jwat.tools.tasks.delete.DeleteTaskCLI; +import org.jwat.tools.tasks.digest.DigestTaskCLI; import org.jwat.tools.tasks.extract.ExtractTaskCLI; import org.jwat.tools.tasks.headers2cdx.Headers2CDXTaskCLI; import org.jwat.tools.tasks.interval.IntervalTaskCLI; import org.jwat.tools.tasks.pathindex.PathIndexTaskCLI; import org.jwat.tools.tasks.test.TestTaskCLI; +import org.jwat.tools.tasks.unchunk.UnchunkTaskCLI; +import org.jwat.tools.tasks.unpack.UnpackTaskCLI; import com.antiaction.common.cli.Argument; import com.antiaction.common.cli.ArgumentParser; @@ -92,7 +94,9 @@ public static void configure_cli() { PathIndexTaskCLI.class, TestTaskCLI.class, UnpackTaskCLI.class, - Headers2CDXTaskCLI.class + Headers2CDXTaskCLI.class, + DigestTaskCLI.class, + UnchunkTaskCLI.class }; addCommands((Class[])tasks); } diff --git a/src/main/java/org/jwat/tools/tasks/UnpackTaskCLI.java b/src/main/java/org/jwat/tools/tasks/UnpackTaskCLI.java deleted file mode 100644 index 2292900..0000000 --- a/src/main/java/org/jwat/tools/tasks/UnpackTaskCLI.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.jwat.tools.tasks; - -import com.antiaction.common.cli.CommandLine; - -public class UnpackTaskCLI extends TaskCLI { - - public static final String commandName = "unpack"; - - public static final String commandDescription = "unpack multifile GZip"; - - @Override - public void show_help() { - System.out.println("Work in progress..."); - } - - @Override - public void runtask(CommandLine cmdLine) { - UnpackTask task = new UnpackTask(); - task.runtask(UnpackTaskCLIParser.parseArguments(cmdLine)); - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java b/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java index d16a1fd..acb981f 100644 --- a/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.arc2warc; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class Arc2WarcTaskCLI extends TaskCLI { @@ -28,7 +35,86 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { Arc2WarcTask task = new Arc2WarcTask(); - task.runtask(Arc2WarcTaskCLIParser.parseArguments(cmdLine)); + Arc2WarcOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_DEST = 101; + public static final int A_OVERWRITE = 102; + public static final int A_PREFIX = 103; + + public static Arc2WarcOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); + cliOptions.addOption(null, "--overwrite", A_OVERWRITE, 0, null); + cliOptions.addOption(null, "--prefix", A_PREFIX, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( Arc2WarcTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + Arc2WarcOptions options = new Arc2WarcOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Destination directory. + String dest = System.getProperty("user.dir"); + argument = cmdLine.idMap.get( A_DEST ); + if ( argument != null && argument.value != null ) { + dest = argument.value; + } + System.out.println( "Using '" + dest + "' as destination directory." ); + options.destDir = new File( dest ); + if ( !options.destDir.exists() ) { + if ( !options.destDir.mkdirs() ) { + System.out.println( "Could not create destination directory: '" + dest + "'!" ); + System.exit( 1 ); + } + } else if ( !options.destDir.isDirectory() ) { + System.out.println( "'" + dest + "' is not a directory!" ); + System.exit( 1 ); + } + + // Overwrite. + if ( cmdLine.idMap.containsKey( A_OVERWRITE) ) { + options.bOverwrite = true; + } + + // Prefix. + argument = cmdLine.idMap.get( A_PREFIX ); + if ( argument != null && argument.value != null ) { + options.prefix = argument.value; + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLIParser.java deleted file mode 100644 index 0e2d764..0000000 --- a/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLIParser.java +++ /dev/null @@ -1,96 +0,0 @@ -package org.jwat.tools.tasks.arc2warc; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class Arc2WarcTaskCLIParser { - - public static final int A_DEST = 101; - public static final int A_OVERWRITE = 102; - public static final int A_PREFIX = 103; - - protected Arc2WarcTaskCLIParser() { - } - - public static Arc2WarcOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); - cliOptions.addOption(null, "--overwrite", A_OVERWRITE, 0, null); - cliOptions.addOption(null, "--prefix", A_PREFIX, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( Arc2WarcTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - Arc2WarcOptions options = new Arc2WarcOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Destination directory. - String dest = System.getProperty("user.dir"); - argument = cmdLine.idMap.get( A_DEST ); - if ( argument != null && argument.value != null ) { - dest = argument.value; - } - System.out.println( "Using '" + dest + "' as destination directory." ); - options.destDir = new File( dest ); - if ( !options.destDir.exists() ) { - if ( !options.destDir.mkdirs() ) { - System.out.println( "Could not create destination directory: '" + dest + "'!" ); - System.exit( 1 ); - } - } else if ( !options.destDir.isDirectory() ) { - System.out.println( "'" + dest + "' is not a directory!" ); - System.exit( 1 ); - } - - // Overwrite. - if ( cmdLine.idMap.containsKey( A_OVERWRITE) ) { - options.bOverwrite = true; - } - - // Prefix. - argument = cmdLine.idMap.get( A_PREFIX ); - if ( argument != null && argument.value != null ) { - options.prefix = argument.value; - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java b/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java index 9bfc64b..22312d3 100644 --- a/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.cdx; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class CDXTaskCLI extends TaskCLI { @@ -29,7 +36,68 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { CDXTask task = new CDXTask(); - task.runtask(CDXTaskCLIParser.parseArguments(cmdLine)); + CDXOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OUTPUT = 101; + + public static CDXOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( CDXTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + CDXOptions options = new CDXOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Output file. + argument = cmdLine.idMap.get( A_OUTPUT ); + if ( argument != null && argument.value != null ) { + options.outputFile = new File(argument.value); + if (options.outputFile.isDirectory()) { + System.out.println("Can not output to a directory!"); + System.exit(1); + } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { + if (!options.outputFile.getParentFile().mkdirs()) { + System.out.println("Could not create parent directories!"); + System.exit(1); + } + } + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLIParser.java deleted file mode 100644 index c96a0d0..0000000 --- a/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLIParser.java +++ /dev/null @@ -1,78 +0,0 @@ -package org.jwat.tools.tasks.cdx; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class CDXTaskCLIParser { - - public static final int A_OUTPUT = 101; - - protected CDXTaskCLIParser() { - } - - public static CDXOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( CDXTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - CDXOptions options = new CDXOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Output file. - argument = cmdLine.idMap.get( A_OUTPUT ); - if ( argument != null && argument.value != null ) { - options.outputFile = new File(argument.value); - if (options.outputFile.isDirectory()) { - System.out.println("Can not output to a directory!"); - System.exit(1); - } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { - if (!options.outputFile.getParentFile().mkdirs()) { - System.out.println("Could not create parent directories!"); - System.exit(1); - } - } - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java b/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java index 0f75b9b..0c3598c 100644 --- a/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.changed; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class ChangedTaskCLI extends TaskCLI { @@ -27,7 +34,48 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { ChangedTask task = new ChangedTask(); - task.runtask(ChangedTaskCLIParser.parseArguments(cmdLine)); + ChangedOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OUTPUT = 101; + + public static ChangedOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( ChangedTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + ChangedOptions options = new ChangedOptions(); + + Argument argument; + + // Output file. + argument = cmdLine.idMap.get( A_OUTPUT ); + if ( argument != null && argument.value != null ) { + options.outputFile = new File(argument.value); + if (options.outputFile.isDirectory()) { + System.out.println("Can not output to a directory!"); + System.exit(1); + } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { + if (!options.outputFile.getParentFile().mkdirs()) { + System.out.println("Could not create parent directories!"); + System.exit(1); + } + } + } + + // Files + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLIParser.java deleted file mode 100644 index 6654a13..0000000 --- a/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLIParser.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.jwat.tools.tasks.changed; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class ChangedTaskCLIParser { - - public static final int A_OUTPUT = 101; - - protected ChangedTaskCLIParser() { - } - - public static ChangedOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( ChangedTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - ChangedOptions options = new ChangedOptions(); - - Argument argument; - - // Output file. - argument = cmdLine.idMap.get( A_OUTPUT ); - if ( argument != null && argument.value != null ) { - options.outputFile = new File(argument.value); - if (options.outputFile.isDirectory()) { - System.out.println("Can not output to a directory!"); - System.exit(1); - } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { - if (!options.outputFile.getParentFile().mkdirs()) { - System.out.println("Could not create parent directories!"); - System.exit(1); - } - } - } - - // Files - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java b/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java index 1f6345e..e3ccee1 100644 --- a/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.compress; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class CompressTaskCLI extends TaskCLI { @@ -39,7 +46,154 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { CompressTask task = new CompressTask(); - task.runtask(CompressTaskCLIParser.parseArguments(cmdLine)); + CompressOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_COMPRESS = 101; + public static final int A_BATCHMODE = 102; + public static final int A_DRYRUN = 103; + public static final int A_VERIFY = 104; + public static final int A_REMOVE = 105; + public static final int A_DEST = 106; + public static final int A_FILELIST = 107; + public static final int A_TWOPASS = 108; + public static final int A_HDRFILES = 109; + public static final int A_BLACKLIST = 110; + public static final int A_CHECKSUMS = 111; + + public static CompressOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-1", "--fast", A_COMPRESS, 1, null); + cliOptions.addOption("-2", null, A_COMPRESS, 2, null); + cliOptions.addOption("-3", null, A_COMPRESS, 3, null); + cliOptions.addOption("-4", null, A_COMPRESS, 4, null); + cliOptions.addOption("-5", null, A_COMPRESS, 5, null); + cliOptions.addOption("-6", null, A_COMPRESS, 6, null); + cliOptions.addOption("-7", null, A_COMPRESS, 7, null); + cliOptions.addOption("-8", null, A_COMPRESS, 8, null); + cliOptions.addOption("-9", "--best", A_COMPRESS, 9, null); + cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); + cliOptions.addOption(null, "--batch", A_BATCHMODE, 0, null); + cliOptions.addOption(null, "--remove", A_REMOVE, 0, null); + cliOptions.addOption(null, "--verify", A_VERIFY, 0, null); + cliOptions.addOption(null, "--dryrun", A_DRYRUN, 0, null); + cliOptions.addOption(null, "--twopass", A_TWOPASS, 0, null); + cliOptions.addOption(null, "--listfile", A_FILELIST, 0, null).setValueRequired(); + cliOptions.addOption(null, "--hdrfiles", A_HDRFILES, 0, null); + cliOptions.addOption("-q", "--quiet", JWATTools.A_QUIET, 0, null); + cliOptions.addOption(null, "--blacklist", A_BLACKLIST, 0, null).setValueRequired(); + cliOptions.addOption(null, "--checksums", A_CHECKSUMS, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( CompressTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + CompressOptions options = new CompressOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Compression level. + argument = cmdLine.idMap.get( A_COMPRESS ); + if (argument != null) { + options.compressionLevel = argument.option.subId; + } + + argument = cmdLine.idMap.get( A_BATCHMODE ); + if (argument != null) { + options.bBatch = true; + } + + argument = cmdLine.idMap.get( A_DRYRUN ); + if (argument != null) { + options.bDryrun = true; + } + + argument = cmdLine.idMap.get( A_VERIFY ); + if (argument != null) { + options.bVerify = true; + } + + argument = cmdLine.idMap.get( A_REMOVE ); + if (argument != null) { + options.bRemove = true; + } + + argument = cmdLine.idMap.get( A_DEST ); + if (argument != null) { + options.dstPath = new File( argument.value ); + } + + argument = cmdLine.idMap.get( A_FILELIST ); + if (argument != null) { + options.lstFile = new File( argument.value ); + } + + argument = cmdLine.idMap.get( A_TWOPASS ); + if (argument != null) { + options.bTwopass = true; + } + + argument = cmdLine.idMap.get( A_HDRFILES ); + if (argument != null) { + options.bHeaderFiles = true; + } + + argument = cmdLine.idMap.get( A_BLACKLIST ); + if (argument != null) { + options.blacklistFile = new File( argument.value ); + } + + argument = cmdLine.idMap.get( A_CHECKSUMS ); + if (argument != null) { + options.checksumsFile = new File( argument.value ); + } + + options.bQuiet = cmdLine.idMap.containsKey( JWATTools.A_QUIET ); + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + if (!options.bQuiet) { + System.out.println("JWATTools v" + JWATTools.getVersionString("org.jwat.tools")); + System.out.println( "Compression level: " + options.compressionLevel ); + System.out.println( " Batch mode: " + options.bBatch ); + System.out.println( " Dry run: " + options.bDryrun ); + System.out.println( " Verify output: " + options.bVerify ); + System.out.println( " Remove input: " + options.bRemove ); + System.out.println( " Dest path: " + options.dstPath ); + System.out.println( " List file: " + options.lstFile ); + System.out.println( " Twopass: " + options.bTwopass ); + System.out.println( " Header Files: " + options.bHeaderFiles ); + System.out.println( " Quiet: " + options.bQuiet ); + } + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLIParser.java deleted file mode 100644 index c5c1fec..0000000 --- a/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLIParser.java +++ /dev/null @@ -1,164 +0,0 @@ -package org.jwat.tools.tasks.compress; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class CompressTaskCLIParser { - - public static final int A_COMPRESS = 101; - public static final int A_BATCHMODE = 102; - public static final int A_DRYRUN = 103; - public static final int A_VERIFY = 104; - public static final int A_REMOVE = 105; - public static final int A_DEST = 106; - public static final int A_FILELIST = 107; - public static final int A_TWOPASS = 108; - public static final int A_HDRFILES = 109; - public static final int A_BLACKLIST = 110; - public static final int A_CHECKSUMS = 111; - - protected CompressTaskCLIParser() { - } - - public static CompressOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-1", "--fast", A_COMPRESS, 1, null); - cliOptions.addOption("-2", null, A_COMPRESS, 2, null); - cliOptions.addOption("-3", null, A_COMPRESS, 3, null); - cliOptions.addOption("-4", null, A_COMPRESS, 4, null); - cliOptions.addOption("-5", null, A_COMPRESS, 5, null); - cliOptions.addOption("-6", null, A_COMPRESS, 6, null); - cliOptions.addOption("-7", null, A_COMPRESS, 7, null); - cliOptions.addOption("-8", null, A_COMPRESS, 8, null); - cliOptions.addOption("-9", "--best", A_COMPRESS, 9, null); - cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); - cliOptions.addOption(null, "--batch", A_BATCHMODE, 0, null); - cliOptions.addOption(null, "--remove", A_REMOVE, 0, null); - cliOptions.addOption(null, "--verify", A_VERIFY, 0, null); - cliOptions.addOption(null, "--dryrun", A_DRYRUN, 0, null); - cliOptions.addOption(null, "--twopass", A_TWOPASS, 0, null); - cliOptions.addOption(null, "--listfile", A_FILELIST, 0, null).setValueRequired(); - cliOptions.addOption(null, "--hdrfiles", A_HDRFILES, 0, null); - cliOptions.addOption("-q", "--quiet", JWATTools.A_QUIET, 0, null); - cliOptions.addOption(null, "--blacklist", A_BLACKLIST, 0, null).setValueRequired(); - cliOptions.addOption(null, "--checksums", A_CHECKSUMS, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( CompressTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - CompressOptions options = new CompressOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Compression level. - argument = cmdLine.idMap.get( A_COMPRESS ); - if (argument != null) { - options.compressionLevel = argument.option.subId; - } - - argument = cmdLine.idMap.get( A_BATCHMODE ); - if (argument != null) { - options.bBatch = true; - } - - argument = cmdLine.idMap.get( A_DRYRUN ); - if (argument != null) { - options.bDryrun = true; - } - - argument = cmdLine.idMap.get( A_VERIFY ); - if (argument != null) { - options.bVerify = true; - } - - argument = cmdLine.idMap.get( A_REMOVE ); - if (argument != null) { - options.bRemove = true; - } - - argument = cmdLine.idMap.get( A_DEST ); - if (argument != null) { - options.dstPath = new File( argument.value ); - } - - argument = cmdLine.idMap.get( A_FILELIST ); - if (argument != null) { - options.lstFile = new File( argument.value ); - } - - argument = cmdLine.idMap.get( A_TWOPASS ); - if (argument != null) { - options.bTwopass = true; - } - - argument = cmdLine.idMap.get( A_HDRFILES ); - if (argument != null) { - options.bHeaderFiles = true; - } - - argument = cmdLine.idMap.get( A_BLACKLIST ); - if (argument != null) { - options.blacklistFile = new File( argument.value ); - } - - argument = cmdLine.idMap.get( A_CHECKSUMS ); - if (argument != null) { - options.checksumsFile = new File( argument.value ); - } - - options.bQuiet = cmdLine.idMap.containsKey( JWATTools.A_QUIET ); - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - if (!options.bQuiet) { - System.out.println("JWATTools v" + JWATTools.getVersionString("org.jwat.tools")); - System.out.println( "Compression level: " + options.compressionLevel ); - System.out.println( " Batch mode: " + options.bBatch ); - System.out.println( " Dry run: " + options.bDryrun ); - System.out.println( " Verify output: " + options.bVerify ); - System.out.println( " Remove input: " + options.bRemove ); - System.out.println( " Dest path: " + options.dstPath ); - System.out.println( " List file: " + options.lstFile ); - System.out.println( " Twopass: " + options.bTwopass ); - System.out.println( " Header Files: " + options.bHeaderFiles ); - System.out.println( " Quiet: " + options.bQuiet ); - } - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java b/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java index 51b90b2..d14ed15 100644 --- a/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java @@ -1,8 +1,16 @@ package org.jwat.tools.tasks.containermd; +import java.io.File; + +import org.jwat.common.UriProfile; +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class ContainerMDTaskCLI extends TaskCLI { @@ -28,7 +36,83 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { ContainerMDTask task = new ContainerMDTask(); - task.runtask(ContainerMDTaskCLIParser.parseArguments(cmdLine)); + ContainerMDOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_DEST = 101; + public static final int A_LAX = 102; + + public static ContainerMDOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); + cliOptions.addOption("-l", null, A_LAX, 0, null); + cliOptions.addOption("-q", null, JWATTools.A_QUIET, 0, null); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( ContainerMDTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + ContainerMDOptions options = new ContainerMDOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.err.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.err.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Output directory + argument = cmdLine.idMap.get( A_DEST ); + if ( argument != null && argument.value != null ) { + File dir = new File(argument.value); + if (dir.exists()) { + if (dir.isDirectory()) { + options.outputDir = dir; + } else { + if (!options.bQuiet) System.err.println("Output '" + argument.value + "' invalid, defaulting to '" + options.outputDir + "'"); + } + } else { + if (dir.mkdirs()) { + options.outputDir = dir; + } else { + if (!options.bQuiet) System.err.println("Output '" + argument.value + "' invalid, defaulting to '" + options.outputDir + "'"); + } + } + } + + // Relaxed URI validation. + if ( cmdLine.idMap.containsKey( A_LAX ) ) { + options.uriProfile = UriProfile.RFC3986_ABS_16BIT_LAX; + if (!options.bQuiet) System.out.println("Using relaxed URI validation for ARC URL and WARC Target-URI."); + } + + options.bQuiet = cmdLine.idMap.containsKey( JWATTools.A_QUIET ); + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLIParser.java deleted file mode 100644 index 8cb55c2..0000000 --- a/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLIParser.java +++ /dev/null @@ -1,94 +0,0 @@ -package org.jwat.tools.tasks.containermd; - -import java.io.File; - -import org.jwat.common.UriProfile; -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class ContainerMDTaskCLIParser { - - public static final int A_DEST = 101; - public static final int A_LAX = 102; - - protected ContainerMDTaskCLIParser() { - } - - public static ContainerMDOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); - cliOptions.addOption("-l", null, A_LAX, 0, null); - cliOptions.addOption("-q", null, JWATTools.A_QUIET, 0, null); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( ContainerMDTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - ContainerMDOptions options = new ContainerMDOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.err.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.err.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Output directory - argument = cmdLine.idMap.get( A_DEST ); - if ( argument != null && argument.value != null ) { - File dir = new File(argument.value); - if (dir.exists()) { - if (dir.isDirectory()) { - options.outputDir = dir; - } else { - if (!options.bQuiet) System.err.println("Output '" + argument.value + "' invalid, defaulting to '" + options.outputDir + "'"); - } - } else { - if (dir.mkdirs()) { - options.outputDir = dir; - } else { - if (!options.bQuiet) System.err.println("Output '" + argument.value + "' invalid, defaulting to '" + options.outputDir + "'"); - } - } - } - - // Relaxed URI validation. - if ( cmdLine.idMap.containsKey( A_LAX ) ) { - options.uriProfile = UriProfile.RFC3986_ABS_16BIT_LAX; - if (!options.bQuiet) System.out.println("Using relaxed URI validation for ARC URL and WARC Target-URI."); - } - - options.bQuiet = cmdLine.idMap.containsKey( JWATTools.A_QUIET ); - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java b/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java index d02f1d3..f1040f5 100644 --- a/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java @@ -1,8 +1,13 @@ package org.jwat.tools.tasks.decompress; +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class DecompressTaskCLI extends TaskCLI { @@ -28,7 +33,50 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { DecompressTask task = new DecompressTask(); - task.runtask(DecompressTaskCLIParser.parseArguments(cmdLine)); + DecompressOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static DecompressOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( DecompressTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + DecompressOptions options = new DecompressOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLIParser.java deleted file mode 100644 index aa98cd1..0000000 --- a/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLIParser.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.jwat.tools.tasks.decompress; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class DecompressTaskCLIParser { - - protected DecompressTaskCLIParser() { - } - - public static DecompressOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( DecompressTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - DecompressOptions options = new DecompressOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java b/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java index f0e73a8..5fda39b 100644 --- a/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.delete; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class DeleteTaskCLI extends TaskCLI { @@ -28,7 +35,56 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { DeleteTask task = new DeleteTask(); - task.runtask(DeleteTaskCLIParser.parseArguments(cmdLine)); + DeleteOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OUTPUT = 101; + public static final int A_DRYRUN = 102; + + public static DeleteOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); + cliOptions.addOption(null, "--dryrun", A_DRYRUN, 0, null); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( DeleteTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + DeleteOptions options = new DeleteOptions(); + + Argument argument; + + // Output file. + argument = cmdLine.idMap.get( A_OUTPUT ); + if ( argument != null && argument.value != null ) { + options.outputFile = new File(argument.value); + if (options.outputFile.isDirectory()) { + System.out.println("Can not output to a directory!"); + System.exit(1); + } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { + if (!options.outputFile.getParentFile().mkdirs()) { + System.out.println("Could not create parent directories!"); + System.exit(1); + } + } + } + + // Test run. + if ( cmdLine.idMap.containsKey( A_DRYRUN ) ) { + options.bDryRun = true; + } + System.out.println("Test run: " + options.bDryRun); + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLIParser.java deleted file mode 100644 index a837703..0000000 --- a/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLIParser.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.jwat.tools.tasks.delete; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class DeleteTaskCLIParser { - - public static final int A_OUTPUT = 101; - public static final int A_DRYRUN = 102; - - protected DeleteTaskCLIParser() { - } - - public static DeleteOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); - cliOptions.addOption(null, "--dryrun", A_DRYRUN, 0, null); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( DeleteTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - DeleteOptions options = new DeleteOptions(); - - Argument argument; - - // Output file. - argument = cmdLine.idMap.get( A_OUTPUT ); - if ( argument != null && argument.value != null ) { - options.outputFile = new File(argument.value); - if (options.outputFile.isDirectory()) { - System.out.println("Can not output to a directory!"); - System.exit(1); - } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { - if (!options.outputFile.getParentFile().mkdirs()) { - System.out.println("Could not create parent directories!"); - System.exit(1); - } - } - } - - // Test run. - if ( cmdLine.idMap.containsKey( A_DRYRUN ) ) { - options.bDryRun = true; - } - System.out.println("Test run: " + options.bDryRun); - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java b/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java new file mode 100644 index 0000000..55bf481 --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java @@ -0,0 +1,9 @@ +package org.jwat.tools.tasks.digest; + +import java.util.List; + +public class DigestOptions { + + public List filesList; + +} diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java b/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java new file mode 100644 index 0000000..db695b4 --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java @@ -0,0 +1,70 @@ +package org.jwat.tools.tasks.digest; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +import org.jwat.common.Base16; +import org.jwat.common.Base32; +import org.jwat.common.Base64; +import org.jwat.tools.tasks.AbstractTask; + +import it.unimi.dsi.fastutil.io.FastBufferedInputStream; + +public class DigestTask extends AbstractTask { + + private DigestOptions options; + + public DigestTask() { + } + + public void runtask(DigestOptions options) { + this.options = options; + filelist_feeder( options.filesList, this ); + } + + private byte[] isBuffer = new byte[65536]; + + private byte[] readBuffer = new byte[65536]; + + @Override + public void process(File srcFile) { + MessageDigest md = null; + byte[] digest; + FastBufferedInputStream in = null; + String digestAlgorithm = "SHA-1"; + try { + md = MessageDigest.getInstance(digestAlgorithm); + } + catch (NoSuchAlgorithmException e) { + e.printStackTrace(); + System.exit(-1); + } + int read; + try { + in = new FastBufferedInputStream(new FileInputStream(srcFile), isBuffer); + while ((read = in.read(readBuffer)) != -1) { + md.update(readBuffer, 0, read); + } + digest = md.digest(); + System.out.println(digestAlgorithm + ":" + Base16.encodeArray(digest) + " (base16/hex)"); + System.out.println(digestAlgorithm + ":" + Base32.encodeArray(digest) + " (base32)"); + System.out.println(digestAlgorithm + ":" + Base64.encodeArray(digest) + " (base64)"); + } + catch (IOException e) { + e.printStackTrace(); + } + finally { + if (in != null) { + try { + in.close(); + } + catch (IOException e) { + } + } + } + } + +} diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestTaskCLI.java b/src/main/java/org/jwat/tools/tasks/digest/DigestTaskCLI.java new file mode 100644 index 0000000..897c4c1 --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestTaskCLI.java @@ -0,0 +1,66 @@ +package org.jwat.tools.tasks.digest; + +import java.util.LinkedList; + +import org.jwat.tools.JWATTools; +import org.jwat.tools.tasks.TaskCLI; + +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; +import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; + +public class DigestTaskCLI extends TaskCLI { + + public static final String commandName = "digest"; + + public static final String commandDescription = "digest calculation"; + + @Override + public void show_help() { + System.out.println("jwattools [-o] digest "); + System.out.println(""); + System.out.println("digest file(s)"); + System.out.println(""); + System.out.println("\tDigest file(s)."); + /* + System.out.println(""); + System.out.println("options:"); + System.out.println(""); + */ + } + + @Override + public void runtask(CommandLine cmdLine) { + DigestTask task = new DigestTask(); + DigestOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static DigestOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addNamedArgument( "files", JWATTools.A_FILES, 1, 1 ); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( DigestTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + DigestOptions options = new DigestOptions(); + + Argument argument; + String tmpStr; + + // Files + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = new LinkedList(); + //options.filesList = argument.values; + options.filesList.add( argument.value ); + + return options; + } + +} diff --git a/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java b/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java index e3ba340..fc0bd7a 100644 --- a/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java @@ -1,8 +1,13 @@ package org.jwat.tools.tasks.extract; +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class ExtractTaskCLI extends TaskCLI { @@ -26,7 +31,58 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { ExtractTask task = new ExtractTask(); - task.runtask(ExtractTaskCLIParser.parseArguments(cmdLine)); + ExtractOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_TARGET_URI = 101; + + public static ExtractOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-u", null, A_TARGET_URI, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( ExtractTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + ExtractOptions options = new ExtractOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + argument = cmdLine.idMap.get( A_TARGET_URI ); + if ( argument != null && argument.value != null ) { + options.targetUri = argument.value; + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLIParser.java deleted file mode 100644 index 75a668c..0000000 --- a/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLIParser.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.jwat.tools.tasks.extract; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class ExtractTaskCLIParser { - - public static final int A_TARGET_URI = 101; - - protected ExtractTaskCLIParser() { - } - - public static ExtractOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-u", null, A_TARGET_URI, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( ExtractTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - ExtractOptions options = new ExtractOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - argument = cmdLine.idMap.get( A_TARGET_URI ); - if ( argument != null && argument.value != null ) { - options.targetUri = argument.value; - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java b/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java index a154be2..dac087a 100644 --- a/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.headers2cdx; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class Headers2CDXTaskCLI extends TaskCLI { @@ -29,7 +36,68 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { Headers2CDXTask task = new Headers2CDXTask(); - task.runtask(Headers2CDXTaskCLIParser.parseArguments(cmdLine)); + Headers2CDXOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OUTPUT = 101; + + public static Headers2CDXOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( Headers2CDXTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + Headers2CDXOptions options = new Headers2CDXOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Output file. + argument = cmdLine.idMap.get( A_OUTPUT ); + if ( argument != null && argument.value != null ) { + options.outputFile = new File(argument.value); + if (options.outputFile.isDirectory()) { + System.out.println("Can not output to a directory!"); + System.exit(1); + } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { + if (!options.outputFile.getParentFile().mkdirs()) { + System.out.println("Could not create parent directories!"); + System.exit(1); + } + } + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLIParser.java deleted file mode 100644 index b156eeb..0000000 --- a/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLIParser.java +++ /dev/null @@ -1,78 +0,0 @@ -package org.jwat.tools.tasks.headers2cdx; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class Headers2CDXTaskCLIParser { - - public static final int A_OUTPUT = 101; - - protected Headers2CDXTaskCLIParser() { - } - - public static Headers2CDXOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( Headers2CDXTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - Headers2CDXOptions options = new Headers2CDXOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Output file. - argument = cmdLine.idMap.get( A_OUTPUT ); - if ( argument != null && argument.value != null ) { - options.outputFile = new File(argument.value); - if (options.outputFile.isDirectory()) { - System.out.println("Can not output to a directory!"); - System.exit(1); - } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { - if (!options.outputFile.getParentFile().mkdirs()) { - System.out.println("Could not create parent directories!"); - System.exit(1); - } - } - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java b/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java index 114b28d..a92da96 100644 --- a/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.interval; +import java.util.LinkedList; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class IntervalTaskCLI extends TaskCLI { @@ -18,7 +25,7 @@ public void show_help() { System.out.println(""); System.out.println("\tSkips data up to offset1 and save data to file until offset2 is reached."); System.out.println("\tOffset1/2 can be decimal or hexadecimal ($ or 0x)."); - System.out.println("\tOffset2 can also be a length-ofsset (+)."); + System.out.println("\tOffset2 can also be a length-offset (+ +$ +0x)."); /* System.out.println(""); System.out.println("options:"); @@ -29,7 +36,86 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { IntervalTask task = new IntervalTask(); - task.runtask(IntervalTaskCLIParser.parseArguments(cmdLine)); + IntervalOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OFFSET1 = 101; + public static final int A_OFFSET2 = 102; + public static final int A_DSTFILE = 103; + + public static IntervalOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addNamedArgument( "offset1", A_OFFSET1, 1, 1); + cliOptions.addNamedArgument( "offset2", A_OFFSET2, 1, 1); + cliOptions.addNamedArgument( "files", JWATTools.A_FILES, 1, 1 ); + cliOptions.addNamedArgument( "dstfile", A_DSTFILE, 1, 1); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( IntervalTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + IntervalOptions options = new IntervalOptions(); + + Argument argument; + String tmpStr; + + argument = cmdLine.idMap.get( A_OFFSET1 ); + tmpStr = argument.value.toLowerCase(); + try { + if (tmpStr.startsWith("$")) { + options.sIdx = Long.parseLong(tmpStr.substring(1), 16); + } + else if (tmpStr.startsWith("0x")) { + options.sIdx = Long.parseLong(tmpStr.substring(2), 16); + } + else { + options.sIdx = Long.parseLong(tmpStr); + } + } + catch (NumberFormatException e) { + System.out.println("Incorrect sidx!"); + System.exit(1); + } + + argument = cmdLine.idMap.get( A_OFFSET2 ); + tmpStr = argument.value.toLowerCase(); + options.bPlusEIdx = tmpStr.startsWith("+"); + if (options.bPlusEIdx) { + tmpStr = tmpStr.substring(1); + } + try { + if (tmpStr.startsWith("$")) { + options.eIdx = Long.parseLong(tmpStr.substring(1), 16); + } + else if (tmpStr.startsWith("0x")) { + options.eIdx = Long.parseLong(tmpStr.substring(2), 16); + } + else { + options.eIdx = Long.parseLong(tmpStr); + } + if (options.bPlusEIdx) { + options.eIdx += options.sIdx; + } + } + catch (NumberFormatException e) { + System.out.println("Incorrect sidx!"); + System.exit(1); + } + + // Files + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = new LinkedList(); + //options.filesList = argument.values; + options.filesList.add( argument.value ); + + argument = cmdLine.idMap.get( A_DSTFILE ); + options.dstName = argument.value; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLIParser.java deleted file mode 100644 index 8910f0e..0000000 --- a/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLIParser.java +++ /dev/null @@ -1,96 +0,0 @@ -package org.jwat.tools.tasks.interval; - -import java.util.LinkedList; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class IntervalTaskCLIParser { - - public static final int A_OFFSET1 = 101; - public static final int A_OFFSET2 = 102; - public static final int A_DSTFILE = 103; - - protected IntervalTaskCLIParser() { - } - - public static IntervalOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addNamedArgument( "offset1", A_OFFSET1, 1, 1); - cliOptions.addNamedArgument( "offset2", A_OFFSET2, 1, 1); - cliOptions.addNamedArgument( "files", JWATTools.A_FILES, 1, 1 ); - cliOptions.addNamedArgument( "dstfile", A_DSTFILE, 1, 1); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( IntervalTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - IntervalOptions options = new IntervalOptions(); - - Argument argument; - String tmpStr; - - argument = cmdLine.idMap.get( A_OFFSET1 ); - tmpStr = argument.value.toLowerCase(); - try { - if (tmpStr.startsWith("$")) { - options.sIdx = Long.parseLong(tmpStr.substring(1), 16); - } - else if (tmpStr.startsWith("0x")) { - options.sIdx = Long.parseLong(tmpStr.substring(2), 16); - } - else { - options.sIdx = Long.parseLong(tmpStr); - } - } - catch (NumberFormatException e) { - System.out.println("Incorrect sidx!"); - System.exit(1); - } - - argument = cmdLine.idMap.get( A_OFFSET2 ); - tmpStr = argument.value.toLowerCase(); - options.bPlusEIdx = tmpStr.startsWith("+"); - if (options.bPlusEIdx) { - tmpStr = tmpStr.substring(1); - } - try { - if (tmpStr.startsWith("$")) { - options.eIdx = Long.parseLong(tmpStr.substring(1), 16); - } - else if (tmpStr.startsWith("0x")) { - options.eIdx = Long.parseLong(tmpStr.substring(2), 16); - } - else { - options.eIdx = Long.parseLong(tmpStr); - } - if (options.bPlusEIdx) { - options.eIdx += options.sIdx; - } - } - catch (NumberFormatException e) { - System.out.println("Incorrect sidx!"); - System.exit(1); - } - - // Files - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = new LinkedList(); - //options.filesList = argument.values; - options.filesList.add( argument.value ); - - argument = cmdLine.idMap.get( A_DSTFILE ); - options.dstName = argument.value; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java b/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java index 47c5383..958f7d1 100644 --- a/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.pathindex; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class PathIndexTaskCLI extends TaskCLI { @@ -27,7 +34,47 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { PathIndexTask task = new PathIndexTask(); - task.runtask(PathIndexTaskCLIParser.parseArguments(cmdLine)); + PathIndexOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OUTPUT = 101; + + public static PathIndexOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( PathIndexTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + PathIndexOptions options = new PathIndexOptions(); + Argument argument; + + // Output file. + argument = cmdLine.idMap.get( A_OUTPUT ); + if ( argument != null && argument.value != null ) { + options.outputFile = new File(argument.value); + if (options.outputFile.isDirectory()) { + System.out.println("Can not output to a directory!"); + System.exit(1); + } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { + if (!options.outputFile.getParentFile().mkdirs()) { + System.out.println("Could not create parent directories!"); + System.exit(1); + } + } + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLIParser.java deleted file mode 100644 index ab2e9e0..0000000 --- a/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLIParser.java +++ /dev/null @@ -1,57 +0,0 @@ -package org.jwat.tools.tasks.pathindex; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class PathIndexTaskCLIParser { - - public static final int A_OUTPUT = 101; - - protected PathIndexTaskCLIParser() { - } - - public static PathIndexOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( PathIndexTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - PathIndexOptions options = new PathIndexOptions(); - Argument argument; - - // Output file. - argument = cmdLine.idMap.get( A_OUTPUT ); - if ( argument != null && argument.value != null ) { - options.outputFile = new File(argument.value); - if (options.outputFile.isDirectory()) { - System.out.println("Can not output to a directory!"); - System.exit(1); - } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { - if (!options.outputFile.getParentFile().mkdirs()) { - System.out.println("Could not create parent directories!"); - System.exit(1); - } - } - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java b/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java index f24c7e2..d22c16c 100644 --- a/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java @@ -1,8 +1,20 @@ package org.jwat.tools.tasks.test; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; + +import org.jwat.common.UriProfile; +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import org.jwat.tools.validators.XmlValidatorPlugin; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class TestTaskCLI extends TaskCLI { @@ -32,7 +44,109 @@ public void show_help() { @Override public void runtask(CommandLine cmdLine) { TestTask task = new TestTask(); - task.runtask(TestTaskCLIParser.parseArguments(cmdLine)); + TestOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_AFTER = 101; + public static final int A_BAD = 102; + public static final int A_SHOW_ERRORS = 103; + public static final int A_IGNORE_DIGEST = 104; + public static final int A_HTTP_HEADER_ERRORS = 105; + public static final int A_LAX = 106; + public static final int A_XML = 107; + + public static TestOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-a", null, A_AFTER, 0, null).setValueRequired(); + cliOptions.addOption("-b", null, A_BAD, 0, null); + cliOptions.addOption("-e", null, A_SHOW_ERRORS, 0, null); + cliOptions.addOption("-h", null, A_HTTP_HEADER_ERRORS, 0, null); + cliOptions.addOption("-i", "--ignore-digest", A_IGNORE_DIGEST, 0, null); + cliOptions.addOption("-l", null, A_LAX, 0, null); + cliOptions.addOption("-x", null, A_XML, 0, null); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println(TestTaskCLI.class.getName() + ": " + e.getMessage()); + System.exit(1); + } + + TestOptions options = new TestOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + options.bHttpHeaderErrors = cmdLine.idMap.containsKey(A_HTTP_HEADER_ERRORS); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Show errors. + if ( cmdLine.idMap.containsKey( A_SHOW_ERRORS ) ) { + options.bShowErrors = true; + } + System.out.println("Showing errors: " + options.bShowErrors); + + // Ignore digest. + if ( cmdLine.idMap.containsKey( A_IGNORE_DIGEST ) ) { + options.bValidateDigest = false; + } + System.out.println("Validate digest: " + options.bValidateDigest); + + // Relaxed URI validation. + if ( cmdLine.idMap.containsKey( A_LAX ) ) { + options.uriProfile = UriProfile.RFC3986_ABS_16BIT_LAX; + System.out.println("Using relaxed URI validation for ARC URL and WARC Target-URI."); + } + + // XML validation. + if ( cmdLine.idMap.containsKey( A_XML ) ) { + options.validatorPlugins.add(new XmlValidatorPlugin()); + } + + // Tag. + if ( cmdLine.idMap.containsKey( A_BAD ) ) { + options.bBad = true; + System.out.println("Tagging enabled for invalid files"); + } + + // After. + argument = cmdLine.idMap.get( A_AFTER ); + if ( argument != null && argument.value != null ) { + try { + DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss"); + dateFormat.setLenient(false); + Date afterDate = dateFormat.parse(argument.value); + options.after = afterDate.getTime(); + } catch (ParseException e) { + System.out.println("Invalid date format - " + argument.value); + System.exit( 1 ); + } + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/test/TestTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/test/TestTaskCLIParser.java deleted file mode 100644 index f8457f7..0000000 --- a/src/main/java/org/jwat/tools/tasks/test/TestTaskCLIParser.java +++ /dev/null @@ -1,124 +0,0 @@ -package org.jwat.tools.tasks.test; - -import java.text.DateFormat; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Date; - -import org.jwat.common.UriProfile; -import org.jwat.tools.JWATTools; -import org.jwat.tools.validators.XmlValidatorPlugin; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class TestTaskCLIParser { - - public static final int A_AFTER = 101; - public static final int A_BAD = 102; - public static final int A_SHOW_ERRORS = 103; - public static final int A_IGNORE_DIGEST = 104; - public static final int A_HTTP_HEADER_ERRORS = 105; - public static final int A_LAX = 106; - public static final int A_XML = 107; - - protected TestTaskCLIParser() { - } - - public static TestOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-a", null, A_AFTER, 0, null).setValueRequired(); - cliOptions.addOption("-b", null, A_BAD, 0, null); - cliOptions.addOption("-e", null, A_SHOW_ERRORS, 0, null); - cliOptions.addOption("-h", null, A_HTTP_HEADER_ERRORS, 0, null); - cliOptions.addOption("-i", "--ignore-digest", A_IGNORE_DIGEST, 0, null); - cliOptions.addOption("-l", null, A_LAX, 0, null); - cliOptions.addOption("-x", null, A_XML, 0, null); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( TestTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - TestOptions options = new TestOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - options.bHttpHeaderErrors = cmdLine.idMap.containsKey(A_HTTP_HEADER_ERRORS); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Show errors. - if ( cmdLine.idMap.containsKey( A_SHOW_ERRORS ) ) { - options.bShowErrors = true; - } - System.out.println("Showing errors: " + options.bShowErrors); - - // Ignore digest. - if ( cmdLine.idMap.containsKey( A_IGNORE_DIGEST ) ) { - options.bValidateDigest = false; - } - System.out.println("Validate digest: " + options.bValidateDigest); - - // Relaxed URI validation. - if ( cmdLine.idMap.containsKey( A_LAX ) ) { - options.uriProfile = UriProfile.RFC3986_ABS_16BIT_LAX; - System.out.println("Using relaxed URI validation for ARC URL and WARC Target-URI."); - } - - // XML validation. - if ( cmdLine.idMap.containsKey( A_XML ) ) { - options.validatorPlugins.add(new XmlValidatorPlugin()); - } - - // Tag. - if ( cmdLine.idMap.containsKey( A_BAD ) ) { - options.bBad = true; - System.out.println("Tagging enabled for invalid files"); - } - - // After. - argument = cmdLine.idMap.get( A_AFTER ); - if ( argument != null && argument.value != null ) { - try { - DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss"); - dateFormat.setLenient(false); - Date afterDate = dateFormat.parse(argument.value); - options.after = afterDate.getTime(); - } catch (ParseException e) { - System.out.println("Invalid date format - " + argument.value); - System.exit( 1 ); - } - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkOptions.java b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkOptions.java new file mode 100644 index 0000000..33b50cd --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkOptions.java @@ -0,0 +1,9 @@ +package org.jwat.tools.tasks.unchunk; + +import java.util.List; + +public class UnchunkOptions { + + public List filesList; + +} diff --git a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java new file mode 100644 index 0000000..ad3853d --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java @@ -0,0 +1,56 @@ +package org.jwat.tools.tasks.unchunk; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; + +import org.jwat.common.DigestInputStreamChunkedNoSkip; +import org.jwat.common.RandomAccessFileInputStream; +import org.jwat.common.RandomAccessFileOutputStream; +import org.jwat.tools.tasks.AbstractTask; + +public class UnchunkTask extends AbstractTask { + + private UnchunkOptions options; + + public UnchunkTask() { + } + + public void runtask(UnchunkOptions options) { + this.options = options; + filelist_feeder( options.filesList, this ); + } + + @Override + public void process(File srcFile) { + RandomAccessFile rafin = null; + RandomAccessFileInputStream rafis = null; + DigestInputStreamChunkedNoSkip dis = null; + RandomAccessFile rafout = null; + RandomAccessFileOutputStream rafos = null; + try { + rafin = new RandomAccessFile(srcFile, "r"); + rafis = new RandomAccessFileInputStream(rafin); + rafout = new RandomAccessFile(srcFile + ".unchunked", "rw"); + rafos = new RandomAccessFileOutputStream(rafout); + dis = new DigestInputStreamChunkedNoSkip(rafis, null, null, rafos); + long remaining = rafin.length(); + long skipped; + while (remaining > 0) { + skipped = dis.skip(remaining); + if (skipped > 0) { + remaining -= skipped; + } + } + dis.close(); + rafos.close(); + rafis.close(); + rafout.close(); + rafin.close(); + } + catch (IOException e) { + e.printStackTrace(); + } + } + +} diff --git a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java new file mode 100644 index 0000000..5f8e2c3 --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java @@ -0,0 +1,66 @@ +package org.jwat.tools.tasks.unchunk; + +import java.util.LinkedList; + +import org.jwat.tools.JWATTools; +import org.jwat.tools.tasks.TaskCLI; + +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; +import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; + +public class UnchunkTaskCLI extends TaskCLI { + + public static final String commandName = "unchunk"; + + public static final String commandDescription = "unchunk file(s) with chunked transfter encoding"; + + @Override + public void show_help() { + System.out.println("jwattools [-o] unchunk "); + System.out.println(""); + System.out.println("unchunk file(s)"); + System.out.println(""); + System.out.println("\tUnchunk file(s)."); + /* + System.out.println(""); + System.out.println("options:"); + System.out.println(""); + */ + } + + @Override + public void runtask(CommandLine cmdLine) { + UnchunkTask task = new UnchunkTask(); + UnchunkOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static UnchunkOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addNamedArgument( "files", JWATTools.A_FILES, 1, 1 ); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( UnchunkTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + UnchunkOptions options = new UnchunkOptions(); + + Argument argument; + String tmpStr; + + // Files + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = new LinkedList(); + //options.filesList = argument.values; + options.filesList.add( argument.value ); + + return options; + } + +} diff --git a/src/main/java/org/jwat/tools/tasks/UnpackOptions.java b/src/main/java/org/jwat/tools/tasks/unpack/UnpackOptions.java similarity index 70% rename from src/main/java/org/jwat/tools/tasks/UnpackOptions.java rename to src/main/java/org/jwat/tools/tasks/unpack/UnpackOptions.java index ec9f197..bcb0beb 100644 --- a/src/main/java/org/jwat/tools/tasks/UnpackOptions.java +++ b/src/main/java/org/jwat/tools/tasks/unpack/UnpackOptions.java @@ -1,4 +1,4 @@ -package org.jwat.tools.tasks; +package org.jwat.tools.tasks.unpack; import java.util.List; diff --git a/src/main/java/org/jwat/tools/tasks/UnpackTask.java b/src/main/java/org/jwat/tools/tasks/unpack/UnpackTask.java similarity index 93% rename from src/main/java/org/jwat/tools/tasks/UnpackTask.java rename to src/main/java/org/jwat/tools/tasks/unpack/UnpackTask.java index 90349c1..365ede2 100644 --- a/src/main/java/org/jwat/tools/tasks/UnpackTask.java +++ b/src/main/java/org/jwat/tools/tasks/unpack/UnpackTask.java @@ -1,4 +1,4 @@ -package org.jwat.tools.tasks; +package org.jwat.tools.tasks.unpack; import java.io.BufferedInputStream; import java.io.File; @@ -11,6 +11,7 @@ import org.jwat.common.RandomAccessFileInputStream; import org.jwat.gzip.GzipEntry; import org.jwat.gzip.GzipReader; +import org.jwat.tools.tasks.AbstractTask; // TODO public class UnpackTask extends AbstractTask { diff --git a/src/main/java/org/jwat/tools/tasks/UnpackTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/unpack/UnpackTaskCLI.java similarity index 57% rename from src/main/java/org/jwat/tools/tasks/UnpackTaskCLIParser.java rename to src/main/java/org/jwat/tools/tasks/unpack/UnpackTaskCLI.java index 1b7b291..83ad04c 100644 --- a/src/main/java/org/jwat/tools/tasks/UnpackTaskCLIParser.java +++ b/src/main/java/org/jwat/tools/tasks/unpack/UnpackTaskCLI.java @@ -1,16 +1,30 @@ -package org.jwat.tools.tasks; +package org.jwat.tools.tasks.unpack; import org.jwat.tools.JWATTools; +import org.jwat.tools.tasks.TaskCLI; import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; import com.antiaction.common.cli.Options; -public class UnpackTaskCLIParser { +public class UnpackTaskCLI extends TaskCLI { + + public static final String commandName = "unpack"; + + public static final String commandDescription = "unpack multifile GZip"; + + @Override + public void show_help() { + System.out.println("Work in progress..."); + } - protected UnpackTaskCLIParser() { + @Override + public void runtask(CommandLine cmdLine) { + UnpackTask task = new UnpackTask(); + UnpackOptions options = parseArguments(cmdLine); + task.runtask(options); } public static UnpackOptions parseArguments(CommandLine cmdLine) { @@ -20,7 +34,7 @@ public static UnpackOptions parseArguments(CommandLine cmdLine) { cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); } catch (ArgumentParserException e) { - System.out.println( UnpackTaskCLIParser.class.getName() + ": " + e.getMessage() ); + System.out.println( UnpackTaskCLI.class.getName() + ": " + e.getMessage() ); System.exit( 1 ); } diff --git a/src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLIParser.java b/src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLI.java similarity index 89% rename from src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLIParser.java rename to src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLI.java index f663fa9..eb935bf 100644 --- a/src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLIParser.java +++ b/src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLI.java @@ -15,7 +15,7 @@ import com.antiaction.common.cli.CommandLine; @RunWith(JUnit4.class) -public class TestArc2WarcTaskCLIParser { +public class TestArc2WarcTaskCLI { private SecurityManager securityManager; @@ -35,7 +35,7 @@ public void test_arc2warctask_cli_parser() { CommandLine cmdLine; Arc2WarcOptions options; - Arc2WarcTaskCLIParser object = new Arc2WarcTaskCLIParser(); + Arc2WarcTaskCLI object = new Arc2WarcTaskCLI(); Assert.assertNotNull(object); Object[][] cases = new Object[][] { @@ -85,7 +85,7 @@ public void test_arc2warctask_cli_parser() { System.out.println(i); cmdLine = new CommandLine(); cmdLine.argsArray = (String[])cases[ i ][ 0 ]; - options = Arc2WarcTaskCLIParser.parseArguments(cmdLine); + options = Arc2WarcTaskCLI.parseArguments(cmdLine); Assert.assertEquals(cases[ i ][ 1 ], options.threads); Assert.assertEquals(cases[ i ][ 2 ], options.destDir); Assert.assertEquals(cases[ i ][ 3 ], options.prefix); @@ -101,7 +101,7 @@ public void test_arc2warctask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {}; - options = Arc2WarcTaskCLIParser.parseArguments(cmdLine); + options = Arc2WarcTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -110,7 +110,7 @@ public void test_arc2warctask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "42"}; - options = Arc2WarcTaskCLIParser.parseArguments(cmdLine); + options = Arc2WarcTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -119,7 +119,7 @@ public void test_arc2warctask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "0", "file"}; - options = Arc2WarcTaskCLIParser.parseArguments(cmdLine); + options = Arc2WarcTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -128,7 +128,7 @@ public void test_arc2warctask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "fourtytwo", "file"}; - options = Arc2WarcTaskCLIParser.parseArguments(cmdLine); + options = Arc2WarcTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { diff --git a/src/test/java/org/jwat/tools/tasks/cdx/TestCDXTaskCLIParser.java b/src/test/java/org/jwat/tools/tasks/cdx/TestCDXTaskCLIParser.java index 7960f31..b3395d1 100644 --- a/src/test/java/org/jwat/tools/tasks/cdx/TestCDXTaskCLIParser.java +++ b/src/test/java/org/jwat/tools/tasks/cdx/TestCDXTaskCLIParser.java @@ -35,7 +35,7 @@ public void test_cdxtask_cli_parser() { CommandLine cmdLine; CDXOptions options; - CDXTaskCLIParser object = new CDXTaskCLIParser(); + CDXTaskCLI object = new CDXTaskCLI(); Assert.assertNotNull(object); Object[][] cases = new Object[][] { @@ -69,7 +69,7 @@ public void test_cdxtask_cli_parser() { for (int i=0; i fileList = options.filesList; @@ -72,7 +72,7 @@ public void test_changedtask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {}; - options = ChangedTaskCLIParser.parseArguments(cmdLine); + options = ChangedTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -81,7 +81,7 @@ public void test_changedtask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-o", "outfile"}; - options = ChangedTaskCLIParser.parseArguments(cmdLine); + options = ChangedTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { diff --git a/src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLIParser.java b/src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLI.java similarity index 92% rename from src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLIParser.java rename to src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLI.java index fb34cd0..3cd3f53 100644 --- a/src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLIParser.java +++ b/src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLI.java @@ -15,7 +15,7 @@ import com.antiaction.common.cli.CommandLine; @RunWith(JUnit4.class) -public class TestCompressTaskCLIParser { +public class TestCompressTaskCLI { private SecurityManager securityManager; @@ -35,7 +35,7 @@ public void test_compresstask_cli_parser() { CommandLine cmdLine; CompressOptions options; - CompressTaskCLIParser object = new CompressTaskCLIParser(); + CompressTaskCLI object = new CompressTaskCLI(); Assert.assertNotNull(object); Object[][] cases = new Object[][] { @@ -134,7 +134,7 @@ null, new File("listfile1"), new String[] {"file12"} for (int i=0; i fileList = options.filesList; @@ -76,7 +76,7 @@ public void test_decompresstask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {}; - options = DecompressTaskCLIParser.parseArguments(cmdLine); + options = DecompressTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -85,7 +85,7 @@ public void test_decompresstask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "8"}; - options = DecompressTaskCLIParser.parseArguments(cmdLine); + options = DecompressTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -94,7 +94,7 @@ public void test_decompresstask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "0", "file"}; - options = DecompressTaskCLIParser.parseArguments(cmdLine); + options = DecompressTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -103,7 +103,7 @@ public void test_decompresstask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "fourtytwo", "file"}; - options = DecompressTaskCLIParser.parseArguments(cmdLine); + options = DecompressTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { diff --git a/src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLIParser.java b/src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLI.java similarity index 89% rename from src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLIParser.java rename to src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLI.java index b733407..a7572d7 100644 --- a/src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLIParser.java +++ b/src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLI.java @@ -15,7 +15,7 @@ import com.antiaction.common.cli.CommandLine; @RunWith(JUnit4.class) -public class TestDeleteTaskCLIParser { +public class TestDeleteTaskCLI { private SecurityManager securityManager; @@ -35,7 +35,7 @@ public void test_deletetask_cli_parser() { CommandLine cmdLine; DeleteOptions options; - DeleteTaskCLIParser object = new DeleteTaskCLIParser(); + DeleteTaskCLI object = new DeleteTaskCLI(); Assert.assertNotNull(object); Object[][] cases = new Object[][] { @@ -64,7 +64,7 @@ true, new File("output-file2"), for (int i=0; i Date: Wed, 7 Jun 2023 01:41:18 +0200 Subject: [PATCH 2/6] Last CLIParser cleanup. Improved throwable catch in borderline cases. --- src/main/java/org/jwat/tools/HelpTaskCLI.java | 27 +++++++++++++- .../org/jwat/tools/HelpTaskCLIParser.java | 35 ------------------- .../tasks/ThreadPoolExecutorPausable.java | 9 +++++ .../org/jwat/tools/tasks/test/TestTask.java | 15 +++++--- 4 files changed, 45 insertions(+), 41 deletions(-) delete mode 100644 src/main/java/org/jwat/tools/HelpTaskCLIParser.java diff --git a/src/main/java/org/jwat/tools/HelpTaskCLI.java b/src/main/java/org/jwat/tools/HelpTaskCLI.java index 94a3677..bf1b4da 100644 --- a/src/main/java/org/jwat/tools/HelpTaskCLI.java +++ b/src/main/java/org/jwat/tools/HelpTaskCLI.java @@ -2,7 +2,11 @@ import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class HelpTaskCLI extends TaskCLI { @@ -23,9 +27,30 @@ public void show_help() { System.out.println("\tIf a command is supplied its help information is shown instead."); } + public static final int A_HELPFOR_COMMAND = 101; + + public static HelpOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addNamedArgument( "helpfor_command", A_HELPFOR_COMMAND, 1, 1); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( HelpTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + HelpOptions options = new HelpOptions(); + Argument argument = cmdLine.idMap.get(A_HELPFOR_COMMAND); + if (argument != null) { + options.command = argument.value; + } + return options; + } + @Override public void runtask(CommandLine cmdLine) { - HelpOptions options = HelpTaskCLIParser.parseArguments(cmdLine); + HelpOptions options = parseArguments(cmdLine); String command = options.command; if (command == null) { JWATTools.show_help(); diff --git a/src/main/java/org/jwat/tools/HelpTaskCLIParser.java b/src/main/java/org/jwat/tools/HelpTaskCLIParser.java deleted file mode 100644 index 6cf6597..0000000 --- a/src/main/java/org/jwat/tools/HelpTaskCLIParser.java +++ /dev/null @@ -1,35 +0,0 @@ -package org.jwat.tools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class HelpTaskCLIParser { - - public static final int A_HELPFOR_COMMAND = 101; - - protected HelpTaskCLIParser() { - } - - public static HelpOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addNamedArgument( "helpfor_command", A_HELPFOR_COMMAND, 1, 1); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( HelpTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - HelpOptions options = new HelpOptions(); - Argument argument = cmdLine.idMap.get(A_HELPFOR_COMMAND); - if (argument != null) { - options.command = argument.value; - } - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/ThreadPoolExecutorPausable.java b/src/main/java/org/jwat/tools/tasks/ThreadPoolExecutorPausable.java index 9bcb8f7..8438d51 100644 --- a/src/main/java/org/jwat/tools/tasks/ThreadPoolExecutorPausable.java +++ b/src/main/java/org/jwat/tools/tasks/ThreadPoolExecutorPausable.java @@ -32,6 +32,7 @@ public ThreadPoolExecutorPausable(int corePoolSize, int maximumPoolSize, long ke super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory); } + @Override protected void beforeExecute(Thread t, Runnable r) { super.beforeExecute(t, r); pauseLock.lock(); @@ -46,6 +47,14 @@ protected void beforeExecute(Thread t, Runnable r) { } } + @Override + protected void afterExecute(Runnable r, Throwable t) { + super.afterExecute(r, t); + if (t != null) { + t.printStackTrace(); + } + } + public void pause() { pauseLock.lock(); try { diff --git a/src/main/java/org/jwat/tools/tasks/test/TestTask.java b/src/main/java/org/jwat/tools/tasks/test/TestTask.java index 3c6dc4b..fbfe450 100644 --- a/src/main/java/org/jwat/tools/tasks/test/TestTask.java +++ b/src/main/java/org/jwat/tools/tasks/test/TestTask.java @@ -260,11 +260,16 @@ class TaskRunnable implements Runnable { } @Override public void run() { - TestFile2 testFile = new TestFile2(); - testFile.callback = null; - TestFileResult result = testFile.processFile(srcFile, options, cloner); - results.add(result); - resultsReady.release(); + try { + TestFile2 testFile = new TestFile2(); + testFile.callback = null; + TestFileResult result = testFile.processFile(srcFile, options, cloner); + results.add(result); + resultsReady.release(); + } + catch (Throwable t) { + t.printStackTrace(); + } } } From 739a3ed983c0c1d574353f6d3368bc126cdcf4e6 Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Fri, 28 Jul 2023 08:59:43 +0200 Subject: [PATCH 3/6] Added profiles to set source/target for JDK8 and set release for JDK9+. --- pom.xml | 855 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 436 insertions(+), 419 deletions(-) diff --git a/pom.xml b/pom.xml index 88243c3..c3f9e53 100644 --- a/pom.xml +++ b/pom.xml @@ -1,419 +1,436 @@ - - - 4.0.0 - - org.jwat - jwat-tools - 0.7.2-SNAPSHOT - - jar - - jwat-tools - - JWAT-Tools uses the available JWAT libraries to make high level tasks available either from command-line or programmatically. - Common tasks include: Test, Compress, Decompress, CDX, Arc2Warc. - More specialised tasks include: Changed, ContainerMD, Delete, Extract, Interval, PathIndex, Unpack, Headers2CDX. - - http://jwat.org/ - - 2011 - - - - Apache License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - A business-friendly OSS license - - - - - scm:git:https://github.com/netarchivesuite/jwat-tools.git - scm:git:git@github.com:netarchivesuite/jwat-tools.git - http://bitbucket.org/nclarkekb/jwat-tools - HEAD - - - - jira - https://sbforge.org/jira/browse/JWAT - - - - jenkins - https://sbforge.org/jenkins/view/JWAT/ - - - - - Nicholas Clarke - - developer - - nclarke@antiaction.com - Antiaction - https://www.antiaction.com - - - - Nicholas Clarke - - developer - - nicl@kb.dk - Det Kongelige Bibliotek/Royal Danish Library - http://www.kb.dk - - - - - UTF-8 - UTF-8 - - 2.2.2 - 3.8.1 - 3.0.1 - 2.3 - 3.3.0 - 3.2.1 - - 1.8 - - 1.3.0-SNAPSHOT - 3.2.7 - 0.2.0 - 0.7.0 - 0.2.0-JWAT - 2.0.0 - 4.13.2 - - - - - - - sbforge-nexus - SBForge Nexus Repo manager - https://sbforge.org/nexus/content/repositories/releases - - - - sbforge-nexus - SBForge Nexus Repo manager - https://sbforge.org/nexus/content/repositories/snapshots/ - - - - - - org.jwat - jwat-common - ${jwat.core.version} - - - org.jwat - jwat-gzip - ${jwat.core.version} - - - org.jwat - jwat-arc - ${jwat.core.version} - - - org.jwat - jwat-warc - ${jwat.core.version} - - - org.jwat - jwat-archive - ${jwat.core.version} - - - - com.antiaction - common-cli - ${common-cli.version} - - - com.antiaction - common-json - ${common-json.version} - - - com.antiaction - common-datastructures - ${common-datastructures.version} - - - - net.java.dev.jna - jna - ${jna.version} - - - - org.netpreserve.openwayback - openwayback-core - ${openwayback-core.version} - - - org.netpreserve.openwayback - openwayback-cdx-server - - - org.netpreserve.openwayback - openwayback-webapp - - - - - - - - - - - - - - junit - junit - ${junit.version} - test - - - - - - - maven-compiler-plugin - ${maven-compiler-plugin.version} - - 1.8 - 1.8 - - - - - org.codehaus.mojo - license-maven-plugin - ${license.maven.plugin} - - true - apache_v2 - true - true - - - - - - org.apache.maven.plugins - maven-jar-plugin - ${maven.jar.plugin} - - - false - - - org.jwat.tools.JWATTools - - - ${project.name} - ${project.version} - - ${project.groupId}.${project.artifactId} - - - - - - false - - - true - ${project.build.finalName} - - - - - - maven-assembly-plugin - ${maven.assembly.plugin} - - - distro-assembly - package - - single - - - - src/main/assembly/release.xml - - false - gnu - - - - - - - - - - release - - - - - org.apache.maven.plugins - maven-source-plugin - ${maven-source-plugin.version} - - - attach-sources - verify - - jar-no-fork - - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - ${maven-javadoc-plugin.version} - - 8 - false - true - true - true - - -Xdoclint:none - - - - attach-javadocs - verify - - jar - - - - - - - org.apache.maven.plugins - maven-gpg-plugin - ${maven-gpg-plugin.version} - - - sign-artifacts - verify - - sign - - - - - - - - - + + + 4.0.0 + + org.jwat + jwat-tools + 0.7.2-SNAPSHOT + + jar + + jwat-tools + + JWAT-Tools uses the available JWAT libraries to make high level tasks available either from command-line or programmatically. + Common tasks include: Test, Compress, Decompress, CDX, Arc2Warc. + More specialised tasks include: Changed, ContainerMD, Delete, Extract, Interval, PathIndex, Unpack, Headers2CDX. + + http://jwat.org/ + + 2011 + + + + Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + + scm:git:https://github.com/netarchivesuite/jwat-tools.git + scm:git:https://github.com/netarchivesuite/jwat-tools.git + https://github.com/netarchivesuite/jwat-tools + + + + jira + https://sbforge.org/jira/browse/JWAT + + + + jenkins + https://sbforge.org/jenkins/view/JWAT/ + + + + + Nicholas Clarke + + developer + + nclarke@antiaction.com + Antiaction + https://www.antiaction.com + + + + Nicholas Clarke + + developer + + nicl@kb.dk + Det Kongelige Bibliotek/Royal Danish Library + http://www.kb.dk + + + + + UTF-8 + UTF-8 + + en + checkstyleCache + false + true + + 2.2.2 + 3.11.0 + 3.0.1 + 2.3 + 3.3.0 + 3.2.1 + + 2.0.0 + + 1.3.0-SNAPSHOT + 3.2.7 + 0.2.0 + 0.7.0 + 0.2.0-JWAT + 2.0.0 + 4.13.2 + + + + + ossrh + url>https://s01.oss.sonatype.org/content/repositories/snapshots + https://oss.sonatype.org/content/repositories/snapshots/ + + + ossrh + url>https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + + org.jwat + jwat-common + ${jwat.core.version} + + + org.jwat + jwat-gzip + ${jwat.core.version} + + + org.jwat + jwat-arc + ${jwat.core.version} + + + org.jwat + jwat-warc + ${jwat.core.version} + + + org.jwat + jwat-archive + ${jwat.core.version} + + + + com.antiaction + common-cli + ${common-cli.version} + + + com.antiaction + common-json + ${common-json.version} + + + com.antiaction + common-datastructures + ${common-datastructures.version} + + + + net.java.dev.jna + jna + ${jna.version} + + + + org.netpreserve.openwayback + openwayback-core + ${openwayback-core.version} + + + org.netpreserve.openwayback + openwayback-cdx-server + + + org.netpreserve.openwayback + openwayback-webapp + + + + + + + + + + + + + + junit + junit + ${junit.version} + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + + + + org.codehaus.mojo + license-maven-plugin + ${license.maven.plugin} + + true + apache_v2 + true + true + + + + + + org.apache.maven.plugins + maven-jar-plugin + ${maven.jar.plugin} + + + false + + + org.jwat.tools.JWATTools + + + ${project.name} + ${project.version} + + ${project.groupId}.${project.artifactId} + + + + + + false + + + true + ${project.build.finalName} + + + + + + org.apache.maven.plugins + maven-assembly-plugin + ${maven.assembly.plugin} + + + distro-assembly + package + + single + + + + src/main/assembly/release.xml + + false + gnu + + + + + + + + + + javac8-release + + 8 + + + 1.8 + 1.8 + + + + javac9-release + + [9,) + + + 8 + + + + release + + + + + org.apache.maven.plugins + maven-source-plugin + ${maven-source-plugin.version} + + + attach-sources + verify + + jar-no-fork + + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + ${maven-javadoc-plugin.version} + + 8 + false + true + true + true + + -Xdoclint:none + + + + attach-javadocs + verify + + jar + + + + + + + org.apache.maven.plugins + maven-gpg-plugin + ${maven-gpg-plugin.version} + + + sign-artifacts + verify + + sign + + + + + + + + + From 731ee814187e5092622ae7d68aaf5b358bec15eb Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Fri, 8 Dec 2023 13:09:13 +0100 Subject: [PATCH 4/6] Help text cleanup for most commands. --- src/main/java/org/jwat/tools/HelpTaskCLI.java | 11 +- src/main/java/org/jwat/tools/JWATTools.java | 14 +- .../tools/tasks/arc2warc/Arc2WarcTaskCLI.java | 4 +- .../org/jwat/tools/tasks/cdx/CDXTaskCLI.java | 8 +- .../tools/tasks/changed/ChangedTaskCLI.java | 10 +- .../tools/tasks/compress/CompressTaskCLI.java | 11 +- .../tasks/containermd/ContainerMDTaskCLI.java | 4 +- .../tasks/decompress/DecompressTaskCLI.java | 11 +- .../tools/tasks/delete/DeleteTaskCLI.java | 8 +- .../tools/tasks/digest/DigestOptions.java | 44 +++++ .../jwat/tools/tasks/digest/DigestTask.java | 51 +++--- .../tools/tasks/digest/DigestTaskCLI.java | 154 +++++++++++++++++- .../tools/tasks/extract/ExtractTaskCLI.java | 4 +- .../tasks/headers2cdx/Headers2CDXTaskCLI.java | 8 +- .../tools/tasks/interval/IntervalTaskCLI.java | 13 +- .../tasks/pathindex/PathIndexTaskCLI.java | 8 +- .../jwat/tools/tasks/test/TestTaskCLI.java | 12 +- .../jwat/tools/tasks/unchunk/UnchunkTask.java | 19 +++ .../tools/tasks/unchunk/UnchunkTaskCLI.java | 17 +- .../tools/tasks/unpack/UnpackTaskCLI.java | 1 + 20 files changed, 318 insertions(+), 94 deletions(-) diff --git a/src/main/java/org/jwat/tools/HelpTaskCLI.java b/src/main/java/org/jwat/tools/HelpTaskCLI.java index bf1b4da..729dbae 100644 --- a/src/main/java/org/jwat/tools/HelpTaskCLI.java +++ b/src/main/java/org/jwat/tools/HelpTaskCLI.java @@ -19,12 +19,17 @@ public HelpTaskCLI() { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools help []"); System.out.println(""); - System.out.println("display help information"); + System.out.println("Display help information."); + System.out.println("If no command is supplied overall help information is shown."); + System.out.println("If a command is supplied its help information is shown instead."); + System.out.println(""); + System.out.println("options:"); + System.out.println(""); + System.out.println("none"); System.out.println(""); - System.out.println("\tIf no command is supplied overall help information is shown."); - System.out.println("\tIf a command is supplied its help information is shown instead."); } public static final int A_HELPFOR_COMMAND = 101; diff --git a/src/main/java/org/jwat/tools/JWATTools.java b/src/main/java/org/jwat/tools/JWATTools.java index 7b9c924..6a72b4b 100644 --- a/src/main/java/org/jwat/tools/JWATTools.java +++ b/src/main/java/org/jwat/tools/JWATTools.java @@ -1,6 +1,7 @@ package org.jwat.tools; import java.lang.reflect.Field; +import java.security.Provider; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -8,6 +9,7 @@ import java.util.List; import java.util.Map; +import org.jwat.common.SecurityProviderTools; import org.jwat.tools.tasks.TaskCLI; import org.jwat.tools.tasks.arc2warc.Arc2WarcTaskCLI; import org.jwat.tools.tasks.cdx.CDXTaskCLI; @@ -142,11 +144,11 @@ public int compare(Class t1, Class t2) { } System.out.println(""); System.out.println("See 'jwattools help ' for more information on a specific command."); + System.out.println(""); } - public static String getVersionString(String packageName) { - Package pkg = Package.getPackage(packageName); - System.out.println(pkg); + public static String getVersionString() { + Package pkg = Package.getPackage("org.jwat.tools"); String version = null; if (pkg != null) { version = pkg.getSpecificationVersion(); @@ -158,7 +160,7 @@ public static String getVersionString(String packageName) { } public static void show_help() { - System.out.println("JWATTools v" + getVersionString("org.jwat.tools")); + System.out.println("JWATTools v" + getVersionString()); //System.out.println(getVersionString("org.jwat.common")); //System.out.println(getVersionString("org.jwat.gzip")); //System.out.println(getVersionString("org.jwat.arc")); @@ -169,6 +171,10 @@ public static void show_help() { } public void Main(String[] args) { + Provider[] providers = SecurityProviderTools.getSecurityProviders(); + if (!SecurityProviderTools.isProviderAvailable(providers, "BC")) { + SecurityProviderTools.loadBCProvider(); + } CommandLine cmdLine = null; configure_cli(); try { diff --git a/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java b/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java index acb981f..8c9f6e9 100644 --- a/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java @@ -15,10 +15,11 @@ public class Arc2WarcTaskCLI extends TaskCLI { public static final String commandName = "arc2warc"; - public static final String commandDescription = "convert ARC to WARC"; + public static final String commandDescription = "convert ARC file(s) to WARC file(s)"; @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools arc2warc [-d DIR] [--overwrite]... [-w THREADS] ..."); System.out.println(""); System.out.println("arc2warc will convert one or more ARC file(s) to WARC file(s)."); @@ -30,6 +31,7 @@ public void show_help() { System.out.println(" --prefix destination filename prefix (default is '" + Arc2WarcOptions.DEFAULT_PREFIX + "')"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java b/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java index 22312d3..0e6d085 100644 --- a/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java @@ -19,18 +19,18 @@ public class CDXTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools cdx [-o OUTPUT_FILE] [-w THREADS] ..."); System.out.println(""); - System.out.println("cdx one or more ARC/WARC files"); - System.out.println(""); - System.out.println("\tRead through ARC/WARC file(s) and create a CDX file."); - System.out.println("\tCDX files are primarily used with Wayback."); + System.out.println("Read through ARC/WARC file(s) and create a CDX file."); + System.out.println("CDX files are primarily used with replay tools like (Open)Wayback."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" -o output cdx filename (unsorted)"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java b/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java index 0c3598c..80b792f 100644 --- a/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java @@ -19,16 +19,16 @@ public class ChangedTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools changed ..."); System.out.println(""); - System.out.println("group files by similar last modified dates"); - System.out.println(""); - System.out.println("\tUseful command for identifying when and if files where modified"); - System.out.println("\tin close proximity of others."); + System.out.println("Useful command for identifying when and if files where modified in close proximity of others."); + System.out.println("Group files by similar last modified dates."); System.out.println(""); System.out.println("options:"); System.out.println(""); - System.out.println(" -o output intervals and files to file"); + System.out.println(" -o output intervals and files to file"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java b/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java index e3ccee1..bf00284 100644 --- a/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java @@ -19,12 +19,12 @@ public class CompressTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools compress [-123456789] [--fast] [--best] [-w THREADS] ..."); System.out.println(""); - System.out.println("compress one or more ARC/WARC/GZip files"); - System.out.println(""); - System.out.println("\tNormal files are compressed as a single GZip file."); - System.out.println("\tARC/WARC files are compressed on a record level."); + System.out.println("Compress one or more ARC/WARC/GZip files."); + System.out.println("ARC/WARC files are compressed on a record level."); + System.out.println("Normal files are compressed as a single GZip file."); System.out.println(""); System.out.println("options:"); System.out.println(""); @@ -41,6 +41,7 @@ public void show_help() { System.out.println(" --blacklist list of files to ignore (one filename per line)"); System.out.println(" --checksums list of sorted checksums (one filename##checksum per line))"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override @@ -180,7 +181,7 @@ public static CompressOptions parseArguments(CommandLine cmdLine) { options.filesList = argument.values; if (!options.bQuiet) { - System.out.println("JWATTools v" + JWATTools.getVersionString("org.jwat.tools")); + System.out.println("JWATTools v" + JWATTools.getVersionString()); System.out.println( "Compression level: " + options.compressionLevel ); System.out.println( " Batch mode: " + options.bBatch ); System.out.println( " Dry run: " + options.bDryrun ); diff --git a/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java b/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java index d14ed15..7a262f9 100644 --- a/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java @@ -20,9 +20,10 @@ public class ContainerMDTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools containermd [-d outputDir] [-l] [-q] [-w THREADS] "); System.out.println(""); - System.out.println("generate containerMD for (W)ARC files"); + System.out.println("Generate containerMD for (W)ARC files."); System.out.println(""); System.out.println("options:"); System.out.println(""); @@ -31,6 +32,7 @@ public void show_help() { System.out.println(" -q quiet, no output to console"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java b/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java index f1040f5..63e0178 100644 --- a/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java @@ -17,17 +17,18 @@ public class DecompressTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools decompress [-w THREADS] ..."); System.out.println(""); - System.out.println("decompress one or more GZip files"); - System.out.println(""); - System.out.println("\tNormal files are decompressed into one or more files."); - System.out.println("\tARC/WARC files are compressed on a record level."); + System.out.println("Decompress one or more GZip files."); + System.out.println("ARC/WARC files are compressed on a record level."); + System.out.println("Normal files are decompressed into one or more files."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" --queue-first queue files before processing"); - System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java b/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java index 5fda39b..0d4a18f 100644 --- a/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java @@ -19,17 +19,17 @@ public class DeleteTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools delete [-t] [-o OUTPUT_FILE] ..."); System.out.println(""); - System.out.println("delete one or more files"); - System.out.println(""); - System.out.println("\tDelete one or more files."); - System.out.println("\tLinux has this nasty habit of making it hard to delete many files at the same time."); + System.out.println("Delete one or more files."); + System.out.println("Linux has this nasty habit of making it hard to delete many files at the same time."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" -o output filenames deleted"); System.out.println(" --dryrun dry run, does not delete files"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java b/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java index 55bf481..faec95b 100644 --- a/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java @@ -1,9 +1,53 @@ package org.jwat.tools.tasks.digest; +import java.security.MessageDigest; import java.util.List; +import org.jwat.tools.JWATTools; + public class DigestOptions { public List filesList; + public boolean bBase16; + + public boolean bBase32; + + public boolean bBase64; + + public String mdAlgo; + + public MessageDigest md; + + @Override + public String toString() { + String lineSeparator = System.lineSeparator(); + int idx; + int len; + StringBuilder sb = new StringBuilder(); + sb.append("FileTools v"); + sb.append(JWATTools.getVersionString()); + sb.append(lineSeparator); + /* + if (paths != null) { + idx = 0; + len = paths.length; + sb.append(" Path: " + paths[idx++].getPath()); + sb.append(lineSeparator); + while (idx < len) { + sb.append(" " + paths[idx++].getPath()); + sb.append(lineSeparator); + } + } + */ + sb.append(" base16: " + bBase16); + sb.append(lineSeparator); + sb.append(" base32: " + bBase32); + sb.append(lineSeparator); + sb.append(" base64: " + bBase64); + sb.append(lineSeparator); + sb.append(" base64: " + mdAlgo); + return sb.toString(); + } + } diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java b/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java index db695b4..024d8f8 100644 --- a/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java @@ -3,8 +3,6 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; import org.jwat.common.Base16; import org.jwat.common.Base32; @@ -22,7 +20,11 @@ public DigestTask() { public void runtask(DigestOptions options) { this.options = options; - filelist_feeder( options.filesList, this ); + if (options.filesList.size() > 0) { + filelist_feeder( options.filesList, this ); + } + else { + } } private byte[] isBuffer = new byte[65536]; @@ -31,40 +33,37 @@ public void runtask(DigestOptions options) { @Override public void process(File srcFile) { - MessageDigest md = null; byte[] digest; FastBufferedInputStream in = null; - String digestAlgorithm = "SHA-1"; - try { - md = MessageDigest.getInstance(digestAlgorithm); - } - catch (NoSuchAlgorithmException e) { - e.printStackTrace(); - System.exit(-1); - } - int read; - try { + int read; + try { in = new FastBufferedInputStream(new FileInputStream(srcFile), isBuffer); while ((read = in.read(readBuffer)) != -1) { - md.update(readBuffer, 0, read); + options.md.update(readBuffer, 0, read); + } + digest = options.md.digest(); + if (options.bBase16) { + System.out.println(options.mdAlgo + ":" + Base16.encodeArray(digest) + " (base16/hex)"); + } + if (options.bBase32) { + System.out.println(options.mdAlgo + ":" + Base32.encodeArray(digest) + " (base32)"); + } + if (options.bBase64) { + System.out.println(options.mdAlgo + ":" + Base64.encodeArray(digest) + " (base64)"); } - digest = md.digest(); - System.out.println(digestAlgorithm + ":" + Base16.encodeArray(digest) + " (base16/hex)"); - System.out.println(digestAlgorithm + ":" + Base32.encodeArray(digest) + " (base32)"); - System.out.println(digestAlgorithm + ":" + Base64.encodeArray(digest) + " (base64)"); } - catch (IOException e) { + catch (IOException e) { e.printStackTrace(); } - finally { - if (in != null) { - try { + finally { + if (in != null) { + try { in.close(); } - catch (IOException e) { + catch (IOException e) { } - } - } + } + } } } diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestTaskCLI.java b/src/main/java/org/jwat/tools/tasks/digest/DigestTaskCLI.java index 897c4c1..0df42a6 100644 --- a/src/main/java/org/jwat/tools/tasks/digest/DigestTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestTaskCLI.java @@ -1,7 +1,18 @@ package org.jwat.tools.tasks.digest; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.security.Provider; +import java.security.Provider.Service; +import java.security.Security; +import java.util.Iterator; import java.util.LinkedList; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; +import org.jwat.common.SecurityProviderTools; import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; @@ -11,37 +22,50 @@ import com.antiaction.common.cli.CommandLine; import com.antiaction.common.cli.Options; +// TODO Fix common-cli to handle numerics in longName. public class DigestTaskCLI extends TaskCLI { public static final String commandName = "digest"; - public static final String commandDescription = "digest calculation"; + public static final String commandDescription = "calculate the digest of file(s)"; @Override public void show_help() { - System.out.println("jwattools [-o] digest "); + System.out.println("FileTools v" + JWATTools.getVersionString()); + System.out.println("jwattools [-o] digest ... "); System.out.println(""); - System.out.println("digest file(s)"); - System.out.println(""); - System.out.println("\tDigest file(s)."); - /* + System.out.println("Digest file(s)"); + System.out.println("Use of this is mostly for debugging purposes."); System.out.println(""); System.out.println("options:"); System.out.println(""); - */ + System.out.println(getMessageDigestAlgos()); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { DigestTask task = new DigestTask(); DigestOptions options = parseArguments(cmdLine); + System.out.println(options.toString()); task.runtask(options); } + public static final int A_BASE16 = 101; + public static final int A_BASE32 = 102; + public static final int A_BASE64 = 103; + public static final int A_DIGEST_ALGO = 104; + public static DigestOptions parseArguments(CommandLine cmdLine) { Options cliOptions = new Options(); try { cliOptions.addNamedArgument( "files", JWATTools.A_FILES, 1, 1 ); + /* + cliOptions.addOption(null, "--base16", A_BASE16, 0, null); + cliOptions.addOption(null, "--base32", A_BASE32, 0, null); + cliOptions.addOption(null, "--base64", A_BASE64, 0, null); + */ + cliOptions.addOption("-a", "--digest-algorithm", A_DIGEST_ALGO, 0, null).setValueRequired(); cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); } catch (ArgumentParserException e) { @@ -58,9 +82,123 @@ public static DigestOptions parseArguments(CommandLine cmdLine) { argument = cmdLine.idMap.get( JWATTools.A_FILES ); options.filesList = new LinkedList(); //options.filesList = argument.values; - options.filesList.add( argument.value ); + if (argument != null) { + options.filesList.add( argument.value ); + } + + argument = cmdLine.idMap.get(A_DIGEST_ALGO); + if (argument != null) { + options.mdAlgo = argument.value; + try { + options.md = MessageDigest.getInstance(argument.value); + } + catch (NoSuchAlgorithmException e) { + System.out.println("Unsupported digest algorithm: " + argument.value); + System.exit(-1); + } + } + + options.bBase16 = (cmdLine.idMap.get(A_BASE16) != null); + options.bBase32 = (cmdLine.idMap.get(A_BASE32) != null); + options.bBase64 = (cmdLine.idMap.get(A_BASE64) != null); + + if (options.bBase32 == false && options.bBase64 == false) { + options.bBase16 = true; + } + options.bBase32 = true; + options.bBase64 = true; return options; } + public static Map> digestAlgos = new TreeMap<>(); + public static Set digestAliases = new TreeSet<>(); + public static Set digestAndAliases = new TreeSet<>(); + + public static String getMessageDigestAlgos() { + final String digestClassName = MessageDigest.class.getSimpleName(); + final String aliasPrefix = "Alg.Alias." + digestClassName + "."; + final int aliasPrefixLen = aliasPrefix.length(); + Provider[] providers = Security.getProviders(); + String providerAlias; + if (SecurityProviderTools.isProviderAvailable(providers, "BC")) { + providerAlias = "BC"; + } + else { + providerAlias = "SUN"; + } + try { + Provider provider = Security.getProvider(providerAlias); + Set services = provider.getServices(); + services.stream().forEach(service -> { + String algorithm; + Set aliases; + if (digestClassName.equalsIgnoreCase(service.getType())) { + algorithm = service.getAlgorithm(); + char[] charArr = algorithm.toCharArray(); + int charIdx = charArr.length - 1; + char c; + boolean b = true; + while (b && charIdx >= 0) { + c = charArr[charIdx--]; + b = ((c >= '0' && c<= '9') || c == '.'); + } + if (charIdx != -1 && !(charIdx == 1 && algorithm.startsWith("OID."))) { + aliases = digestAlgos.get(algorithm); + if (aliases == null) { + digestAlgos.put(algorithm, new TreeSet()); + digestAndAliases.add(algorithm); + } + } + } + }); + provider.keySet().stream().map(Object::toString).filter(s -> s.startsWith(aliasPrefix)).forEach(s -> { + String alias = s.substring(aliasPrefixLen); + String algorithm = provider.get(s).toString(); + if (alias.compareToIgnoreCase(algorithm) != 0) { + char[] charArr = alias.toCharArray(); + int charIdx = charArr.length - 1; + char c; + boolean b = true; + while (b && charIdx >= 0) { + c = charArr[charIdx--]; + b = ((c >= '0' && c<= '9') || c == '.'); + } + if (charIdx != -1 && !(charIdx == 1 && alias.startsWith("OID."))) { + Set algorithms = digestAlgos.get(algorithm); + if (algorithms != null) { + algorithms.add(alias); + digestAliases.add(alias); + digestAndAliases.add(alias); + } + } + } + }); + } + catch (Exception e) { + e.printStackTrace(); + } + final StringBuffer sb = new StringBuffer(); + //String prefix = null; + //int prefixLen = 0; + digestAlgos.entrySet().forEach(e -> { + Set algorithms = e.getValue(); + if (sb.length() > 0) { + sb.append(", "); + } + sb.append(e.getKey()); + Iterator aliasIter = algorithms.iterator(); + if (aliasIter.hasNext()) { + sb.append(" ("); + sb.append(aliasIter.next()); + while (aliasIter.hasNext()) { + sb.append(", "); + sb.append(aliasIter.next()); + } + sb.append(")"); + } + }); + return sb.toString(); + } + } diff --git a/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java b/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java index fc0bd7a..6d68227 100644 --- a/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java @@ -17,15 +17,17 @@ public class ExtractTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools extract [-u URI] [-w THREADS] ..."); System.out.println(""); - System.out.println("extract one or more entries/records from GZip/ARC/WARC files"); + System.out.println("Extract one or more entries/records from GZip/ARC/WARC files."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" -u (target)uri to extract"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java b/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java index dac087a..49018a0 100644 --- a/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java @@ -19,18 +19,18 @@ public class Headers2CDXTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools headers2cdx [-o OUTPUT_FILE] [-w THREADS] ..."); System.out.println(""); - System.out.println("cdx one or more gzipped json (W)ARC/HTTP header files"); - System.out.println(""); - System.out.println("\tRead through gzipped json (W)ARC/HTTP header file(s) and create a CDX file."); - System.out.println("\tCDX files are primarily used with Wayback."); + System.out.println("Read through gzipped json (W)ARC/HTTP header file(s) and create a CDX file."); + System.out.println("CDX files are primarily used with replay tools like (Open)Wayback."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" -o output cdx filename (unsorted)"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java b/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java index a92da96..7eed883 100644 --- a/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java @@ -19,18 +19,17 @@ public class IntervalTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools [-o] interval offset1 offset2 srcfile dstfile"); System.out.println(""); - System.out.println("extract the byte interval from offset1 to offset2 from a file"); - System.out.println(""); - System.out.println("\tSkips data up to offset1 and save data to file until offset2 is reached."); - System.out.println("\tOffset1/2 can be decimal or hexadecimal ($ or 0x)."); - System.out.println("\tOffset2 can also be a length-offset (+ +$ +0x)."); - /* + System.out.println("Extract the byte interval from offset1 to offset2 from a file."); + System.out.println("Offset1/2 can be decimal or hexadecimal ($ or 0x)."); + System.out.println("Offset2 can also be a length-offset (+ +$ +0x)."); System.out.println(""); System.out.println("options:"); System.out.println(""); - */ + System.out.println("none"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java b/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java index 958f7d1..02890ef 100644 --- a/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java @@ -19,16 +19,16 @@ public class PathIndexTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools pathindex [-o OUTPUT_FILE] ..."); System.out.println(""); - System.out.println("create a pathindex from one or more ARC/WARC files"); - System.out.println(""); - System.out.println("\tRead through ARC/WARC file(s) and create a pathindex file."); - System.out.println("\tPathindex files are primarily used with Wayback."); + System.out.println("Create a pathindex from one or more ARC/WARC files."); + System.out.println("Pathindex files are primarily used with replay tools like (Open)Wayback."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" -o output pathindex filename (unsorted)"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java b/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java index d22c16c..74a4c2b 100644 --- a/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java @@ -24,9 +24,10 @@ public class TestTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools test [-beilx] [-w THREADS] [-a] ..."); System.out.println(""); - System.out.println("test one or more ARC/WARC/GZip files"); + System.out.println("Test one or more ARC/WARC/GZip files."); System.out.println(""); System.out.println("options:"); System.out.println(""); @@ -39,6 +40,7 @@ public void show_help() { System.out.println(" -x to validate text/xml payload (eg. mets)"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override @@ -133,16 +135,16 @@ public static TestOptions parseArguments(CommandLine cmdLine) { if ( argument != null && argument.value != null ) { try { DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss"); - dateFormat.setLenient(false); - Date afterDate = dateFormat.parse(argument.value); - options.after = afterDate.getTime(); + dateFormat.setLenient(false); + Date afterDate = dateFormat.parse(argument.value); + options.after = afterDate.getTime(); } catch (ParseException e) { System.out.println("Invalid date format - " + argument.value); System.exit( 1 ); } } - // Files. + // Files. argument = cmdLine.idMap.get( JWATTools.A_FILES ); options.filesList = argument.values; diff --git a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java index ad3853d..193a6ef 100644 --- a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java +++ b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java @@ -51,6 +51,25 @@ public void process(File srcFile) { catch (IOException e) { e.printStackTrace(); } + int state; + if (dis != null) { + state = dis.getState(); + switch (state) { + case DigestInputStreamChunkedNoSkip.S_LENGTH: + case DigestInputStreamChunkedNoSkip.S_LENGTH_CR: + case DigestInputStreamChunkedNoSkip.S_LENGTH_LF: + case DigestInputStreamChunkedNoSkip.S_CHUNK_CR: + case DigestInputStreamChunkedNoSkip.S_CHUNK_LF: + case DigestInputStreamChunkedNoSkip.S_END_CR: + case DigestInputStreamChunkedNoSkip.S_END_LF: + break; + case DigestInputStreamChunkedNoSkip.S_DONE: + break; + case DigestInputStreamChunkedNoSkip.S_ERROR: + break; + } + dis.getOverflow(); + } } } diff --git a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java index 5f8e2c3..cb1b60f 100644 --- a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java @@ -15,20 +15,23 @@ public class UnchunkTaskCLI extends TaskCLI { public static final String commandName = "unchunk"; - public static final String commandDescription = "unchunk file(s) with chunked transfter encoding"; + public static final String commandDescription = "unchunk file(s) containing only chunked transfter encoded data"; @Override public void show_help() { - System.out.println("jwattools [-o] unchunk "); + System.out.println("FileTools v" + JWATTools.getVersionString()); + System.out.println("jwattools [-o] unchunk ..."); System.out.println(""); - System.out.println("unchunk file(s)"); - System.out.println(""); - System.out.println("\tUnchunk file(s)."); - /* + System.out.println("Unchunk one or more files which consist of chunked transfer encoded data files."); + System.out.println("This command only handles the chunked data."); + System.out.println("All encapsulating headers or similar must be removed before using this command."); + System.out.println("This includes removing headers for ARC, WARC, HTTP etc."); + System.out.println("Use of this is mostly for debugging purposes."); System.out.println(""); System.out.println("options:"); System.out.println(""); - */ + System.out.println("none"); + System.out.println(""); } @Override diff --git a/src/main/java/org/jwat/tools/tasks/unpack/UnpackTaskCLI.java b/src/main/java/org/jwat/tools/tasks/unpack/UnpackTaskCLI.java index 83ad04c..f34b7b5 100644 --- a/src/main/java/org/jwat/tools/tasks/unpack/UnpackTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/unpack/UnpackTaskCLI.java @@ -17,6 +17,7 @@ public class UnpackTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("Work in progress..."); } From d9093c2a1932ec298cc73845958c076da82523ce Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Wed, 13 Dec 2023 01:20:23 +0100 Subject: [PATCH 5/6] Improved digest command. --- .../jwat/tools/tasks/digest/DigestAlgo.java | 11 ++ .../tools/tasks/digest/DigestOptions.java | 43 +++--- .../jwat/tools/tasks/digest/DigestTask.java | 32 ++-- .../tools/tasks/digest/DigestTaskCLI.java | 143 ++++-------------- .../jwat/tools/tasks/unchunk/UnchunkTask.java | 1 + .../tools/tasks/unchunk/UnchunkTaskCLI.java | 1 - 6 files changed, 85 insertions(+), 146 deletions(-) create mode 100644 src/main/java/org/jwat/tools/tasks/digest/DigestAlgo.java diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestAlgo.java b/src/main/java/org/jwat/tools/tasks/digest/DigestAlgo.java new file mode 100644 index 0000000..2ab5ac2 --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestAlgo.java @@ -0,0 +1,11 @@ +package org.jwat.tools.tasks.digest; + +import java.security.MessageDigest; + +public class DigestAlgo { + + public String mdAlgo; + + public MessageDigest md; + +} diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java b/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java index faec95b..0644388 100644 --- a/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java @@ -1,6 +1,5 @@ package org.jwat.tools.tasks.digest; -import java.security.MessageDigest; import java.util.List; import org.jwat.tools.JWATTools; @@ -15,38 +14,38 @@ public class DigestOptions { public boolean bBase64; - public String mdAlgo; - - public MessageDigest md; + public DigestAlgo[] digestAlgos; @Override public String toString() { String lineSeparator = System.lineSeparator(); - int idx; - int len; + //int idx; + //int len; StringBuilder sb = new StringBuilder(); sb.append("FileTools v"); sb.append(JWATTools.getVersionString()); - sb.append(lineSeparator); - /* - if (paths != null) { - idx = 0; - len = paths.length; - sb.append(" Path: " + paths[idx++].getPath()); + if (filesList.size() > 0) { sb.append(lineSeparator); - while (idx < len) { - sb.append(" " + paths[idx++].getPath()); + /* + if (paths != null) { + idx = 0; + len = paths.length; + sb.append(" Path: " + paths[idx++].getPath()); sb.append(lineSeparator); + while (idx < len) { + sb.append(" " + paths[idx++].getPath()); + sb.append(lineSeparator); + } } + */ + sb.append(" base16: " + bBase16); + sb.append(lineSeparator); + sb.append(" base32: " + bBase32); + sb.append(lineSeparator); + sb.append(" base64: " + bBase64); + //sb.append(lineSeparator); + //sb.append(" base64: " + mdAlgo); } - */ - sb.append(" base16: " + bBase16); - sb.append(lineSeparator); - sb.append(" base32: " + bBase32); - sb.append(lineSeparator); - sb.append(" base64: " + bBase64); - sb.append(lineSeparator); - sb.append(" base64: " + mdAlgo); return sb.toString(); } diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java b/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java index 024d8f8..9fa6c2d 100644 --- a/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java @@ -3,10 +3,12 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.security.MessageDigest; import org.jwat.common.Base16; import org.jwat.common.Base32; import org.jwat.common.Base64; +import org.jwat.common.SecurityProviderAlgorithms; import org.jwat.tools.tasks.AbstractTask; import it.unimi.dsi.fastutil.io.FastBufferedInputStream; @@ -24,6 +26,12 @@ public void runtask(DigestOptions options) { filelist_feeder( options.filesList, this ); } else { + SecurityProviderAlgorithms spa = SecurityProviderAlgorithms.getInstanceFor(MessageDigest.class); + System.out.println(""); + System.out.println("Available algorithms:"); + System.out.println("---------------------"); + System.out.println(spa.getAlgorithmListGrouped()); + System.out.println(""); } } @@ -39,17 +47,21 @@ public void process(File srcFile) { try { in = new FastBufferedInputStream(new FileInputStream(srcFile), isBuffer); while ((read = in.read(readBuffer)) != -1) { - options.md.update(readBuffer, 0, read); - } - digest = options.md.digest(); - if (options.bBase16) { - System.out.println(options.mdAlgo + ":" + Base16.encodeArray(digest) + " (base16/hex)"); - } - if (options.bBase32) { - System.out.println(options.mdAlgo + ":" + Base32.encodeArray(digest) + " (base32)"); + for (int i=0; i] digest ... "); System.out.println(""); @@ -39,7 +32,11 @@ public void show_help() { System.out.println(""); System.out.println("options:"); System.out.println(""); - System.out.println(getMessageDigestAlgos()); + System.out.println(" -a specify one or more digest algorithm"); + System.out.println(""); + System.out.println("Available digest algorithms:"); + System.out.println("----------------------------"); + System.out.println(spa.getAlgorithmListGrouped()); System.out.println(""); } @@ -60,11 +57,9 @@ public static DigestOptions parseArguments(CommandLine cmdLine) { Options cliOptions = new Options(); try { cliOptions.addNamedArgument( "files", JWATTools.A_FILES, 1, 1 ); - /* - cliOptions.addOption(null, "--base16", A_BASE16, 0, null); - cliOptions.addOption(null, "--base32", A_BASE32, 0, null); - cliOptions.addOption(null, "--base64", A_BASE64, 0, null); - */ + //cliOptions.addOption(null, "--base16", A_BASE16, 0, null); + //cliOptions.addOption(null, "--base32", A_BASE32, 0, null); + //cliOptions.addOption(null, "--base64", A_BASE64, 0, null); cliOptions.addOption("-a", "--digest-algorithm", A_DIGEST_ALGO, 0, null).setValueRequired(); cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); } @@ -76,7 +71,8 @@ public static DigestOptions parseArguments(CommandLine cmdLine) { DigestOptions options = new DigestOptions(); Argument argument; - String tmpStr; + String[] values; + DigestAlgo digestAlgo; // Files argument = cmdLine.idMap.get( JWATTools.A_FILES ); @@ -88,16 +84,27 @@ public static DigestOptions parseArguments(CommandLine cmdLine) { argument = cmdLine.idMap.get(A_DIGEST_ALGO); if (argument != null) { - options.mdAlgo = argument.value; - try { - options.md = MessageDigest.getInstance(argument.value); - } - catch (NoSuchAlgorithmException e) { - System.out.println("Unsupported digest algorithm: " + argument.value); - System.exit(-1); + values = argument.value.split(","); + options.digestAlgos = new DigestAlgo[values.length]; + for (int i=0; i 0 && options.digestAlgos == null) { + System.out.println("Missing digest algorithm."); + System.exit(-1); + } + options.bBase16 = (cmdLine.idMap.get(A_BASE16) != null); options.bBase32 = (cmdLine.idMap.get(A_BASE32) != null); options.bBase64 = (cmdLine.idMap.get(A_BASE64) != null); @@ -111,94 +118,4 @@ public static DigestOptions parseArguments(CommandLine cmdLine) { return options; } - public static Map> digestAlgos = new TreeMap<>(); - public static Set digestAliases = new TreeSet<>(); - public static Set digestAndAliases = new TreeSet<>(); - - public static String getMessageDigestAlgos() { - final String digestClassName = MessageDigest.class.getSimpleName(); - final String aliasPrefix = "Alg.Alias." + digestClassName + "."; - final int aliasPrefixLen = aliasPrefix.length(); - Provider[] providers = Security.getProviders(); - String providerAlias; - if (SecurityProviderTools.isProviderAvailable(providers, "BC")) { - providerAlias = "BC"; - } - else { - providerAlias = "SUN"; - } - try { - Provider provider = Security.getProvider(providerAlias); - Set services = provider.getServices(); - services.stream().forEach(service -> { - String algorithm; - Set aliases; - if (digestClassName.equalsIgnoreCase(service.getType())) { - algorithm = service.getAlgorithm(); - char[] charArr = algorithm.toCharArray(); - int charIdx = charArr.length - 1; - char c; - boolean b = true; - while (b && charIdx >= 0) { - c = charArr[charIdx--]; - b = ((c >= '0' && c<= '9') || c == '.'); - } - if (charIdx != -1 && !(charIdx == 1 && algorithm.startsWith("OID."))) { - aliases = digestAlgos.get(algorithm); - if (aliases == null) { - digestAlgos.put(algorithm, new TreeSet()); - digestAndAliases.add(algorithm); - } - } - } - }); - provider.keySet().stream().map(Object::toString).filter(s -> s.startsWith(aliasPrefix)).forEach(s -> { - String alias = s.substring(aliasPrefixLen); - String algorithm = provider.get(s).toString(); - if (alias.compareToIgnoreCase(algorithm) != 0) { - char[] charArr = alias.toCharArray(); - int charIdx = charArr.length - 1; - char c; - boolean b = true; - while (b && charIdx >= 0) { - c = charArr[charIdx--]; - b = ((c >= '0' && c<= '9') || c == '.'); - } - if (charIdx != -1 && !(charIdx == 1 && alias.startsWith("OID."))) { - Set algorithms = digestAlgos.get(algorithm); - if (algorithms != null) { - algorithms.add(alias); - digestAliases.add(alias); - digestAndAliases.add(alias); - } - } - } - }); - } - catch (Exception e) { - e.printStackTrace(); - } - final StringBuffer sb = new StringBuffer(); - //String prefix = null; - //int prefixLen = 0; - digestAlgos.entrySet().forEach(e -> { - Set algorithms = e.getValue(); - if (sb.length() > 0) { - sb.append(", "); - } - sb.append(e.getKey()); - Iterator aliasIter = algorithms.iterator(); - if (aliasIter.hasNext()) { - sb.append(" ("); - sb.append(aliasIter.next()); - while (aliasIter.hasNext()) { - sb.append(", "); - sb.append(aliasIter.next()); - } - sb.append(")"); - } - }); - return sb.toString(); - } - } diff --git a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java index 193a6ef..745a5f7 100644 --- a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java +++ b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java @@ -11,6 +11,7 @@ public class UnchunkTask extends AbstractTask { + @SuppressWarnings("unused") private UnchunkOptions options; public UnchunkTask() { diff --git a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java index cb1b60f..d22f2ec 100644 --- a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java @@ -55,7 +55,6 @@ public static UnchunkOptions parseArguments(CommandLine cmdLine) { UnchunkOptions options = new UnchunkOptions(); Argument argument; - String tmpStr; // Files argument = cmdLine.idMap.get( JWATTools.A_FILES ); From 99a7f8cbd4f54462e93b4810f8d6103bfcd93eb2 Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Wed, 13 Dec 2023 15:40:10 +0100 Subject: [PATCH 6/6] Improve command overview in README.md. --- README.md | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 07c3c74..a9ca3ef 100644 --- a/README.md +++ b/README.md @@ -6,18 +6,29 @@ This project currently includes a commandline tool with various gzip/arc/warc/xm ### Tasks ### -* Arc2Warc -* CDX -* Changed -* Compress -* ContainerMD -* Decompress -* Delete -* Extract -* Interval -* PathIndex -* Test -* Unpack +``` +usage: JWATTools [] + +Commands: + arc2warc convert ARC file(s) to WARC file(s) + cdx create a CDX index for use in wayback (unsorted) + changed changed files grouped by intervals + compress compress ARC/WARC or plain file(s) + containermd generation of containerMD for (W)ARC file(s) + decompress decompress ARC/WARC or normal GZip file(s) + delete delete files + digest calculate the digest of file(s) + extract extract ARC/WARC record(s) + headers2cdx create a CDX index for use in wayback (unsorted) + help display help information + interval interval extract + pathindex create a path index file for use in wayback (unsorted) + test test validity of ARC/WARC/GZip file(s) + unchunk unchunk file(s) containing only chunked transfter encoded data + unpack unpack multifile GZip + +See 'jwattools help ' for more information on a specific command. +``` ### Downloads ###