diff --git a/README.md b/README.md index 07c3c74..a9ca3ef 100644 --- a/README.md +++ b/README.md @@ -6,18 +6,29 @@ This project currently includes a commandline tool with various gzip/arc/warc/xm ### Tasks ### -* Arc2Warc -* CDX -* Changed -* Compress -* ContainerMD -* Decompress -* Delete -* Extract -* Interval -* PathIndex -* Test -* Unpack +``` +usage: JWATTools [] + +Commands: + arc2warc convert ARC file(s) to WARC file(s) + cdx create a CDX index for use in wayback (unsorted) + changed changed files grouped by intervals + compress compress ARC/WARC or plain file(s) + containermd generation of containerMD for (W)ARC file(s) + decompress decompress ARC/WARC or normal GZip file(s) + delete delete files + digest calculate the digest of file(s) + extract extract ARC/WARC record(s) + headers2cdx create a CDX index for use in wayback (unsorted) + help display help information + interval interval extract + pathindex create a path index file for use in wayback (unsorted) + test test validity of ARC/WARC/GZip file(s) + unchunk unchunk file(s) containing only chunked transfter encoded data + unpack unpack multifile GZip + +See 'jwattools help ' for more information on a specific command. +``` ### Downloads ### diff --git a/pom.xml b/pom.xml index 7c21013..c3f9e53 100644 --- a/pom.xml +++ b/pom.xml @@ -1,419 +1,436 @@ - - - 4.0.0 - - org.jwat - jwat-tools - 0.7.2-SNAPSHOT - - jar - - jwat-tools - - JWAT-Tools uses the available JWAT libraries to make high level tasks available either from command-line or programmatically. - Common tasks include: Test, Compress, Decompress, CDX, Arc2Warc. - More specialised tasks include: Changed, ContainerMD, Delete, Extract, Interval, PathIndex, Unpack, Headers2CDX. - - http://jwat.org/ - - 2011 - - - - Apache License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - A business-friendly OSS license - - - - - scm:git:https://github.com/netarchivesuite/jwat-tools.git - scm:git:git@github.com:netarchivesuite/jwat-tools.git - http://bitbucket.org/nclarkekb/jwat-tools - HEAD - - - - jira - https://sbforge.org/jira/browse/JWAT - - - - jenkins - https://sbforge.org/jenkins/view/JWAT/ - - - - - Nicholas Clarke - - developer - - nclarke@antiaction.com - Antiaction - https://www.antiaction.com - - - - Nicholas Clarke - - developer - - nicl@kb.dk - Det Kongelige Bibliotek/Royal Danish Library - http://www.kb.dk - - - - - UTF-8 - UTF-8 - - 2.2.2 - 3.8.1 - 3.0.1 - 2.3 - 3.3.0 - 3.2.1 - - 1.8 - - 1.2.1 - 3.2.7 - 0.2.0 - 0.7.0 - 0.2.0-JWAT - 2.0.0 - 4.13.2 - - - - - - - sbforge-nexus - SBForge Nexus Repo manager - https://sbforge.org/nexus/content/repositories/releases - - - - sbforge-nexus - SBForge Nexus Repo manager - https://sbforge.org/nexus/content/repositories/snapshots/ - - - - - - org.jwat - jwat-common - ${jwat.core.version} - - - org.jwat - jwat-gzip - ${jwat.core.version} - - - org.jwat - jwat-arc - ${jwat.core.version} - - - org.jwat - jwat-warc - ${jwat.core.version} - - - org.jwat - jwat-archive - ${jwat.core.version} - - - - com.antiaction - common-cli - ${common-cli.version} - - - com.antiaction - common-json - ${common-json.version} - - - com.antiaction - common-datastructures - ${common-datastructures.version} - - - - net.java.dev.jna - jna - ${jna.version} - - - - org.netpreserve.openwayback - openwayback-core - ${openwayback-core.version} - - - org.netpreserve.openwayback - openwayback-cdx-server - - - org.netpreserve.openwayback - openwayback-webapp - - - - - - - - - - - - - - junit - junit - ${junit.version} - test - - - - - - - maven-compiler-plugin - ${maven-compiler-plugin.version} - - 1.8 - 1.8 - - - - - org.codehaus.mojo - license-maven-plugin - ${license.maven.plugin} - - true - apache_v2 - true - true - - - - - - org.apache.maven.plugins - maven-jar-plugin - ${maven.jar.plugin} - - - false - - - org.jwat.tools.JWATTools - - - ${project.name} - ${project.version} - - ${project.groupId}.${project.artifactId} - - - - - - false - - - true - ${project.build.finalName} - - - - - - maven-assembly-plugin - ${maven.assembly.plugin} - - - distro-assembly - package - - single - - - - src/main/assembly/release.xml - - false - gnu - - - - - - - - - - release - - - - - org.apache.maven.plugins - maven-source-plugin - ${maven-source-plugin.version} - - - attach-sources - verify - - jar-no-fork - - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - ${maven-javadoc-plugin.version} - - 8 - false - true - true - true - - -Xdoclint:none - - - - attach-javadocs - verify - - jar - - - - - - - org.apache.maven.plugins - maven-gpg-plugin - ${maven-gpg-plugin.version} - - - sign-artifacts - verify - - sign - - - - - - - - - + + + 4.0.0 + + org.jwat + jwat-tools + 0.7.2-SNAPSHOT + + jar + + jwat-tools + + JWAT-Tools uses the available JWAT libraries to make high level tasks available either from command-line or programmatically. + Common tasks include: Test, Compress, Decompress, CDX, Arc2Warc. + More specialised tasks include: Changed, ContainerMD, Delete, Extract, Interval, PathIndex, Unpack, Headers2CDX. + + http://jwat.org/ + + 2011 + + + + Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + + scm:git:https://github.com/netarchivesuite/jwat-tools.git + scm:git:https://github.com/netarchivesuite/jwat-tools.git + https://github.com/netarchivesuite/jwat-tools + + + + jira + https://sbforge.org/jira/browse/JWAT + + + + jenkins + https://sbforge.org/jenkins/view/JWAT/ + + + + + Nicholas Clarke + + developer + + nclarke@antiaction.com + Antiaction + https://www.antiaction.com + + + + Nicholas Clarke + + developer + + nicl@kb.dk + Det Kongelige Bibliotek/Royal Danish Library + http://www.kb.dk + + + + + UTF-8 + UTF-8 + + en + checkstyleCache + false + true + + 2.2.2 + 3.11.0 + 3.0.1 + 2.3 + 3.3.0 + 3.2.1 + + 2.0.0 + + 1.3.0-SNAPSHOT + 3.2.7 + 0.2.0 + 0.7.0 + 0.2.0-JWAT + 2.0.0 + 4.13.2 + + + + + ossrh + url>https://s01.oss.sonatype.org/content/repositories/snapshots + https://oss.sonatype.org/content/repositories/snapshots/ + + + ossrh + url>https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + + org.jwat + jwat-common + ${jwat.core.version} + + + org.jwat + jwat-gzip + ${jwat.core.version} + + + org.jwat + jwat-arc + ${jwat.core.version} + + + org.jwat + jwat-warc + ${jwat.core.version} + + + org.jwat + jwat-archive + ${jwat.core.version} + + + + com.antiaction + common-cli + ${common-cli.version} + + + com.antiaction + common-json + ${common-json.version} + + + com.antiaction + common-datastructures + ${common-datastructures.version} + + + + net.java.dev.jna + jna + ${jna.version} + + + + org.netpreserve.openwayback + openwayback-core + ${openwayback-core.version} + + + org.netpreserve.openwayback + openwayback-cdx-server + + + org.netpreserve.openwayback + openwayback-webapp + + + + + + + + + + + + + + junit + junit + ${junit.version} + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + + + + org.codehaus.mojo + license-maven-plugin + ${license.maven.plugin} + + true + apache_v2 + true + true + + + + + + org.apache.maven.plugins + maven-jar-plugin + ${maven.jar.plugin} + + + false + + + org.jwat.tools.JWATTools + + + ${project.name} + ${project.version} + + ${project.groupId}.${project.artifactId} + + + + + + false + + + true + ${project.build.finalName} + + + + + + org.apache.maven.plugins + maven-assembly-plugin + ${maven.assembly.plugin} + + + distro-assembly + package + + single + + + + src/main/assembly/release.xml + + false + gnu + + + + + + + + + + javac8-release + + 8 + + + 1.8 + 1.8 + + + + javac9-release + + [9,) + + + 8 + + + + release + + + + + org.apache.maven.plugins + maven-source-plugin + ${maven-source-plugin.version} + + + attach-sources + verify + + jar-no-fork + + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + ${maven-javadoc-plugin.version} + + 8 + false + true + true + true + + -Xdoclint:none + + + + attach-javadocs + verify + + jar + + + + + + + org.apache.maven.plugins + maven-gpg-plugin + ${maven-gpg-plugin.version} + + + sign-artifacts + verify + + sign + + + + + + + + + diff --git a/src/main/java/org/jwat/tools/HelpTaskCLI.java b/src/main/java/org/jwat/tools/HelpTaskCLI.java index 94a3677..729dbae 100644 --- a/src/main/java/org/jwat/tools/HelpTaskCLI.java +++ b/src/main/java/org/jwat/tools/HelpTaskCLI.java @@ -2,7 +2,11 @@ import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class HelpTaskCLI extends TaskCLI { @@ -15,17 +19,43 @@ public HelpTaskCLI() { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools help []"); System.out.println(""); - System.out.println("display help information"); + System.out.println("Display help information."); + System.out.println("If no command is supplied overall help information is shown."); + System.out.println("If a command is supplied its help information is shown instead."); System.out.println(""); - System.out.println("\tIf no command is supplied overall help information is shown."); - System.out.println("\tIf a command is supplied its help information is shown instead."); + System.out.println("options:"); + System.out.println(""); + System.out.println("none"); + System.out.println(""); + } + + public static final int A_HELPFOR_COMMAND = 101; + + public static HelpOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addNamedArgument( "helpfor_command", A_HELPFOR_COMMAND, 1, 1); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( HelpTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + HelpOptions options = new HelpOptions(); + Argument argument = cmdLine.idMap.get(A_HELPFOR_COMMAND); + if (argument != null) { + options.command = argument.value; + } + return options; } @Override public void runtask(CommandLine cmdLine) { - HelpOptions options = HelpTaskCLIParser.parseArguments(cmdLine); + HelpOptions options = parseArguments(cmdLine); String command = options.command; if (command == null) { JWATTools.show_help(); diff --git a/src/main/java/org/jwat/tools/HelpTaskCLIParser.java b/src/main/java/org/jwat/tools/HelpTaskCLIParser.java deleted file mode 100644 index 6cf6597..0000000 --- a/src/main/java/org/jwat/tools/HelpTaskCLIParser.java +++ /dev/null @@ -1,35 +0,0 @@ -package org.jwat.tools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class HelpTaskCLIParser { - - public static final int A_HELPFOR_COMMAND = 101; - - protected HelpTaskCLIParser() { - } - - public static HelpOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addNamedArgument( "helpfor_command", A_HELPFOR_COMMAND, 1, 1); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( HelpTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - HelpOptions options = new HelpOptions(); - Argument argument = cmdLine.idMap.get(A_HELPFOR_COMMAND); - if (argument != null) { - options.command = argument.value; - } - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/JWATTools.java b/src/main/java/org/jwat/tools/JWATTools.java index afa7563..6a72b4b 100644 --- a/src/main/java/org/jwat/tools/JWATTools.java +++ b/src/main/java/org/jwat/tools/JWATTools.java @@ -1,6 +1,7 @@ package org.jwat.tools; import java.lang.reflect.Field; +import java.security.Provider; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -8,8 +9,8 @@ import java.util.List; import java.util.Map; +import org.jwat.common.SecurityProviderTools; import org.jwat.tools.tasks.TaskCLI; -import org.jwat.tools.tasks.UnpackTaskCLI; import org.jwat.tools.tasks.arc2warc.Arc2WarcTaskCLI; import org.jwat.tools.tasks.cdx.CDXTaskCLI; import org.jwat.tools.tasks.changed.ChangedTaskCLI; @@ -17,11 +18,14 @@ import org.jwat.tools.tasks.containermd.ContainerMDTaskCLI; import org.jwat.tools.tasks.decompress.DecompressTaskCLI; import org.jwat.tools.tasks.delete.DeleteTaskCLI; +import org.jwat.tools.tasks.digest.DigestTaskCLI; import org.jwat.tools.tasks.extract.ExtractTaskCLI; import org.jwat.tools.tasks.headers2cdx.Headers2CDXTaskCLI; import org.jwat.tools.tasks.interval.IntervalTaskCLI; import org.jwat.tools.tasks.pathindex.PathIndexTaskCLI; import org.jwat.tools.tasks.test.TestTaskCLI; +import org.jwat.tools.tasks.unchunk.UnchunkTaskCLI; +import org.jwat.tools.tasks.unpack.UnpackTaskCLI; import com.antiaction.common.cli.Argument; import com.antiaction.common.cli.ArgumentParser; @@ -92,7 +96,9 @@ public static void configure_cli() { PathIndexTaskCLI.class, TestTaskCLI.class, UnpackTaskCLI.class, - Headers2CDXTaskCLI.class + Headers2CDXTaskCLI.class, + DigestTaskCLI.class, + UnchunkTaskCLI.class }; addCommands((Class[])tasks); } @@ -138,11 +144,11 @@ public int compare(Class t1, Class t2) { } System.out.println(""); System.out.println("See 'jwattools help ' for more information on a specific command."); + System.out.println(""); } - public static String getVersionString(String packageName) { - Package pkg = Package.getPackage(packageName); - System.out.println(pkg); + public static String getVersionString() { + Package pkg = Package.getPackage("org.jwat.tools"); String version = null; if (pkg != null) { version = pkg.getSpecificationVersion(); @@ -154,7 +160,7 @@ public static String getVersionString(String packageName) { } public static void show_help() { - System.out.println("JWATTools v" + getVersionString("org.jwat.tools")); + System.out.println("JWATTools v" + getVersionString()); //System.out.println(getVersionString("org.jwat.common")); //System.out.println(getVersionString("org.jwat.gzip")); //System.out.println(getVersionString("org.jwat.arc")); @@ -165,6 +171,10 @@ public static void show_help() { } public void Main(String[] args) { + Provider[] providers = SecurityProviderTools.getSecurityProviders(); + if (!SecurityProviderTools.isProviderAvailable(providers, "BC")) { + SecurityProviderTools.loadBCProvider(); + } CommandLine cmdLine = null; configure_cli(); try { diff --git a/src/main/java/org/jwat/tools/tasks/ThreadPoolExecutorPausable.java b/src/main/java/org/jwat/tools/tasks/ThreadPoolExecutorPausable.java index 9bcb8f7..8438d51 100644 --- a/src/main/java/org/jwat/tools/tasks/ThreadPoolExecutorPausable.java +++ b/src/main/java/org/jwat/tools/tasks/ThreadPoolExecutorPausable.java @@ -32,6 +32,7 @@ public ThreadPoolExecutorPausable(int corePoolSize, int maximumPoolSize, long ke super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory); } + @Override protected void beforeExecute(Thread t, Runnable r) { super.beforeExecute(t, r); pauseLock.lock(); @@ -46,6 +47,14 @@ protected void beforeExecute(Thread t, Runnable r) { } } + @Override + protected void afterExecute(Runnable r, Throwable t) { + super.afterExecute(r, t); + if (t != null) { + t.printStackTrace(); + } + } + public void pause() { pauseLock.lock(); try { diff --git a/src/main/java/org/jwat/tools/tasks/UnpackTaskCLI.java b/src/main/java/org/jwat/tools/tasks/UnpackTaskCLI.java deleted file mode 100644 index 2292900..0000000 --- a/src/main/java/org/jwat/tools/tasks/UnpackTaskCLI.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.jwat.tools.tasks; - -import com.antiaction.common.cli.CommandLine; - -public class UnpackTaskCLI extends TaskCLI { - - public static final String commandName = "unpack"; - - public static final String commandDescription = "unpack multifile GZip"; - - @Override - public void show_help() { - System.out.println("Work in progress..."); - } - - @Override - public void runtask(CommandLine cmdLine) { - UnpackTask task = new UnpackTask(); - task.runtask(UnpackTaskCLIParser.parseArguments(cmdLine)); - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java b/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java index d16a1fd..8c9f6e9 100644 --- a/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLI.java @@ -1,17 +1,25 @@ package org.jwat.tools.tasks.arc2warc; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class Arc2WarcTaskCLI extends TaskCLI { public static final String commandName = "arc2warc"; - public static final String commandDescription = "convert ARC to WARC"; + public static final String commandDescription = "convert ARC file(s) to WARC file(s)"; @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools arc2warc [-d DIR] [--overwrite]... [-w THREADS] ..."); System.out.println(""); System.out.println("arc2warc will convert one or more ARC file(s) to WARC file(s)."); @@ -23,12 +31,92 @@ public void show_help() { System.out.println(" --prefix destination filename prefix (default is '" + Arc2WarcOptions.DEFAULT_PREFIX + "')"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { Arc2WarcTask task = new Arc2WarcTask(); - task.runtask(Arc2WarcTaskCLIParser.parseArguments(cmdLine)); + Arc2WarcOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_DEST = 101; + public static final int A_OVERWRITE = 102; + public static final int A_PREFIX = 103; + + public static Arc2WarcOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); + cliOptions.addOption(null, "--overwrite", A_OVERWRITE, 0, null); + cliOptions.addOption(null, "--prefix", A_PREFIX, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( Arc2WarcTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + Arc2WarcOptions options = new Arc2WarcOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Destination directory. + String dest = System.getProperty("user.dir"); + argument = cmdLine.idMap.get( A_DEST ); + if ( argument != null && argument.value != null ) { + dest = argument.value; + } + System.out.println( "Using '" + dest + "' as destination directory." ); + options.destDir = new File( dest ); + if ( !options.destDir.exists() ) { + if ( !options.destDir.mkdirs() ) { + System.out.println( "Could not create destination directory: '" + dest + "'!" ); + System.exit( 1 ); + } + } else if ( !options.destDir.isDirectory() ) { + System.out.println( "'" + dest + "' is not a directory!" ); + System.exit( 1 ); + } + + // Overwrite. + if ( cmdLine.idMap.containsKey( A_OVERWRITE) ) { + options.bOverwrite = true; + } + + // Prefix. + argument = cmdLine.idMap.get( A_PREFIX ); + if ( argument != null && argument.value != null ) { + options.prefix = argument.value; + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLIParser.java deleted file mode 100644 index 0e2d764..0000000 --- a/src/main/java/org/jwat/tools/tasks/arc2warc/Arc2WarcTaskCLIParser.java +++ /dev/null @@ -1,96 +0,0 @@ -package org.jwat.tools.tasks.arc2warc; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class Arc2WarcTaskCLIParser { - - public static final int A_DEST = 101; - public static final int A_OVERWRITE = 102; - public static final int A_PREFIX = 103; - - protected Arc2WarcTaskCLIParser() { - } - - public static Arc2WarcOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); - cliOptions.addOption(null, "--overwrite", A_OVERWRITE, 0, null); - cliOptions.addOption(null, "--prefix", A_PREFIX, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( Arc2WarcTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - Arc2WarcOptions options = new Arc2WarcOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Destination directory. - String dest = System.getProperty("user.dir"); - argument = cmdLine.idMap.get( A_DEST ); - if ( argument != null && argument.value != null ) { - dest = argument.value; - } - System.out.println( "Using '" + dest + "' as destination directory." ); - options.destDir = new File( dest ); - if ( !options.destDir.exists() ) { - if ( !options.destDir.mkdirs() ) { - System.out.println( "Could not create destination directory: '" + dest + "'!" ); - System.exit( 1 ); - } - } else if ( !options.destDir.isDirectory() ) { - System.out.println( "'" + dest + "' is not a directory!" ); - System.exit( 1 ); - } - - // Overwrite. - if ( cmdLine.idMap.containsKey( A_OVERWRITE) ) { - options.bOverwrite = true; - } - - // Prefix. - argument = cmdLine.idMap.get( A_PREFIX ); - if ( argument != null && argument.value != null ) { - options.prefix = argument.value; - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java b/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java index 9bfc64b..0e6d085 100644 --- a/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.cdx; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class CDXTaskCLI extends TaskCLI { @@ -12,24 +19,85 @@ public class CDXTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools cdx [-o OUTPUT_FILE] [-w THREADS] ..."); System.out.println(""); - System.out.println("cdx one or more ARC/WARC files"); - System.out.println(""); - System.out.println("\tRead through ARC/WARC file(s) and create a CDX file."); - System.out.println("\tCDX files are primarily used with Wayback."); + System.out.println("Read through ARC/WARC file(s) and create a CDX file."); + System.out.println("CDX files are primarily used with replay tools like (Open)Wayback."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" -o output cdx filename (unsorted)"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { CDXTask task = new CDXTask(); - task.runtask(CDXTaskCLIParser.parseArguments(cmdLine)); + CDXOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OUTPUT = 101; + + public static CDXOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( CDXTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + CDXOptions options = new CDXOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Output file. + argument = cmdLine.idMap.get( A_OUTPUT ); + if ( argument != null && argument.value != null ) { + options.outputFile = new File(argument.value); + if (options.outputFile.isDirectory()) { + System.out.println("Can not output to a directory!"); + System.exit(1); + } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { + if (!options.outputFile.getParentFile().mkdirs()) { + System.out.println("Could not create parent directories!"); + System.exit(1); + } + } + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLIParser.java deleted file mode 100644 index c96a0d0..0000000 --- a/src/main/java/org/jwat/tools/tasks/cdx/CDXTaskCLIParser.java +++ /dev/null @@ -1,78 +0,0 @@ -package org.jwat.tools.tasks.cdx; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class CDXTaskCLIParser { - - public static final int A_OUTPUT = 101; - - protected CDXTaskCLIParser() { - } - - public static CDXOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( CDXTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - CDXOptions options = new CDXOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Output file. - argument = cmdLine.idMap.get( A_OUTPUT ); - if ( argument != null && argument.value != null ) { - options.outputFile = new File(argument.value); - if (options.outputFile.isDirectory()) { - System.out.println("Can not output to a directory!"); - System.exit(1); - } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { - if (!options.outputFile.getParentFile().mkdirs()) { - System.out.println("Could not create parent directories!"); - System.exit(1); - } - } - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java b/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java index 0f75b9b..80b792f 100644 --- a/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.changed; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class ChangedTaskCLI extends TaskCLI { @@ -12,22 +19,63 @@ public class ChangedTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools changed ..."); System.out.println(""); - System.out.println("group files by similar last modified dates"); - System.out.println(""); - System.out.println("\tUseful command for identifying when and if files where modified"); - System.out.println("\tin close proximity of others."); + System.out.println("Useful command for identifying when and if files where modified in close proximity of others."); + System.out.println("Group files by similar last modified dates."); System.out.println(""); System.out.println("options:"); System.out.println(""); - System.out.println(" -o output intervals and files to file"); + System.out.println(" -o output intervals and files to file"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { ChangedTask task = new ChangedTask(); - task.runtask(ChangedTaskCLIParser.parseArguments(cmdLine)); + ChangedOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OUTPUT = 101; + + public static ChangedOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( ChangedTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + ChangedOptions options = new ChangedOptions(); + + Argument argument; + + // Output file. + argument = cmdLine.idMap.get( A_OUTPUT ); + if ( argument != null && argument.value != null ) { + options.outputFile = new File(argument.value); + if (options.outputFile.isDirectory()) { + System.out.println("Can not output to a directory!"); + System.exit(1); + } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { + if (!options.outputFile.getParentFile().mkdirs()) { + System.out.println("Could not create parent directories!"); + System.exit(1); + } + } + } + + // Files + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLIParser.java deleted file mode 100644 index 6654a13..0000000 --- a/src/main/java/org/jwat/tools/tasks/changed/ChangedTaskCLIParser.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.jwat.tools.tasks.changed; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class ChangedTaskCLIParser { - - public static final int A_OUTPUT = 101; - - protected ChangedTaskCLIParser() { - } - - public static ChangedOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( ChangedTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - ChangedOptions options = new ChangedOptions(); - - Argument argument; - - // Output file. - argument = cmdLine.idMap.get( A_OUTPUT ); - if ( argument != null && argument.value != null ) { - options.outputFile = new File(argument.value); - if (options.outputFile.isDirectory()) { - System.out.println("Can not output to a directory!"); - System.exit(1); - } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { - if (!options.outputFile.getParentFile().mkdirs()) { - System.out.println("Could not create parent directories!"); - System.exit(1); - } - } - } - - // Files - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java b/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java index 1f6345e..bf00284 100644 --- a/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.compress; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class CompressTaskCLI extends TaskCLI { @@ -12,12 +19,12 @@ public class CompressTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools compress [-123456789] [--fast] [--best] [-w THREADS] ..."); System.out.println(""); - System.out.println("compress one or more ARC/WARC/GZip files"); - System.out.println(""); - System.out.println("\tNormal files are compressed as a single GZip file."); - System.out.println("\tARC/WARC files are compressed on a record level."); + System.out.println("Compress one or more ARC/WARC/GZip files."); + System.out.println("ARC/WARC files are compressed on a record level."); + System.out.println("Normal files are compressed as a single GZip file."); System.out.println(""); System.out.println("options:"); System.out.println(""); @@ -34,12 +41,160 @@ public void show_help() { System.out.println(" --blacklist list of files to ignore (one filename per line)"); System.out.println(" --checksums list of sorted checksums (one filename##checksum per line))"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { CompressTask task = new CompressTask(); - task.runtask(CompressTaskCLIParser.parseArguments(cmdLine)); + CompressOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_COMPRESS = 101; + public static final int A_BATCHMODE = 102; + public static final int A_DRYRUN = 103; + public static final int A_VERIFY = 104; + public static final int A_REMOVE = 105; + public static final int A_DEST = 106; + public static final int A_FILELIST = 107; + public static final int A_TWOPASS = 108; + public static final int A_HDRFILES = 109; + public static final int A_BLACKLIST = 110; + public static final int A_CHECKSUMS = 111; + + public static CompressOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-1", "--fast", A_COMPRESS, 1, null); + cliOptions.addOption("-2", null, A_COMPRESS, 2, null); + cliOptions.addOption("-3", null, A_COMPRESS, 3, null); + cliOptions.addOption("-4", null, A_COMPRESS, 4, null); + cliOptions.addOption("-5", null, A_COMPRESS, 5, null); + cliOptions.addOption("-6", null, A_COMPRESS, 6, null); + cliOptions.addOption("-7", null, A_COMPRESS, 7, null); + cliOptions.addOption("-8", null, A_COMPRESS, 8, null); + cliOptions.addOption("-9", "--best", A_COMPRESS, 9, null); + cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); + cliOptions.addOption(null, "--batch", A_BATCHMODE, 0, null); + cliOptions.addOption(null, "--remove", A_REMOVE, 0, null); + cliOptions.addOption(null, "--verify", A_VERIFY, 0, null); + cliOptions.addOption(null, "--dryrun", A_DRYRUN, 0, null); + cliOptions.addOption(null, "--twopass", A_TWOPASS, 0, null); + cliOptions.addOption(null, "--listfile", A_FILELIST, 0, null).setValueRequired(); + cliOptions.addOption(null, "--hdrfiles", A_HDRFILES, 0, null); + cliOptions.addOption("-q", "--quiet", JWATTools.A_QUIET, 0, null); + cliOptions.addOption(null, "--blacklist", A_BLACKLIST, 0, null).setValueRequired(); + cliOptions.addOption(null, "--checksums", A_CHECKSUMS, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( CompressTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + CompressOptions options = new CompressOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Compression level. + argument = cmdLine.idMap.get( A_COMPRESS ); + if (argument != null) { + options.compressionLevel = argument.option.subId; + } + + argument = cmdLine.idMap.get( A_BATCHMODE ); + if (argument != null) { + options.bBatch = true; + } + + argument = cmdLine.idMap.get( A_DRYRUN ); + if (argument != null) { + options.bDryrun = true; + } + + argument = cmdLine.idMap.get( A_VERIFY ); + if (argument != null) { + options.bVerify = true; + } + + argument = cmdLine.idMap.get( A_REMOVE ); + if (argument != null) { + options.bRemove = true; + } + + argument = cmdLine.idMap.get( A_DEST ); + if (argument != null) { + options.dstPath = new File( argument.value ); + } + + argument = cmdLine.idMap.get( A_FILELIST ); + if (argument != null) { + options.lstFile = new File( argument.value ); + } + + argument = cmdLine.idMap.get( A_TWOPASS ); + if (argument != null) { + options.bTwopass = true; + } + + argument = cmdLine.idMap.get( A_HDRFILES ); + if (argument != null) { + options.bHeaderFiles = true; + } + + argument = cmdLine.idMap.get( A_BLACKLIST ); + if (argument != null) { + options.blacklistFile = new File( argument.value ); + } + + argument = cmdLine.idMap.get( A_CHECKSUMS ); + if (argument != null) { + options.checksumsFile = new File( argument.value ); + } + + options.bQuiet = cmdLine.idMap.containsKey( JWATTools.A_QUIET ); + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + if (!options.bQuiet) { + System.out.println("JWATTools v" + JWATTools.getVersionString()); + System.out.println( "Compression level: " + options.compressionLevel ); + System.out.println( " Batch mode: " + options.bBatch ); + System.out.println( " Dry run: " + options.bDryrun ); + System.out.println( " Verify output: " + options.bVerify ); + System.out.println( " Remove input: " + options.bRemove ); + System.out.println( " Dest path: " + options.dstPath ); + System.out.println( " List file: " + options.lstFile ); + System.out.println( " Twopass: " + options.bTwopass ); + System.out.println( " Header Files: " + options.bHeaderFiles ); + System.out.println( " Quiet: " + options.bQuiet ); + } + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLIParser.java deleted file mode 100644 index c5c1fec..0000000 --- a/src/main/java/org/jwat/tools/tasks/compress/CompressTaskCLIParser.java +++ /dev/null @@ -1,164 +0,0 @@ -package org.jwat.tools.tasks.compress; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class CompressTaskCLIParser { - - public static final int A_COMPRESS = 101; - public static final int A_BATCHMODE = 102; - public static final int A_DRYRUN = 103; - public static final int A_VERIFY = 104; - public static final int A_REMOVE = 105; - public static final int A_DEST = 106; - public static final int A_FILELIST = 107; - public static final int A_TWOPASS = 108; - public static final int A_HDRFILES = 109; - public static final int A_BLACKLIST = 110; - public static final int A_CHECKSUMS = 111; - - protected CompressTaskCLIParser() { - } - - public static CompressOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-1", "--fast", A_COMPRESS, 1, null); - cliOptions.addOption("-2", null, A_COMPRESS, 2, null); - cliOptions.addOption("-3", null, A_COMPRESS, 3, null); - cliOptions.addOption("-4", null, A_COMPRESS, 4, null); - cliOptions.addOption("-5", null, A_COMPRESS, 5, null); - cliOptions.addOption("-6", null, A_COMPRESS, 6, null); - cliOptions.addOption("-7", null, A_COMPRESS, 7, null); - cliOptions.addOption("-8", null, A_COMPRESS, 8, null); - cliOptions.addOption("-9", "--best", A_COMPRESS, 9, null); - cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); - cliOptions.addOption(null, "--batch", A_BATCHMODE, 0, null); - cliOptions.addOption(null, "--remove", A_REMOVE, 0, null); - cliOptions.addOption(null, "--verify", A_VERIFY, 0, null); - cliOptions.addOption(null, "--dryrun", A_DRYRUN, 0, null); - cliOptions.addOption(null, "--twopass", A_TWOPASS, 0, null); - cliOptions.addOption(null, "--listfile", A_FILELIST, 0, null).setValueRequired(); - cliOptions.addOption(null, "--hdrfiles", A_HDRFILES, 0, null); - cliOptions.addOption("-q", "--quiet", JWATTools.A_QUIET, 0, null); - cliOptions.addOption(null, "--blacklist", A_BLACKLIST, 0, null).setValueRequired(); - cliOptions.addOption(null, "--checksums", A_CHECKSUMS, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( CompressTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - CompressOptions options = new CompressOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Compression level. - argument = cmdLine.idMap.get( A_COMPRESS ); - if (argument != null) { - options.compressionLevel = argument.option.subId; - } - - argument = cmdLine.idMap.get( A_BATCHMODE ); - if (argument != null) { - options.bBatch = true; - } - - argument = cmdLine.idMap.get( A_DRYRUN ); - if (argument != null) { - options.bDryrun = true; - } - - argument = cmdLine.idMap.get( A_VERIFY ); - if (argument != null) { - options.bVerify = true; - } - - argument = cmdLine.idMap.get( A_REMOVE ); - if (argument != null) { - options.bRemove = true; - } - - argument = cmdLine.idMap.get( A_DEST ); - if (argument != null) { - options.dstPath = new File( argument.value ); - } - - argument = cmdLine.idMap.get( A_FILELIST ); - if (argument != null) { - options.lstFile = new File( argument.value ); - } - - argument = cmdLine.idMap.get( A_TWOPASS ); - if (argument != null) { - options.bTwopass = true; - } - - argument = cmdLine.idMap.get( A_HDRFILES ); - if (argument != null) { - options.bHeaderFiles = true; - } - - argument = cmdLine.idMap.get( A_BLACKLIST ); - if (argument != null) { - options.blacklistFile = new File( argument.value ); - } - - argument = cmdLine.idMap.get( A_CHECKSUMS ); - if (argument != null) { - options.checksumsFile = new File( argument.value ); - } - - options.bQuiet = cmdLine.idMap.containsKey( JWATTools.A_QUIET ); - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - if (!options.bQuiet) { - System.out.println("JWATTools v" + JWATTools.getVersionString("org.jwat.tools")); - System.out.println( "Compression level: " + options.compressionLevel ); - System.out.println( " Batch mode: " + options.bBatch ); - System.out.println( " Dry run: " + options.bDryrun ); - System.out.println( " Verify output: " + options.bVerify ); - System.out.println( " Remove input: " + options.bRemove ); - System.out.println( " Dest path: " + options.dstPath ); - System.out.println( " List file: " + options.lstFile ); - System.out.println( " Twopass: " + options.bTwopass ); - System.out.println( " Header Files: " + options.bHeaderFiles ); - System.out.println( " Quiet: " + options.bQuiet ); - } - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java b/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java index 51b90b2..7a262f9 100644 --- a/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLI.java @@ -1,8 +1,16 @@ package org.jwat.tools.tasks.containermd; +import java.io.File; + +import org.jwat.common.UriProfile; +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class ContainerMDTaskCLI extends TaskCLI { @@ -12,9 +20,10 @@ public class ContainerMDTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools containermd [-d outputDir] [-l] [-q] [-w THREADS] "); System.out.println(""); - System.out.println("generate containerMD for (W)ARC files"); + System.out.println("Generate containerMD for (W)ARC files."); System.out.println(""); System.out.println("options:"); System.out.println(""); @@ -23,12 +32,89 @@ public void show_help() { System.out.println(" -q quiet, no output to console"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { ContainerMDTask task = new ContainerMDTask(); - task.runtask(ContainerMDTaskCLIParser.parseArguments(cmdLine)); + ContainerMDOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_DEST = 101; + public static final int A_LAX = 102; + + public static ContainerMDOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); + cliOptions.addOption("-l", null, A_LAX, 0, null); + cliOptions.addOption("-q", null, JWATTools.A_QUIET, 0, null); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( ContainerMDTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + ContainerMDOptions options = new ContainerMDOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.err.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.err.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Output directory + argument = cmdLine.idMap.get( A_DEST ); + if ( argument != null && argument.value != null ) { + File dir = new File(argument.value); + if (dir.exists()) { + if (dir.isDirectory()) { + options.outputDir = dir; + } else { + if (!options.bQuiet) System.err.println("Output '" + argument.value + "' invalid, defaulting to '" + options.outputDir + "'"); + } + } else { + if (dir.mkdirs()) { + options.outputDir = dir; + } else { + if (!options.bQuiet) System.err.println("Output '" + argument.value + "' invalid, defaulting to '" + options.outputDir + "'"); + } + } + } + + // Relaxed URI validation. + if ( cmdLine.idMap.containsKey( A_LAX ) ) { + options.uriProfile = UriProfile.RFC3986_ABS_16BIT_LAX; + if (!options.bQuiet) System.out.println("Using relaxed URI validation for ARC URL and WARC Target-URI."); + } + + options.bQuiet = cmdLine.idMap.containsKey( JWATTools.A_QUIET ); + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLIParser.java deleted file mode 100644 index 8cb55c2..0000000 --- a/src/main/java/org/jwat/tools/tasks/containermd/ContainerMDTaskCLIParser.java +++ /dev/null @@ -1,94 +0,0 @@ -package org.jwat.tools.tasks.containermd; - -import java.io.File; - -import org.jwat.common.UriProfile; -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class ContainerMDTaskCLIParser { - - public static final int A_DEST = 101; - public static final int A_LAX = 102; - - protected ContainerMDTaskCLIParser() { - } - - public static ContainerMDOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-d", "--destdir", A_DEST, 0, null).setValueRequired(); - cliOptions.addOption("-l", null, A_LAX, 0, null); - cliOptions.addOption("-q", null, JWATTools.A_QUIET, 0, null); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( ContainerMDTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - ContainerMDOptions options = new ContainerMDOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.err.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.err.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Output directory - argument = cmdLine.idMap.get( A_DEST ); - if ( argument != null && argument.value != null ) { - File dir = new File(argument.value); - if (dir.exists()) { - if (dir.isDirectory()) { - options.outputDir = dir; - } else { - if (!options.bQuiet) System.err.println("Output '" + argument.value + "' invalid, defaulting to '" + options.outputDir + "'"); - } - } else { - if (dir.mkdirs()) { - options.outputDir = dir; - } else { - if (!options.bQuiet) System.err.println("Output '" + argument.value + "' invalid, defaulting to '" + options.outputDir + "'"); - } - } - } - - // Relaxed URI validation. - if ( cmdLine.idMap.containsKey( A_LAX ) ) { - options.uriProfile = UriProfile.RFC3986_ABS_16BIT_LAX; - if (!options.bQuiet) System.out.println("Using relaxed URI validation for ARC URL and WARC Target-URI."); - } - - options.bQuiet = cmdLine.idMap.containsKey( JWATTools.A_QUIET ); - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java b/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java index d02f1d3..63e0178 100644 --- a/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLI.java @@ -1,8 +1,13 @@ package org.jwat.tools.tasks.decompress; +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class DecompressTaskCLI extends TaskCLI { @@ -12,23 +17,67 @@ public class DecompressTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools decompress [-w THREADS] ..."); System.out.println(""); - System.out.println("decompress one or more GZip files"); - System.out.println(""); - System.out.println("\tNormal files are decompressed into one or more files."); - System.out.println("\tARC/WARC files are compressed on a record level."); + System.out.println("Decompress one or more GZip files."); + System.out.println("ARC/WARC files are compressed on a record level."); + System.out.println("Normal files are decompressed into one or more files."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" --queue-first queue files before processing"); - System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { DecompressTask task = new DecompressTask(); - task.runtask(DecompressTaskCLIParser.parseArguments(cmdLine)); + DecompressOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static DecompressOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( DecompressTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + DecompressOptions options = new DecompressOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLIParser.java deleted file mode 100644 index aa98cd1..0000000 --- a/src/main/java/org/jwat/tools/tasks/decompress/DecompressTaskCLIParser.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.jwat.tools.tasks.decompress; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class DecompressTaskCLIParser { - - protected DecompressTaskCLIParser() { - } - - public static DecompressOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( DecompressTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - DecompressOptions options = new DecompressOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java b/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java index f0e73a8..0d4a18f 100644 --- a/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.delete; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class DeleteTaskCLI extends TaskCLI { @@ -12,23 +19,72 @@ public class DeleteTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools delete [-t] [-o OUTPUT_FILE] ..."); System.out.println(""); - System.out.println("delete one or more files"); - System.out.println(""); - System.out.println("\tDelete one or more files."); - System.out.println("\tLinux has this nasty habit of making it hard to delete many files at the same time."); + System.out.println("Delete one or more files."); + System.out.println("Linux has this nasty habit of making it hard to delete many files at the same time."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" -o output filenames deleted"); System.out.println(" --dryrun dry run, does not delete files"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { DeleteTask task = new DeleteTask(); - task.runtask(DeleteTaskCLIParser.parseArguments(cmdLine)); + DeleteOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OUTPUT = 101; + public static final int A_DRYRUN = 102; + + public static DeleteOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); + cliOptions.addOption(null, "--dryrun", A_DRYRUN, 0, null); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( DeleteTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + DeleteOptions options = new DeleteOptions(); + + Argument argument; + + // Output file. + argument = cmdLine.idMap.get( A_OUTPUT ); + if ( argument != null && argument.value != null ) { + options.outputFile = new File(argument.value); + if (options.outputFile.isDirectory()) { + System.out.println("Can not output to a directory!"); + System.exit(1); + } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { + if (!options.outputFile.getParentFile().mkdirs()) { + System.out.println("Could not create parent directories!"); + System.exit(1); + } + } + } + + // Test run. + if ( cmdLine.idMap.containsKey( A_DRYRUN ) ) { + options.bDryRun = true; + } + System.out.println("Test run: " + options.bDryRun); + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLIParser.java deleted file mode 100644 index a837703..0000000 --- a/src/main/java/org/jwat/tools/tasks/delete/DeleteTaskCLIParser.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.jwat.tools.tasks.delete; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class DeleteTaskCLIParser { - - public static final int A_OUTPUT = 101; - public static final int A_DRYRUN = 102; - - protected DeleteTaskCLIParser() { - } - - public static DeleteOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); - cliOptions.addOption(null, "--dryrun", A_DRYRUN, 0, null); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( DeleteTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - DeleteOptions options = new DeleteOptions(); - - Argument argument; - - // Output file. - argument = cmdLine.idMap.get( A_OUTPUT ); - if ( argument != null && argument.value != null ) { - options.outputFile = new File(argument.value); - if (options.outputFile.isDirectory()) { - System.out.println("Can not output to a directory!"); - System.exit(1); - } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { - if (!options.outputFile.getParentFile().mkdirs()) { - System.out.println("Could not create parent directories!"); - System.exit(1); - } - } - } - - // Test run. - if ( cmdLine.idMap.containsKey( A_DRYRUN ) ) { - options.bDryRun = true; - } - System.out.println("Test run: " + options.bDryRun); - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestAlgo.java b/src/main/java/org/jwat/tools/tasks/digest/DigestAlgo.java new file mode 100644 index 0000000..2ab5ac2 --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestAlgo.java @@ -0,0 +1,11 @@ +package org.jwat.tools.tasks.digest; + +import java.security.MessageDigest; + +public class DigestAlgo { + + public String mdAlgo; + + public MessageDigest md; + +} diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java b/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java new file mode 100644 index 0000000..0644388 --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestOptions.java @@ -0,0 +1,52 @@ +package org.jwat.tools.tasks.digest; + +import java.util.List; + +import org.jwat.tools.JWATTools; + +public class DigestOptions { + + public List filesList; + + public boolean bBase16; + + public boolean bBase32; + + public boolean bBase64; + + public DigestAlgo[] digestAlgos; + + @Override + public String toString() { + String lineSeparator = System.lineSeparator(); + //int idx; + //int len; + StringBuilder sb = new StringBuilder(); + sb.append("FileTools v"); + sb.append(JWATTools.getVersionString()); + if (filesList.size() > 0) { + sb.append(lineSeparator); + /* + if (paths != null) { + idx = 0; + len = paths.length; + sb.append(" Path: " + paths[idx++].getPath()); + sb.append(lineSeparator); + while (idx < len) { + sb.append(" " + paths[idx++].getPath()); + sb.append(lineSeparator); + } + } + */ + sb.append(" base16: " + bBase16); + sb.append(lineSeparator); + sb.append(" base32: " + bBase32); + sb.append(lineSeparator); + sb.append(" base64: " + bBase64); + //sb.append(lineSeparator); + //sb.append(" base64: " + mdAlgo); + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java b/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java new file mode 100644 index 0000000..9fa6c2d --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/digest/DigestTask.java @@ -0,0 +1,81 @@ +package org.jwat.tools.tasks.digest; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.security.MessageDigest; + +import org.jwat.common.Base16; +import org.jwat.common.Base32; +import org.jwat.common.Base64; +import org.jwat.common.SecurityProviderAlgorithms; +import org.jwat.tools.tasks.AbstractTask; + +import it.unimi.dsi.fastutil.io.FastBufferedInputStream; + +public class DigestTask extends AbstractTask { + + private DigestOptions options; + + public DigestTask() { + } + + public void runtask(DigestOptions options) { + this.options = options; + if (options.filesList.size() > 0) { + filelist_feeder( options.filesList, this ); + } + else { + SecurityProviderAlgorithms spa = SecurityProviderAlgorithms.getInstanceFor(MessageDigest.class); + System.out.println(""); + System.out.println("Available algorithms:"); + System.out.println("---------------------"); + System.out.println(spa.getAlgorithmListGrouped()); + System.out.println(""); + } + } + + private byte[] isBuffer = new byte[65536]; + + private byte[] readBuffer = new byte[65536]; + + @Override + public void process(File srcFile) { + byte[] digest; + FastBufferedInputStream in = null; + int read; + try { + in = new FastBufferedInputStream(new FileInputStream(srcFile), isBuffer); + while ((read = in.read(readBuffer)) != -1) { + for (int i=0; i] digest ... "); + System.out.println(""); + System.out.println("Digest file(s)"); + System.out.println("Use of this is mostly for debugging purposes."); + System.out.println(""); + System.out.println("options:"); + System.out.println(""); + System.out.println(" -a specify one or more digest algorithm"); + System.out.println(""); + System.out.println("Available digest algorithms:"); + System.out.println("----------------------------"); + System.out.println(spa.getAlgorithmListGrouped()); + System.out.println(""); + } + + @Override + public void runtask(CommandLine cmdLine) { + DigestTask task = new DigestTask(); + DigestOptions options = parseArguments(cmdLine); + System.out.println(options.toString()); + task.runtask(options); + } + + public static final int A_BASE16 = 101; + public static final int A_BASE32 = 102; + public static final int A_BASE64 = 103; + public static final int A_DIGEST_ALGO = 104; + + public static DigestOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addNamedArgument( "files", JWATTools.A_FILES, 1, 1 ); + //cliOptions.addOption(null, "--base16", A_BASE16, 0, null); + //cliOptions.addOption(null, "--base32", A_BASE32, 0, null); + //cliOptions.addOption(null, "--base64", A_BASE64, 0, null); + cliOptions.addOption("-a", "--digest-algorithm", A_DIGEST_ALGO, 0, null).setValueRequired(); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( DigestTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + DigestOptions options = new DigestOptions(); + + Argument argument; + String[] values; + DigestAlgo digestAlgo; + + // Files + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = new LinkedList(); + //options.filesList = argument.values; + if (argument != null) { + options.filesList.add( argument.value ); + } + + argument = cmdLine.idMap.get(A_DIGEST_ALGO); + if (argument != null) { + values = argument.value.split(","); + options.digestAlgos = new DigestAlgo[values.length]; + for (int i=0; i 0 && options.digestAlgos == null) { + System.out.println("Missing digest algorithm."); + System.exit(-1); + } + + options.bBase16 = (cmdLine.idMap.get(A_BASE16) != null); + options.bBase32 = (cmdLine.idMap.get(A_BASE32) != null); + options.bBase64 = (cmdLine.idMap.get(A_BASE64) != null); + + if (options.bBase32 == false && options.bBase64 == false) { + options.bBase16 = true; + } + options.bBase32 = true; + options.bBase64 = true; + + return options; + } + +} diff --git a/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java b/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java index e3ba340..6d68227 100644 --- a/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLI.java @@ -1,8 +1,13 @@ package org.jwat.tools.tasks.extract; +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class ExtractTaskCLI extends TaskCLI { @@ -12,21 +17,74 @@ public class ExtractTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools extract [-u URI] [-w THREADS] ..."); System.out.println(""); - System.out.println("extract one or more entries/records from GZip/ARC/WARC files"); + System.out.println("Extract one or more entries/records from GZip/ARC/WARC files."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" -u (target)uri to extract"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { ExtractTask task = new ExtractTask(); - task.runtask(ExtractTaskCLIParser.parseArguments(cmdLine)); + ExtractOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_TARGET_URI = 101; + + public static ExtractOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-u", null, A_TARGET_URI, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( ExtractTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + ExtractOptions options = new ExtractOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + argument = cmdLine.idMap.get( A_TARGET_URI ); + if ( argument != null && argument.value != null ) { + options.targetUri = argument.value; + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLIParser.java deleted file mode 100644 index 75a668c..0000000 --- a/src/main/java/org/jwat/tools/tasks/extract/ExtractTaskCLIParser.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.jwat.tools.tasks.extract; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class ExtractTaskCLIParser { - - public static final int A_TARGET_URI = 101; - - protected ExtractTaskCLIParser() { - } - - public static ExtractOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-u", null, A_TARGET_URI, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( ExtractTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - ExtractOptions options = new ExtractOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - argument = cmdLine.idMap.get( A_TARGET_URI ); - if ( argument != null && argument.value != null ) { - options.targetUri = argument.value; - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java b/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java index a154be2..49018a0 100644 --- a/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.headers2cdx; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class Headers2CDXTaskCLI extends TaskCLI { @@ -12,24 +19,85 @@ public class Headers2CDXTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools headers2cdx [-o OUTPUT_FILE] [-w THREADS] ..."); System.out.println(""); - System.out.println("cdx one or more gzipped json (W)ARC/HTTP header files"); - System.out.println(""); - System.out.println("\tRead through gzipped json (W)ARC/HTTP header file(s) and create a CDX file."); - System.out.println("\tCDX files are primarily used with Wayback."); + System.out.println("Read through gzipped json (W)ARC/HTTP header file(s) and create a CDX file."); + System.out.println("CDX files are primarily used with replay tools like (Open)Wayback."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" -o output cdx filename (unsorted)"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { Headers2CDXTask task = new Headers2CDXTask(); - task.runtask(Headers2CDXTaskCLIParser.parseArguments(cmdLine)); + Headers2CDXOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OUTPUT = 101; + + public static Headers2CDXOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( Headers2CDXTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + Headers2CDXOptions options = new Headers2CDXOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Output file. + argument = cmdLine.idMap.get( A_OUTPUT ); + if ( argument != null && argument.value != null ) { + options.outputFile = new File(argument.value); + if (options.outputFile.isDirectory()) { + System.out.println("Can not output to a directory!"); + System.exit(1); + } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { + if (!options.outputFile.getParentFile().mkdirs()) { + System.out.println("Could not create parent directories!"); + System.exit(1); + } + } + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLIParser.java deleted file mode 100644 index b156eeb..0000000 --- a/src/main/java/org/jwat/tools/tasks/headers2cdx/Headers2CDXTaskCLIParser.java +++ /dev/null @@ -1,78 +0,0 @@ -package org.jwat.tools.tasks.headers2cdx; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class Headers2CDXTaskCLIParser { - - public static final int A_OUTPUT = 101; - - protected Headers2CDXTaskCLIParser() { - } - - public static Headers2CDXOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( Headers2CDXTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - Headers2CDXOptions options = new Headers2CDXOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Output file. - argument = cmdLine.idMap.get( A_OUTPUT ); - if ( argument != null && argument.value != null ) { - options.outputFile = new File(argument.value); - if (options.outputFile.isDirectory()) { - System.out.println("Can not output to a directory!"); - System.exit(1); - } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { - if (!options.outputFile.getParentFile().mkdirs()) { - System.out.println("Could not create parent directories!"); - System.exit(1); - } - } - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java b/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java index 114b28d..7eed883 100644 --- a/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.interval; +import java.util.LinkedList; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class IntervalTaskCLI extends TaskCLI { @@ -12,24 +19,102 @@ public class IntervalTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools [-o] interval offset1 offset2 srcfile dstfile"); System.out.println(""); - System.out.println("extract the byte interval from offset1 to offset2 from a file"); - System.out.println(""); - System.out.println("\tSkips data up to offset1 and save data to file until offset2 is reached."); - System.out.println("\tOffset1/2 can be decimal or hexadecimal ($ or 0x)."); - System.out.println("\tOffset2 can also be a length-ofsset (+)."); - /* + System.out.println("Extract the byte interval from offset1 to offset2 from a file."); + System.out.println("Offset1/2 can be decimal or hexadecimal ($ or 0x)."); + System.out.println("Offset2 can also be a length-offset (+ +$ +0x)."); System.out.println(""); System.out.println("options:"); System.out.println(""); - */ + System.out.println("none"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { IntervalTask task = new IntervalTask(); - task.runtask(IntervalTaskCLIParser.parseArguments(cmdLine)); + IntervalOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OFFSET1 = 101; + public static final int A_OFFSET2 = 102; + public static final int A_DSTFILE = 103; + + public static IntervalOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addNamedArgument( "offset1", A_OFFSET1, 1, 1); + cliOptions.addNamedArgument( "offset2", A_OFFSET2, 1, 1); + cliOptions.addNamedArgument( "files", JWATTools.A_FILES, 1, 1 ); + cliOptions.addNamedArgument( "dstfile", A_DSTFILE, 1, 1); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( IntervalTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + IntervalOptions options = new IntervalOptions(); + + Argument argument; + String tmpStr; + + argument = cmdLine.idMap.get( A_OFFSET1 ); + tmpStr = argument.value.toLowerCase(); + try { + if (tmpStr.startsWith("$")) { + options.sIdx = Long.parseLong(tmpStr.substring(1), 16); + } + else if (tmpStr.startsWith("0x")) { + options.sIdx = Long.parseLong(tmpStr.substring(2), 16); + } + else { + options.sIdx = Long.parseLong(tmpStr); + } + } + catch (NumberFormatException e) { + System.out.println("Incorrect sidx!"); + System.exit(1); + } + + argument = cmdLine.idMap.get( A_OFFSET2 ); + tmpStr = argument.value.toLowerCase(); + options.bPlusEIdx = tmpStr.startsWith("+"); + if (options.bPlusEIdx) { + tmpStr = tmpStr.substring(1); + } + try { + if (tmpStr.startsWith("$")) { + options.eIdx = Long.parseLong(tmpStr.substring(1), 16); + } + else if (tmpStr.startsWith("0x")) { + options.eIdx = Long.parseLong(tmpStr.substring(2), 16); + } + else { + options.eIdx = Long.parseLong(tmpStr); + } + if (options.bPlusEIdx) { + options.eIdx += options.sIdx; + } + } + catch (NumberFormatException e) { + System.out.println("Incorrect sidx!"); + System.exit(1); + } + + // Files + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = new LinkedList(); + //options.filesList = argument.values; + options.filesList.add( argument.value ); + + argument = cmdLine.idMap.get( A_DSTFILE ); + options.dstName = argument.value; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLIParser.java deleted file mode 100644 index 8910f0e..0000000 --- a/src/main/java/org/jwat/tools/tasks/interval/IntervalTaskCLIParser.java +++ /dev/null @@ -1,96 +0,0 @@ -package org.jwat.tools.tasks.interval; - -import java.util.LinkedList; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class IntervalTaskCLIParser { - - public static final int A_OFFSET1 = 101; - public static final int A_OFFSET2 = 102; - public static final int A_DSTFILE = 103; - - protected IntervalTaskCLIParser() { - } - - public static IntervalOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addNamedArgument( "offset1", A_OFFSET1, 1, 1); - cliOptions.addNamedArgument( "offset2", A_OFFSET2, 1, 1); - cliOptions.addNamedArgument( "files", JWATTools.A_FILES, 1, 1 ); - cliOptions.addNamedArgument( "dstfile", A_DSTFILE, 1, 1); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( IntervalTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - IntervalOptions options = new IntervalOptions(); - - Argument argument; - String tmpStr; - - argument = cmdLine.idMap.get( A_OFFSET1 ); - tmpStr = argument.value.toLowerCase(); - try { - if (tmpStr.startsWith("$")) { - options.sIdx = Long.parseLong(tmpStr.substring(1), 16); - } - else if (tmpStr.startsWith("0x")) { - options.sIdx = Long.parseLong(tmpStr.substring(2), 16); - } - else { - options.sIdx = Long.parseLong(tmpStr); - } - } - catch (NumberFormatException e) { - System.out.println("Incorrect sidx!"); - System.exit(1); - } - - argument = cmdLine.idMap.get( A_OFFSET2 ); - tmpStr = argument.value.toLowerCase(); - options.bPlusEIdx = tmpStr.startsWith("+"); - if (options.bPlusEIdx) { - tmpStr = tmpStr.substring(1); - } - try { - if (tmpStr.startsWith("$")) { - options.eIdx = Long.parseLong(tmpStr.substring(1), 16); - } - else if (tmpStr.startsWith("0x")) { - options.eIdx = Long.parseLong(tmpStr.substring(2), 16); - } - else { - options.eIdx = Long.parseLong(tmpStr); - } - if (options.bPlusEIdx) { - options.eIdx += options.sIdx; - } - } - catch (NumberFormatException e) { - System.out.println("Incorrect sidx!"); - System.exit(1); - } - - // Files - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = new LinkedList(); - //options.filesList = argument.values; - options.filesList.add( argument.value ); - - argument = cmdLine.idMap.get( A_DSTFILE ); - options.dstName = argument.value; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java b/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java index 47c5383..02890ef 100644 --- a/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLI.java @@ -1,8 +1,15 @@ package org.jwat.tools.tasks.pathindex; +import java.io.File; + +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class PathIndexTaskCLI extends TaskCLI { @@ -12,22 +19,62 @@ public class PathIndexTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools pathindex [-o OUTPUT_FILE] ..."); System.out.println(""); - System.out.println("create a pathindex from one or more ARC/WARC files"); - System.out.println(""); - System.out.println("\tRead through ARC/WARC file(s) and create a pathindex file."); - System.out.println("\tPathindex files are primarily used with Wayback."); + System.out.println("Create a pathindex from one or more ARC/WARC files."); + System.out.println("Pathindex files are primarily used with replay tools like (Open)Wayback."); System.out.println(""); System.out.println("options:"); System.out.println(""); System.out.println(" -o output pathindex filename (unsorted)"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { PathIndexTask task = new PathIndexTask(); - task.runtask(PathIndexTaskCLIParser.parseArguments(cmdLine)); + PathIndexOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_OUTPUT = 101; + + public static PathIndexOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( PathIndexTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + PathIndexOptions options = new PathIndexOptions(); + Argument argument; + + // Output file. + argument = cmdLine.idMap.get( A_OUTPUT ); + if ( argument != null && argument.value != null ) { + options.outputFile = new File(argument.value); + if (options.outputFile.isDirectory()) { + System.out.println("Can not output to a directory!"); + System.exit(1); + } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { + if (!options.outputFile.getParentFile().mkdirs()) { + System.out.println("Could not create parent directories!"); + System.exit(1); + } + } + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLIParser.java deleted file mode 100644 index ab2e9e0..0000000 --- a/src/main/java/org/jwat/tools/tasks/pathindex/PathIndexTaskCLIParser.java +++ /dev/null @@ -1,57 +0,0 @@ -package org.jwat.tools.tasks.pathindex; - -import java.io.File; - -import org.jwat.tools.JWATTools; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class PathIndexTaskCLIParser { - - public static final int A_OUTPUT = 101; - - protected PathIndexTaskCLIParser() { - } - - public static PathIndexOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption("-o", null, A_OUTPUT, 0, null).setValueRequired(); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( PathIndexTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - PathIndexOptions options = new PathIndexOptions(); - Argument argument; - - // Output file. - argument = cmdLine.idMap.get( A_OUTPUT ); - if ( argument != null && argument.value != null ) { - options.outputFile = new File(argument.value); - if (options.outputFile.isDirectory()) { - System.out.println("Can not output to a directory!"); - System.exit(1); - } else if (options.outputFile.getParentFile() != null && !options.outputFile.getParentFile().exists()) { - if (!options.outputFile.getParentFile().mkdirs()) { - System.out.println("Could not create parent directories!"); - System.exit(1); - } - } - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/test/TestTask.java b/src/main/java/org/jwat/tools/tasks/test/TestTask.java index 3c6dc4b..fbfe450 100644 --- a/src/main/java/org/jwat/tools/tasks/test/TestTask.java +++ b/src/main/java/org/jwat/tools/tasks/test/TestTask.java @@ -260,11 +260,16 @@ class TaskRunnable implements Runnable { } @Override public void run() { - TestFile2 testFile = new TestFile2(); - testFile.callback = null; - TestFileResult result = testFile.processFile(srcFile, options, cloner); - results.add(result); - resultsReady.release(); + try { + TestFile2 testFile = new TestFile2(); + testFile.callback = null; + TestFileResult result = testFile.processFile(srcFile, options, cloner); + results.add(result); + resultsReady.release(); + } + catch (Throwable t) { + t.printStackTrace(); + } } } diff --git a/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java b/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java index f24c7e2..74a4c2b 100644 --- a/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java +++ b/src/main/java/org/jwat/tools/tasks/test/TestTaskCLI.java @@ -1,8 +1,20 @@ package org.jwat.tools.tasks.test; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; + +import org.jwat.common.UriProfile; +import org.jwat.tools.JWATTools; import org.jwat.tools.tasks.TaskCLI; +import org.jwat.tools.validators.XmlValidatorPlugin; +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; public class TestTaskCLI extends TaskCLI { @@ -12,9 +24,10 @@ public class TestTaskCLI extends TaskCLI { @Override public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); System.out.println("jwattools test [-beilx] [-w THREADS] [-a] ..."); System.out.println(""); - System.out.println("test one or more ARC/WARC/GZip files"); + System.out.println("Test one or more ARC/WARC/GZip files."); System.out.println(""); System.out.println("options:"); System.out.println(""); @@ -27,12 +40,115 @@ public void show_help() { System.out.println(" -x to validate text/xml payload (eg. mets)"); System.out.println(" --queue-first queue files before processing"); System.out.println(" -w set the amount of worker thread(s) (defaults to 1)"); + System.out.println(""); } @Override public void runtask(CommandLine cmdLine) { TestTask task = new TestTask(); - task.runtask(TestTaskCLIParser.parseArguments(cmdLine)); + TestOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static final int A_AFTER = 101; + public static final int A_BAD = 102; + public static final int A_SHOW_ERRORS = 103; + public static final int A_IGNORE_DIGEST = 104; + public static final int A_HTTP_HEADER_ERRORS = 105; + public static final int A_LAX = 106; + public static final int A_XML = 107; + + public static TestOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); + cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); + cliOptions.addOption("-a", null, A_AFTER, 0, null).setValueRequired(); + cliOptions.addOption("-b", null, A_BAD, 0, null); + cliOptions.addOption("-e", null, A_SHOW_ERRORS, 0, null); + cliOptions.addOption("-h", null, A_HTTP_HEADER_ERRORS, 0, null); + cliOptions.addOption("-i", "--ignore-digest", A_IGNORE_DIGEST, 0, null); + cliOptions.addOption("-l", null, A_LAX, 0, null); + cliOptions.addOption("-x", null, A_XML, 0, null); + cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println(TestTaskCLI.class.getName() + ": " + e.getMessage()); + System.exit(1); + } + + TestOptions options = new TestOptions(); + + Argument argument; + + // Queue first. + options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); + options.bHttpHeaderErrors = cmdLine.idMap.containsKey(A_HTTP_HEADER_ERRORS); + + // Thread workers. + argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); + if ( argument != null && argument.value != null ) { + try { + options.threads = Integer.parseInt(argument.value); + } catch (NumberFormatException e) { + System.out.println( "Invalid number of threads requested: " + argument.value ); + System.exit( 1 ); + } + } + if ( options.threads < 1 ) { + System.out.println( "Invalid number of threads requested: " + options.threads ); + System.exit( 1 ); + } + + // Show errors. + if ( cmdLine.idMap.containsKey( A_SHOW_ERRORS ) ) { + options.bShowErrors = true; + } + System.out.println("Showing errors: " + options.bShowErrors); + + // Ignore digest. + if ( cmdLine.idMap.containsKey( A_IGNORE_DIGEST ) ) { + options.bValidateDigest = false; + } + System.out.println("Validate digest: " + options.bValidateDigest); + + // Relaxed URI validation. + if ( cmdLine.idMap.containsKey( A_LAX ) ) { + options.uriProfile = UriProfile.RFC3986_ABS_16BIT_LAX; + System.out.println("Using relaxed URI validation for ARC URL and WARC Target-URI."); + } + + // XML validation. + if ( cmdLine.idMap.containsKey( A_XML ) ) { + options.validatorPlugins.add(new XmlValidatorPlugin()); + } + + // Tag. + if ( cmdLine.idMap.containsKey( A_BAD ) ) { + options.bBad = true; + System.out.println("Tagging enabled for invalid files"); + } + + // After. + argument = cmdLine.idMap.get( A_AFTER ); + if ( argument != null && argument.value != null ) { + try { + DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss"); + dateFormat.setLenient(false); + Date afterDate = dateFormat.parse(argument.value); + options.after = afterDate.getTime(); + } catch (ParseException e) { + System.out.println("Invalid date format - " + argument.value); + System.exit( 1 ); + } + } + + // Files. + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = argument.values; + + return options; } } diff --git a/src/main/java/org/jwat/tools/tasks/test/TestTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/test/TestTaskCLIParser.java deleted file mode 100644 index f8457f7..0000000 --- a/src/main/java/org/jwat/tools/tasks/test/TestTaskCLIParser.java +++ /dev/null @@ -1,124 +0,0 @@ -package org.jwat.tools.tasks.test; - -import java.text.DateFormat; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Date; - -import org.jwat.common.UriProfile; -import org.jwat.tools.JWATTools; -import org.jwat.tools.validators.XmlValidatorPlugin; - -import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParser; -import com.antiaction.common.cli.ArgumentParserException; -import com.antiaction.common.cli.CommandLine; -import com.antiaction.common.cli.Options; - -public class TestTaskCLIParser { - - public static final int A_AFTER = 101; - public static final int A_BAD = 102; - public static final int A_SHOW_ERRORS = 103; - public static final int A_IGNORE_DIGEST = 104; - public static final int A_HTTP_HEADER_ERRORS = 105; - public static final int A_LAX = 106; - public static final int A_XML = 107; - - protected TestTaskCLIParser() { - } - - public static TestOptions parseArguments(CommandLine cmdLine) { - Options cliOptions = new Options(); - try { - cliOptions.addOption(null, "--queue-first", JWATTools.A_QUEUE_FIRST, 0, null); - cliOptions.addOption("-w", "--workers", JWATTools.A_WORKERS, 0, null).setValueRequired(); - cliOptions.addOption("-a", null, A_AFTER, 0, null).setValueRequired(); - cliOptions.addOption("-b", null, A_BAD, 0, null); - cliOptions.addOption("-e", null, A_SHOW_ERRORS, 0, null); - cliOptions.addOption("-h", null, A_HTTP_HEADER_ERRORS, 0, null); - cliOptions.addOption("-i", "--ignore-digest", A_IGNORE_DIGEST, 0, null); - cliOptions.addOption("-l", null, A_LAX, 0, null); - cliOptions.addOption("-x", null, A_XML, 0, null); - cliOptions.addNamedArgument("files", JWATTools.A_FILES, 1, Integer.MAX_VALUE); - cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); - } - catch (ArgumentParserException e) { - System.out.println( TestTaskCLIParser.class.getName() + ": " + e.getMessage() ); - System.exit( 1 ); - } - - TestOptions options = new TestOptions(); - - Argument argument; - - // Queue first. - options.bQueueFirst = cmdLine.idMap.containsKey(JWATTools.A_QUEUE_FIRST); - options.bHttpHeaderErrors = cmdLine.idMap.containsKey(A_HTTP_HEADER_ERRORS); - - // Thread workers. - argument = cmdLine.idMap.get( JWATTools.A_WORKERS ); - if ( argument != null && argument.value != null ) { - try { - options.threads = Integer.parseInt(argument.value); - } catch (NumberFormatException e) { - System.out.println( "Invalid number of threads requested: " + argument.value ); - System.exit( 1 ); - } - } - if ( options.threads < 1 ) { - System.out.println( "Invalid number of threads requested: " + options.threads ); - System.exit( 1 ); - } - - // Show errors. - if ( cmdLine.idMap.containsKey( A_SHOW_ERRORS ) ) { - options.bShowErrors = true; - } - System.out.println("Showing errors: " + options.bShowErrors); - - // Ignore digest. - if ( cmdLine.idMap.containsKey( A_IGNORE_DIGEST ) ) { - options.bValidateDigest = false; - } - System.out.println("Validate digest: " + options.bValidateDigest); - - // Relaxed URI validation. - if ( cmdLine.idMap.containsKey( A_LAX ) ) { - options.uriProfile = UriProfile.RFC3986_ABS_16BIT_LAX; - System.out.println("Using relaxed URI validation for ARC URL and WARC Target-URI."); - } - - // XML validation. - if ( cmdLine.idMap.containsKey( A_XML ) ) { - options.validatorPlugins.add(new XmlValidatorPlugin()); - } - - // Tag. - if ( cmdLine.idMap.containsKey( A_BAD ) ) { - options.bBad = true; - System.out.println("Tagging enabled for invalid files"); - } - - // After. - argument = cmdLine.idMap.get( A_AFTER ); - if ( argument != null && argument.value != null ) { - try { - DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss"); - dateFormat.setLenient(false); - Date afterDate = dateFormat.parse(argument.value); - options.after = afterDate.getTime(); - } catch (ParseException e) { - System.out.println("Invalid date format - " + argument.value); - System.exit( 1 ); - } - } - - // Files. - argument = cmdLine.idMap.get( JWATTools.A_FILES ); - options.filesList = argument.values; - - return options; - } - -} diff --git a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkOptions.java b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkOptions.java new file mode 100644 index 0000000..33b50cd --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkOptions.java @@ -0,0 +1,9 @@ +package org.jwat.tools.tasks.unchunk; + +import java.util.List; + +public class UnchunkOptions { + + public List filesList; + +} diff --git a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java new file mode 100644 index 0000000..745a5f7 --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTask.java @@ -0,0 +1,76 @@ +package org.jwat.tools.tasks.unchunk; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; + +import org.jwat.common.DigestInputStreamChunkedNoSkip; +import org.jwat.common.RandomAccessFileInputStream; +import org.jwat.common.RandomAccessFileOutputStream; +import org.jwat.tools.tasks.AbstractTask; + +public class UnchunkTask extends AbstractTask { + + @SuppressWarnings("unused") + private UnchunkOptions options; + + public UnchunkTask() { + } + + public void runtask(UnchunkOptions options) { + this.options = options; + filelist_feeder( options.filesList, this ); + } + + @Override + public void process(File srcFile) { + RandomAccessFile rafin = null; + RandomAccessFileInputStream rafis = null; + DigestInputStreamChunkedNoSkip dis = null; + RandomAccessFile rafout = null; + RandomAccessFileOutputStream rafos = null; + try { + rafin = new RandomAccessFile(srcFile, "r"); + rafis = new RandomAccessFileInputStream(rafin); + rafout = new RandomAccessFile(srcFile + ".unchunked", "rw"); + rafos = new RandomAccessFileOutputStream(rafout); + dis = new DigestInputStreamChunkedNoSkip(rafis, null, null, rafos); + long remaining = rafin.length(); + long skipped; + while (remaining > 0) { + skipped = dis.skip(remaining); + if (skipped > 0) { + remaining -= skipped; + } + } + dis.close(); + rafos.close(); + rafis.close(); + rafout.close(); + rafin.close(); + } + catch (IOException e) { + e.printStackTrace(); + } + int state; + if (dis != null) { + state = dis.getState(); + switch (state) { + case DigestInputStreamChunkedNoSkip.S_LENGTH: + case DigestInputStreamChunkedNoSkip.S_LENGTH_CR: + case DigestInputStreamChunkedNoSkip.S_LENGTH_LF: + case DigestInputStreamChunkedNoSkip.S_CHUNK_CR: + case DigestInputStreamChunkedNoSkip.S_CHUNK_LF: + case DigestInputStreamChunkedNoSkip.S_END_CR: + case DigestInputStreamChunkedNoSkip.S_END_LF: + break; + case DigestInputStreamChunkedNoSkip.S_DONE: + break; + case DigestInputStreamChunkedNoSkip.S_ERROR: + break; + } + dis.getOverflow(); + } + } + +} diff --git a/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java new file mode 100644 index 0000000..d22f2ec --- /dev/null +++ b/src/main/java/org/jwat/tools/tasks/unchunk/UnchunkTaskCLI.java @@ -0,0 +1,68 @@ +package org.jwat.tools.tasks.unchunk; + +import java.util.LinkedList; + +import org.jwat.tools.JWATTools; +import org.jwat.tools.tasks.TaskCLI; + +import com.antiaction.common.cli.Argument; +import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; +import com.antiaction.common.cli.CommandLine; +import com.antiaction.common.cli.Options; + +public class UnchunkTaskCLI extends TaskCLI { + + public static final String commandName = "unchunk"; + + public static final String commandDescription = "unchunk file(s) containing only chunked transfter encoded data"; + + @Override + public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); + System.out.println("jwattools [-o] unchunk ..."); + System.out.println(""); + System.out.println("Unchunk one or more files which consist of chunked transfer encoded data files."); + System.out.println("This command only handles the chunked data."); + System.out.println("All encapsulating headers or similar must be removed before using this command."); + System.out.println("This includes removing headers for ARC, WARC, HTTP etc."); + System.out.println("Use of this is mostly for debugging purposes."); + System.out.println(""); + System.out.println("options:"); + System.out.println(""); + System.out.println("none"); + System.out.println(""); + } + + @Override + public void runtask(CommandLine cmdLine) { + UnchunkTask task = new UnchunkTask(); + UnchunkOptions options = parseArguments(cmdLine); + task.runtask(options); + } + + public static UnchunkOptions parseArguments(CommandLine cmdLine) { + Options cliOptions = new Options(); + try { + cliOptions.addNamedArgument( "files", JWATTools.A_FILES, 1, 1 ); + cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); + } + catch (ArgumentParserException e) { + System.out.println( UnchunkTaskCLI.class.getName() + ": " + e.getMessage() ); + System.exit( 1 ); + } + + UnchunkOptions options = new UnchunkOptions(); + + Argument argument; + + // Files + argument = cmdLine.idMap.get( JWATTools.A_FILES ); + options.filesList = new LinkedList(); + //options.filesList = argument.values; + options.filesList.add( argument.value ); + + return options; + } + +} diff --git a/src/main/java/org/jwat/tools/tasks/UnpackOptions.java b/src/main/java/org/jwat/tools/tasks/unpack/UnpackOptions.java similarity index 70% rename from src/main/java/org/jwat/tools/tasks/UnpackOptions.java rename to src/main/java/org/jwat/tools/tasks/unpack/UnpackOptions.java index ec9f197..bcb0beb 100644 --- a/src/main/java/org/jwat/tools/tasks/UnpackOptions.java +++ b/src/main/java/org/jwat/tools/tasks/unpack/UnpackOptions.java @@ -1,4 +1,4 @@ -package org.jwat.tools.tasks; +package org.jwat.tools.tasks.unpack; import java.util.List; diff --git a/src/main/java/org/jwat/tools/tasks/UnpackTask.java b/src/main/java/org/jwat/tools/tasks/unpack/UnpackTask.java similarity index 93% rename from src/main/java/org/jwat/tools/tasks/UnpackTask.java rename to src/main/java/org/jwat/tools/tasks/unpack/UnpackTask.java index 90349c1..365ede2 100644 --- a/src/main/java/org/jwat/tools/tasks/UnpackTask.java +++ b/src/main/java/org/jwat/tools/tasks/unpack/UnpackTask.java @@ -1,4 +1,4 @@ -package org.jwat.tools.tasks; +package org.jwat.tools.tasks.unpack; import java.io.BufferedInputStream; import java.io.File; @@ -11,6 +11,7 @@ import org.jwat.common.RandomAccessFileInputStream; import org.jwat.gzip.GzipEntry; import org.jwat.gzip.GzipReader; +import org.jwat.tools.tasks.AbstractTask; // TODO public class UnpackTask extends AbstractTask { diff --git a/src/main/java/org/jwat/tools/tasks/UnpackTaskCLIParser.java b/src/main/java/org/jwat/tools/tasks/unpack/UnpackTaskCLI.java similarity index 54% rename from src/main/java/org/jwat/tools/tasks/UnpackTaskCLIParser.java rename to src/main/java/org/jwat/tools/tasks/unpack/UnpackTaskCLI.java index 1b7b291..f34b7b5 100644 --- a/src/main/java/org/jwat/tools/tasks/UnpackTaskCLIParser.java +++ b/src/main/java/org/jwat/tools/tasks/unpack/UnpackTaskCLI.java @@ -1,16 +1,31 @@ -package org.jwat.tools.tasks; +package org.jwat.tools.tasks.unpack; import org.jwat.tools.JWATTools; +import org.jwat.tools.tasks.TaskCLI; import com.antiaction.common.cli.Argument; -import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.ArgumentParser; +import com.antiaction.common.cli.ArgumentParserException; import com.antiaction.common.cli.CommandLine; import com.antiaction.common.cli.Options; -public class UnpackTaskCLIParser { +public class UnpackTaskCLI extends TaskCLI { + + public static final String commandName = "unpack"; + + public static final String commandDescription = "unpack multifile GZip"; + + @Override + public void show_help() { + System.out.println("FileTools v" + JWATTools.getVersionString()); + System.out.println("Work in progress..."); + } - protected UnpackTaskCLIParser() { + @Override + public void runtask(CommandLine cmdLine) { + UnpackTask task = new UnpackTask(); + UnpackOptions options = parseArguments(cmdLine); + task.runtask(options); } public static UnpackOptions parseArguments(CommandLine cmdLine) { @@ -20,7 +35,7 @@ public static UnpackOptions parseArguments(CommandLine cmdLine) { cmdLine = ArgumentParser.parse(cmdLine.argsArray, cliOptions, cmdLine); } catch (ArgumentParserException e) { - System.out.println( UnpackTaskCLIParser.class.getName() + ": " + e.getMessage() ); + System.out.println( UnpackTaskCLI.class.getName() + ": " + e.getMessage() ); System.exit( 1 ); } diff --git a/src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLIParser.java b/src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLI.java similarity index 89% rename from src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLIParser.java rename to src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLI.java index f663fa9..eb935bf 100644 --- a/src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLIParser.java +++ b/src/test/java/org/jwat/tools/tasks/arc2warc/TestArc2WarcTaskCLI.java @@ -15,7 +15,7 @@ import com.antiaction.common.cli.CommandLine; @RunWith(JUnit4.class) -public class TestArc2WarcTaskCLIParser { +public class TestArc2WarcTaskCLI { private SecurityManager securityManager; @@ -35,7 +35,7 @@ public void test_arc2warctask_cli_parser() { CommandLine cmdLine; Arc2WarcOptions options; - Arc2WarcTaskCLIParser object = new Arc2WarcTaskCLIParser(); + Arc2WarcTaskCLI object = new Arc2WarcTaskCLI(); Assert.assertNotNull(object); Object[][] cases = new Object[][] { @@ -85,7 +85,7 @@ public void test_arc2warctask_cli_parser() { System.out.println(i); cmdLine = new CommandLine(); cmdLine.argsArray = (String[])cases[ i ][ 0 ]; - options = Arc2WarcTaskCLIParser.parseArguments(cmdLine); + options = Arc2WarcTaskCLI.parseArguments(cmdLine); Assert.assertEquals(cases[ i ][ 1 ], options.threads); Assert.assertEquals(cases[ i ][ 2 ], options.destDir); Assert.assertEquals(cases[ i ][ 3 ], options.prefix); @@ -101,7 +101,7 @@ public void test_arc2warctask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {}; - options = Arc2WarcTaskCLIParser.parseArguments(cmdLine); + options = Arc2WarcTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -110,7 +110,7 @@ public void test_arc2warctask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "42"}; - options = Arc2WarcTaskCLIParser.parseArguments(cmdLine); + options = Arc2WarcTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -119,7 +119,7 @@ public void test_arc2warctask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "0", "file"}; - options = Arc2WarcTaskCLIParser.parseArguments(cmdLine); + options = Arc2WarcTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -128,7 +128,7 @@ public void test_arc2warctask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "fourtytwo", "file"}; - options = Arc2WarcTaskCLIParser.parseArguments(cmdLine); + options = Arc2WarcTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { diff --git a/src/test/java/org/jwat/tools/tasks/cdx/TestCDXTaskCLIParser.java b/src/test/java/org/jwat/tools/tasks/cdx/TestCDXTaskCLIParser.java index 7960f31..b3395d1 100644 --- a/src/test/java/org/jwat/tools/tasks/cdx/TestCDXTaskCLIParser.java +++ b/src/test/java/org/jwat/tools/tasks/cdx/TestCDXTaskCLIParser.java @@ -35,7 +35,7 @@ public void test_cdxtask_cli_parser() { CommandLine cmdLine; CDXOptions options; - CDXTaskCLIParser object = new CDXTaskCLIParser(); + CDXTaskCLI object = new CDXTaskCLI(); Assert.assertNotNull(object); Object[][] cases = new Object[][] { @@ -69,7 +69,7 @@ public void test_cdxtask_cli_parser() { for (int i=0; i fileList = options.filesList; @@ -72,7 +72,7 @@ public void test_changedtask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {}; - options = ChangedTaskCLIParser.parseArguments(cmdLine); + options = ChangedTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -81,7 +81,7 @@ public void test_changedtask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-o", "outfile"}; - options = ChangedTaskCLIParser.parseArguments(cmdLine); + options = ChangedTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { diff --git a/src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLIParser.java b/src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLI.java similarity index 92% rename from src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLIParser.java rename to src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLI.java index fb34cd0..3cd3f53 100644 --- a/src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLIParser.java +++ b/src/test/java/org/jwat/tools/tasks/compress/TestCompressTaskCLI.java @@ -15,7 +15,7 @@ import com.antiaction.common.cli.CommandLine; @RunWith(JUnit4.class) -public class TestCompressTaskCLIParser { +public class TestCompressTaskCLI { private SecurityManager securityManager; @@ -35,7 +35,7 @@ public void test_compresstask_cli_parser() { CommandLine cmdLine; CompressOptions options; - CompressTaskCLIParser object = new CompressTaskCLIParser(); + CompressTaskCLI object = new CompressTaskCLI(); Assert.assertNotNull(object); Object[][] cases = new Object[][] { @@ -134,7 +134,7 @@ null, new File("listfile1"), new String[] {"file12"} for (int i=0; i fileList = options.filesList; @@ -76,7 +76,7 @@ public void test_decompresstask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {}; - options = DecompressTaskCLIParser.parseArguments(cmdLine); + options = DecompressTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -85,7 +85,7 @@ public void test_decompresstask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "8"}; - options = DecompressTaskCLIParser.parseArguments(cmdLine); + options = DecompressTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -94,7 +94,7 @@ public void test_decompresstask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "0", "file"}; - options = DecompressTaskCLIParser.parseArguments(cmdLine); + options = DecompressTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { @@ -103,7 +103,7 @@ public void test_decompresstask_cli_parser() { try { cmdLine = new CommandLine(); cmdLine.argsArray = new String[] {"-w", "fourtytwo", "file"}; - options = DecompressTaskCLIParser.parseArguments(cmdLine); + options = DecompressTaskCLI.parseArguments(cmdLine); Assert.fail("Exception expected!"); } catch (ExitException e) { diff --git a/src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLIParser.java b/src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLI.java similarity index 89% rename from src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLIParser.java rename to src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLI.java index b733407..a7572d7 100644 --- a/src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLIParser.java +++ b/src/test/java/org/jwat/tools/tasks/delete/TestDeleteTaskCLI.java @@ -15,7 +15,7 @@ import com.antiaction.common.cli.CommandLine; @RunWith(JUnit4.class) -public class TestDeleteTaskCLIParser { +public class TestDeleteTaskCLI { private SecurityManager securityManager; @@ -35,7 +35,7 @@ public void test_deletetask_cli_parser() { CommandLine cmdLine; DeleteOptions options; - DeleteTaskCLIParser object = new DeleteTaskCLIParser(); + DeleteTaskCLI object = new DeleteTaskCLI(); Assert.assertNotNull(object); Object[][] cases = new Object[][] { @@ -64,7 +64,7 @@ true, new File("output-file2"), for (int i=0; i