From fcafb586f5f4e86c1d263b36a304c4b2d3f06839 Mon Sep 17 00:00:00 2001 From: Adrian Dumascu Date: Sun, 25 Aug 2019 06:59:14 -0700 Subject: [PATCH] A few errors discovered while reading the book. --- pom.xml | 2 +- src/main/java/com/oreilly/dswj/dataops/MatrixResampler.java | 4 ---- src/main/java/com/oreilly/dswj/datasets/Iris.java | 2 +- .../dswj/mapreduce/CustomWordCountMapReduceExample.java | 3 ++- .../java/com/oreilly/dswj/mapreduce/SparseMatrixWritable.java | 2 +- 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pom.xml b/pom.xml index f949de3..324c119 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 com.oreilly.javabook - Dava_Science_with_Java + Data_Science_with_Java 1.0-SNAPSHOT jar diff --git a/src/main/java/com/oreilly/dswj/dataops/MatrixResampler.java b/src/main/java/com/oreilly/dswj/dataops/MatrixResampler.java index 47075a4..26dd0b5 100644 --- a/src/main/java/com/oreilly/dswj/dataops/MatrixResampler.java +++ b/src/main/java/com/oreilly/dswj/dataops/MatrixResampler.java @@ -67,10 +67,6 @@ public void calculateTestTrainSplit(double testFraction, long seed) { Random rnd = new Random(seed); -// for (int i = 1; i <= features.getRowDimension(); i++) { - for (int i = 0; i < features.getRowDimension(); i++) { - indeces.add(i); - } Collections.shuffle(indeces, rnd); int testSize = new Long(Math.round(testFraction * features.getRowDimension())).intValue(); diff --git a/src/main/java/com/oreilly/dswj/datasets/Iris.java b/src/main/java/com/oreilly/dswj/datasets/Iris.java index 99da17e..58fbbe3 100644 --- a/src/main/java/com/oreilly/dswj/datasets/Iris.java +++ b/src/main/java/com/oreilly/dswj/datasets/Iris.java @@ -64,7 +64,7 @@ public Iris() throws IOException { labels.setEntry(rowCounter, 1, 1); break; case "Iris-virginica": - labels.setEntry(rowCounter, 3, 1); + labels.setEntry(rowCounter, 2, 1); break; default: System.out.println("something wrong with " + plantClass); diff --git a/src/main/java/com/oreilly/dswj/mapreduce/CustomWordCountMapReduceExample.java b/src/main/java/com/oreilly/dswj/mapreduce/CustomWordCountMapReduceExample.java index 284612c..3c3c98e 100644 --- a/src/main/java/com/oreilly/dswj/mapreduce/CustomWordCountMapReduceExample.java +++ b/src/main/java/com/oreilly/dswj/mapreduce/CustomWordCountMapReduceExample.java @@ -23,6 +23,7 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer; +import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; @@ -53,7 +54,7 @@ public int run(String[] args) throws Exception { job.setMapperClass(SimpleTokenMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); - job.setReducerClass(IntSumReducer.class); + job.setReducerClass(LongSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setNumReduceTasks(1); diff --git a/src/main/java/com/oreilly/dswj/mapreduce/SparseMatrixWritable.java b/src/main/java/com/oreilly/dswj/mapreduce/SparseMatrixWritable.java index 0e17b34..4420283 100644 --- a/src/main/java/com/oreilly/dswj/mapreduce/SparseMatrixWritable.java +++ b/src/main/java/com/oreilly/dswj/mapreduce/SparseMatrixWritable.java @@ -40,7 +40,7 @@ public SparseMatrixWritable(int rowIndex, int columnIndex, double entry) { @Override public void write(DataOutput d) throws IOException { d.writeInt(rowIndex); - d.writeInt(rowIndex); + d.writeInt(columnIndex); d.writeDouble(entry); }