diff --git a/Sources/DataAccess/DataTableBuilder.cs b/Sources/DataAccess/DataTableBuilder.cs
index 0601d80..1cf4aed 100644
--- a/Sources/DataAccess/DataTableBuilder.cs
+++ b/Sources/DataAccess/DataTableBuilder.cs
@@ -115,11 +115,11 @@ public static MutableDataTable GetMutableCopy(this DataTableBuilder builder, Dat
/// ignored
/// filename of table to load. Schema is inferred from header row.
/// a in-memory table containing the topN rows from the supplied file.
public static MutableDataTable ReadSampleTopN(this DataTableBuilder builder, string filename)
{
- return ReadSampleTopN(builder, filename, 100);
+ return ReadSampleTopN(builder, filename, default(char), 100); // default(char): separator is guessed from the header row; keeping this overload separator-free avoids an ambiguous call with the topN overload.
}
-
+
///
/// Return an in-memory table that contains the topN rows from the table in the filename.
///
@@ -127,7 +127,7 @@ public static MutableDataTable ReadSampleTopN(this DataTableBuilder builder, str
/// filename of table to load. Schema is inferred from header row.
/// reads the topN rows from the table.
/// a in-memory table containing the topN rows from the supplied file.
- public static MutableDataTable ReadSampleTopN(this DataTableBuilder builder, string filename, int topN = 100)
+ public static MutableDataTable ReadSampleTopN(this DataTableBuilder builder, string filename, char columnSeparator = default(char), int topN = 100)
{
Debug.Assert(builder != null);
if (filename == null)
@@ -135,7 +135,7 @@ public static MutableDataTable ReadSampleTopN(this DataTableBuilder builder, str
throw new ArgumentNullException("filename");
}
- DataTable source = new FileStreamingDataTable(filename);
+ DataTable source = new FileStreamingDataTable(filename, columnSeparator);
MutableDataTable dt = Analyze.SampleTopN(source, topN);
return dt;
}
@@ -147,11 +147,11 @@ public static MutableDataTable ReadSampleTopN(this DataTableBuilder builder, str
///
/// filename of CSV to read
/// a streaming data table for the given filename
- public static DataTable ReadLazy(this DataTableBuilder builder, string filename)
+ public static DataTable ReadLazy(this DataTableBuilder builder, string filename, char columnSeparator = default(char))
{
Debug.Assert(builder != null);
- return new FileStreamingDataTable(filename) { Name = filename };
+ return new FileStreamingDataTable(filename, columnSeparator) { Name = filename };
}
///
@@ -161,14 +161,13 @@ public static DataTable ReadLazy(this DataTableBuilder builder, string filename)
///
/// input stream. Must be seekable and readable
/// a streaming data table for the given filename
- public static DataTable ReadLazy(this DataTableBuilder builder, Stream inputStream)
+ public static DataTable ReadLazy(this DataTableBuilder builder, Stream inputStream, char columnSeparator = default(char))
{
Debug.Assert(builder != null);
- return new StreamingDataTable(inputStream);
+ return new StreamingDataTable(inputStream, columnSeparator);
}
-
///
/// Create an in-memory table with 2 columns (key and value), where each row is a KeyValuePair from the dictionary.
///
diff --git a/Sources/DataAccess/Readers.cs b/Sources/DataAccess/Readers.cs
index dd3879e..dc396ef 100644
--- a/Sources/DataAccess/Readers.cs
+++ b/Sources/DataAccess/Readers.cs
@@ -1,8 +1,10 @@
using System;
using System.Collections.Generic;
using System.IO;
+using System.Linq;
using System.Text;
+
namespace DataAccess
{
@@ -225,24 +227,15 @@ public static MutableDataTable Read(TextReader stream, char delimiter = '\0')
public static char GuessSeparateFromHeaderRow(string header)
{
- if (header.Contains("\t"))
- {
- return '\t';
- }
-
- if (header.Contains(","))
- {
- return ',';
- }
-
- if (header.Contains(";"))
- {
- return ';';
- }
-
- // Fallback is always comma. This implies a single column.
- return ',';
-
+ // Returns the separator ('\t', ',' or ';') that occurs earliest in the header row.
+ // A null/empty header (e.g. an empty stream) or one with no separator falls back to
+ // comma, which implies a single column.
+ if (string.IsNullOrEmpty(header))
+ {
+ return ',';
+ }
+ int first = header.IndexOfAny(new[] { '\t', ',', ';' });
+ return first >= 0 ? header[first] : ',';
}
// Read in a Ascii file that uses the given separate characters.
diff --git a/Sources/DataAccess/StreamingDataTable.cs b/Sources/DataAccess/StreamingDataTable.cs
index 875c579..fcfb13a 100644
--- a/Sources/DataAccess/StreamingDataTable.cs
+++ b/Sources/DataAccess/StreamingDataTable.cs
@@ -15,7 +15,8 @@ internal class StreamingDataTable : TextReaderDataTable
{
readonly Stream _input;
- public StreamingDataTable(Stream input)
+ public StreamingDataTable(Stream input, char columnSeparator)
+ : base(columnSeparator)
{
// We could optimize to avoid requiring CanSeek if we failed on attemps
// to read the the rows multiple times.
@@ -30,7 +31,7 @@ protected override TextReader OpenText()
{
_input.Position = 0;
-
+
return new StreamReader(_input);
}
protected override void CloseText(TextReader reader)
@@ -43,8 +44,9 @@ protected override void CloseText(TextReader reader)
internal class FileStreamingDataTable : TextReaderDataTable
{
private readonly string _filename;
-
- public FileStreamingDataTable(string filename)
+
+ public FileStreamingDataTable(string filename, char columnSeparator)
+ : base(columnSeparator)
{
_filename = filename;
}
@@ -65,9 +67,15 @@ protected override void CloseText(TextReader reader)
///
internal abstract class TextReaderDataTable : DataTable
{
+ private readonly char columnSeparator;
+
private string[] _names;
-
+ protected TextReaderDataTable(char columnSeparator)
+ {
+ this.columnSeparator = columnSeparator;
+ }
+
public override IEnumerable ColumnNames
{
get
@@ -77,10 +85,10 @@ public override IEnumerable ColumnNames
TextReader sr = null;
try
{
- sr = this.OpenText();
+ sr = this.OpenText();
// First get columns.
string header = sr.ReadLine();
- char ch = Reader.GuessSeparateFromHeaderRow(header);
+ char ch = this.columnSeparator == default(char) ? Reader.GuessSeparateFromHeaderRow(header) : this.columnSeparator;
_names = Reader.split(header, ch);
}
finally
@@ -100,7 +108,7 @@ public override IEnumerable ColumnNames
// called on reader from OpenText
// Don't call dipose because that can close streams.
protected abstract void CloseText(TextReader reader);
-
+
public override IEnumerable Rows
{
get
@@ -113,7 +121,7 @@ public override IEnumerable Rows
sr = this.OpenText();
string header = sr.ReadLine(); // skip past header
- char chSeparator = Reader.GuessSeparateFromHeaderRow(header);
+ char chSeparator = this.columnSeparator == default(char) ? Reader.GuessSeparateFromHeaderRow(header) : this.columnSeparator;
int illegal = 0;
string line;
@@ -124,7 +132,7 @@ public override IEnumerable Rows
{
string[] parts = Reader.split(line, chSeparator);
-
+
// $$$ Major hack for dealing with newlines in quotes strings.
// The better fix here would be to switch to a streaming interface.
if (parts.Length != columnCount)