Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 11 additions & 19 deletions csv_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,24 +225,28 @@ char* read_full_record(FILE *file, Arena *arena) {
return NULL;
}

char *record = malloc(1024);
if (!record) {
size_t record_capacity = 1024;
void *arena_ptr;
ArenaResult result = arena_alloc(arena, record_capacity, &arena_ptr);
if (result != ARENA_OK) {
return NULL;
}

char *record = (char*)arena_ptr;
size_t record_len = 0;
size_t record_capacity = 1024;
bool in_quotes = false;
int c;

while ((c = fgetc(file)) != EOF) {
if (record_len >= record_capacity - 1) {
size_t new_capacity = record_capacity * 2;
char *new_record = realloc(record, new_capacity);
if (!new_record) {
free(record);
void *new_ptr;
ArenaResult grow_result = arena_alloc(arena, new_capacity, &new_ptr);
if (grow_result != ARENA_OK) {
return NULL;
}
char *new_record = (char*)new_ptr;
memcpy(new_record, record, record_len);
record = new_record;
record_capacity = new_capacity;
}
Expand Down Expand Up @@ -282,22 +286,10 @@ char* read_full_record(FILE *file, Arena *arena) {
}

if (record_len == 0 && c == EOF) {
free(record);
return NULL;
}

record[record_len] = '\0';

void *arena_ptr;
ArenaResult result = arena_alloc(arena, record_len + 1, &arena_ptr);
if (result != ARENA_OK) {
free(record);
return NULL;
}

char *arena_record = (char*)arena_ptr;
memcpy(arena_record, record, record_len + 1);
free(record);

return arena_record;
return record;
}
127 changes: 112 additions & 15 deletions csv_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
#include "csv_parser.h"
#include "arena.h"

CSVReader* csv_reader_init_with_config(Arena *arena, CSVConfig *config) {
CSVReader* csv_reader_init_with_config(Arena *persistent_arena, Arena *temp_arena, CSVConfig *config) {
void *ptr;
ArenaResult result = arena_alloc(arena, sizeof(CSVReader), &ptr);
ArenaResult result = arena_alloc(persistent_arena, sizeof(CSVReader), &ptr);
if (result != ARENA_OK) {
return NULL;
}
Expand All @@ -18,18 +18,91 @@ CSVReader* csv_reader_init_with_config(Arena *arena, CSVConfig *config) {
return NULL;
}

reader->arena = arena;
reader->persistent_arena = persistent_arena;
reader->temp_arena = temp_arena;
reader->config = config;
reader->headers_loaded = false;
reader->cached_header_count = 0;
reader->cached_headers = NULL;
reader->line_number = 0;
reader->current_record = NULL;
reader->owns_arenas = false;

if (config->hasHeader) {
char *line = read_full_record(reader->file, arena);
char *line = read_full_record(reader->file, reader->persistent_arena);
if (line) {
reader->line_number++;
CSVParseResult result = csv_parse_line_inplace(line, arena, config, reader->line_number);
CSVParseResult result = csv_parse_line_inplace(line, reader->persistent_arena, config, reader->line_number);
if (result.success) {
reader->cached_headers = result.fields.fields;
reader->cached_header_count = result.fields.count;
reader->headers_loaded = true;
}
}
}

return reader;
}

CSVReader* csv_reader_init_standalone(CSVConfig *config) {
if (!config) {
return NULL;
}

Arena *persistent_arena = malloc(sizeof(Arena));
Arena *temp_arena = malloc(sizeof(Arena));

if (!persistent_arena || !temp_arena) {
if (persistent_arena) free(persistent_arena);
if (temp_arena) free(temp_arena);
return NULL;
}

ArenaResult p_result = arena_create(persistent_arena, 1024 * 1024);
ArenaResult t_result = arena_create(temp_arena, 1024 * 1024);

if (p_result != ARENA_OK || t_result != ARENA_OK) {
if (p_result == ARENA_OK) arena_destroy(persistent_arena);
if (t_result == ARENA_OK) arena_destroy(temp_arena);
free(persistent_arena);
free(temp_arena);
return NULL;
}

CSVReader *reader = malloc(sizeof(CSVReader));
if (!reader) {
arena_destroy(persistent_arena);
arena_destroy(temp_arena);
free(persistent_arena);
free(temp_arena);
return NULL;
}

reader->file = fopen(config->path, "r");
if (!reader->file) {
arena_destroy(persistent_arena);
arena_destroy(temp_arena);
free(persistent_arena);
free(temp_arena);
free(reader);
return NULL;
}

reader->persistent_arena = persistent_arena;
reader->temp_arena = temp_arena;
reader->config = config;
reader->headers_loaded = false;
reader->cached_header_count = 0;
reader->cached_headers = NULL;
reader->line_number = 0;
reader->current_record = NULL;
reader->owns_arenas = true;

if (config->hasHeader) {
char *line = read_full_record(reader->file, reader->persistent_arena);
if (line) {
reader->line_number++;
CSVParseResult result = csv_parse_line_inplace(line, reader->persistent_arena, config, reader->line_number);
if (result.success) {
reader->cached_headers = result.fields.fields;
reader->cached_header_count = result.fields.count;
Expand All @@ -46,33 +119,55 @@ CSVRecord* csv_reader_next_record(CSVReader *reader) {
return NULL;
}

char *line = read_full_record(reader->file, reader->arena);
arena_reset(reader->temp_arena);

char *line = read_full_record(reader->file, reader->temp_arena);
if (!line) {
return NULL;
}

reader->line_number++;
CSVParseResult result = csv_parse_line_inplace(line, reader->arena, reader->config, reader->line_number);
CSVParseResult result = csv_parse_line_inplace(line, reader->temp_arena, reader->config, reader->line_number);
if (!result.success) {
return NULL;
}

void *ptr;
ArenaResult arena_result = arena_alloc(reader->arena, sizeof(CSVRecord), &ptr);
ArenaResult arena_result = arena_alloc(reader->temp_arena, sizeof(CSVRecord), &ptr);
if (arena_result != ARENA_OK) {
return NULL;
}

CSVRecord *record = (CSVRecord*)ptr;
record->fields = result.fields.fields;
record->field_count = result.fields.count;
reader->current_record = record;

return record;
}

/*
 * Release the resources held by a CSVReader.
 *
 * Closes the reader's file if one is open. When reader->owns_arenas is
 * true, each non-NULL arena is destroyed and freed, and the reader struct
 * itself is freed. When it is false, the arena pointers are only cleared;
 * neither the arenas nor the reader struct are freed here.
 *
 * A NULL reader is a no-op.
 */
void csv_reader_free(CSVReader *reader) {
    if (!reader) {
        return;
    }

    if (reader->file) {
        fclose(reader->file);
        reader->file = NULL;
    }

    if (!reader->owns_arenas) {
        /* Arenas are not owned by this reader: only drop the references. */
        reader->persistent_arena = NULL;
        reader->temp_arena = NULL;
        return;
    }

    /* Owning reader: tear down the persistent arena first, then the temp one. */
    if (reader->persistent_arena) {
        arena_destroy(reader->persistent_arena);
        free(reader->persistent_arena);
        reader->persistent_arena = NULL;
    }

    if (reader->temp_arena) {
        arena_destroy(reader->temp_arena);
        free(reader->temp_arena);
        reader->temp_arena = NULL;
    }

    /* The reader struct is released only on the owning path. */
    free(reader);
}
Expand All @@ -97,21 +192,22 @@ void csv_reader_rewind(CSVReader *reader) {
reader->line_number = 0;

if (reader->config->hasHeader && reader->headers_loaded) {
char *line = read_full_record(reader->file, reader->arena);
char *line = read_full_record(reader->file, reader->persistent_arena);
if (line) {
reader->line_number = 1;
}
}
}
}

int csv_reader_set_config(CSVReader *reader, Arena *arena, const CSVConfig *config) {
if (!reader || !config || !arena) {
int csv_reader_set_config(CSVReader *reader, Arena *persistent_arena, Arena *temp_arena, const CSVConfig *config) {
if (!reader || !config || !persistent_arena || !temp_arena) {
return 0;
}

reader->config = (CSVConfig*)config;
reader->arena = arena;
reader->persistent_arena = persistent_arena;
reader->temp_arena = temp_arena;
return 1;
}

Expand All @@ -130,15 +226,15 @@ long csv_reader_get_record_count(CSVReader *reader) {
long record_count = 0;

if (reader->config && reader->config->hasHeader) {
char *header_line = read_full_record(reader->file, reader->arena);
char *header_line = read_full_record(reader->file, reader->persistent_arena);
if (!header_line) {
fseek(reader->file, current_pos, SEEK_SET);
return 0;
}
}

while (1) {
char *line = read_full_record(reader->file, reader->arena);
char *line = read_full_record(reader->file, reader->persistent_arena);
if (!line) {
break;
}
Expand Down Expand Up @@ -180,7 +276,8 @@ int csv_reader_seek(CSVReader *reader, long position) {
csv_reader_rewind(reader);

for (long i = 0; i < position; i++) {
char *line = read_full_record(reader->file, reader->arena);
arena_reset(reader->temp_arena);
char *line = read_full_record(reader->file, reader->temp_arena);
if (!line) {
return 0;
}
Expand Down
14 changes: 9 additions & 5 deletions csv_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,25 @@ typedef struct {

typedef struct {
FILE *file;
Arena *arena;
CSVConfig *config;
Arena *persistent_arena;
Arena *temp_arena;
bool headers_loaded;
size_t cached_header_count;
int cached_header_count;
char **cached_headers;
int line_number;
long line_number;
CSVRecord *current_record;
bool owns_arenas;
} CSVReader;

CSVReader* csv_reader_init_with_config(Arena *arena, CSVConfig *config);
CSVReader* csv_reader_init_with_config(Arena *persistent_arena, Arena *temp_arena, CSVConfig *config);
CSVReader* csv_reader_init_standalone(CSVConfig *config);
void csv_reader_free(CSVReader *reader);
CSVRecord* csv_reader_next_record(CSVReader *reader);


void csv_reader_rewind(CSVReader *reader);
int csv_reader_set_config(CSVReader *reader, Arena *arena, const CSVConfig *config);
int csv_reader_set_config(CSVReader *reader, Arena *persistent_arena, Arena *temp_arena, const CSVConfig *config);
long csv_reader_get_record_count(CSVReader *reader);
long csv_reader_get_position(CSVReader *reader);
char** csv_reader_get_headers(CSVReader *reader, int *header_count);
Expand Down
Loading
Loading