Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
cfd4dc6
cloud6 config
Feb 9, 2021
24851c4
Initial implem
jlegriel Feb 25, 2021
45ddb34
98% tp and 37% cm
jlegriel Mar 2, 2021
6b4cbb2
Implement changes explained by Bertil: variant if frequency > 20%
jlegriel Mar 2, 2021
5d3738f
Reporting all variants > 20% frequency
jlegriel Mar 2, 2021
d155195
Remove the filter, free memory of variant
jlegriel Mar 2, 2021
ff64289
Changes to also select the best score when no pair with matching
jlegriel Mar 10, 2021
aa0df86
Implementation of mapq score
jlegriel Mar 17, 2021
f9f6f77
Add some filters and read of quality information
jlegriel Mar 26, 2021
0e33e26
Saving current state. Bug fixed for using indels for frequency table.
jlegriel Mar 31, 2021
0182090
Several changes/refactoring. Regression on chr3 need to debug
jlegriel Apr 14, 2021
fcade21
Saving current dev status. Workaround for SW algo issue. Going to int…
jlegriel May 20, 2021
7a5391b
Integrate Dominique's fix. Seems to remove all missed indels on chr3.…
jlegriel May 20, 2021
7affce1
Fix the issue by retrieving previous code to read SW result. And do n…
jlegriel May 24, 2021
7323e03
last fix from dominique in SW plus some debug. Still some issue in ho…
jlegriel Jun 2, 2021
a62d0b9
A bit of cosmetics
jlegriel Nov 19, 2021
53e3e71
Disable debug code.
jlegriel Nov 22, 2021
eb87cfe
created a debugging and logging framework for read mappings
amoisson Nov 23, 2021
417aea7
improved read mapping debugging framework and used better constants f…
amoisson Dec 7, 2021
988d57e
replaced '=' with nucleotide in read mapping file
amoisson Dec 8, 2021
6949534
fixed case issue in mapping file
amoisson Dec 8, 2021
9802377
made host-side filters more permissive
amoisson Dec 10, 2021
f5dc5d6
added a whole bunch of comments in DPD and DPD_compute functions
amoisson Dec 10, 2021
50eacea
modified processread giving 93% performance on chr22
amoisson Jan 14, 2022
93c352d
fixed backtrack_end offset
amoisson Jan 19, 2022
ac3b976
fixed small bug + added more debug prints
amoisson Jan 21, 2022
fa700ec
some bug fixes and profiling mixed together
amoisson Feb 1, 2022
c511ed7
98.8% common match \o/
amoisson Feb 1, 2022
2668efc
fixed max score bug
amoisson Feb 3, 2022
5629652
now using affine filter
amoisson Mar 4, 2022
0899449
Added a way to dump the frequency table in a bin file and extract par…
amoisson Mar 11, 2022
2bbb39f
fixed frequency table dumping
amoisson Mar 18, 2022
7a09e4f
added a variants codependence info for more context aware vc
amoisson Apr 5, 2022
64e38bb
fixed deallocation
amoisson Apr 18, 2022
595c3f3
better constants
amoisson Apr 26, 2022
365bb77
fixed mapping file for chromosomes beyond chr1; now using single mapping
amoisson May 5, 2022
d570f60
updated some constants to the best found yet for single-mapping
amoisson May 6, 2022
ecd2216
Merge branch 'sdk-2021.4' into improve-variant-calling
amoisson May 17, 2022
5cef587
reduced max_score back to 40. It seems to have improved both algorith…
amoisson May 18, 2022
0ecebfa
back to 120 read size for integration purposes
amoisson May 19, 2022
2179062
removed a few ugly gotos and commented out code
amoisson May 19, 2022
080bb41
fixed a TODO or two
amoisson May 19, 2022
00f8a00
made filters adapt to more read sizes
amoisson May 30, 2022
12ef73c
removed unused code and some deprecated TODOs
amoisson May 30, 2022
4b85cb6
made profiling optionnal and removed some useless code
amoisson May 31, 2022
f546d33
fixed no freq table issues
amoisson Jul 20, 2022
53e8cd2
added debug read mapping to no-freq-table
amoisson Jul 20, 2022
153d013
variant tree only allocated if frequency table not used
amoisson Sep 9, 2022
9b06ee0
added INFO logs for more allocs
amoisson Sep 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions common/inc/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,14 @@ typedef uint32_t delta_info_t;
* @brief Coordinates of the read that matched in the reference genome.
*/
typedef struct {
union {
uint64_t coord;
//union {
//uint64_t coord;
struct {
uint32_t seed_nr;
uint32_t seq_nr;
uint32_t seq_nr:31;
uint32_t nodp:1;
};
};
//};
} dpu_result_coord_t;

/**
Expand Down Expand Up @@ -119,4 +120,20 @@ typedef struct {
uint8_t nbr[ALIGN_DPU(SIZE_NEIGHBOUR_IN_BYTES)];
} coords_and_nbr_t;


// configuration for variant calling using frequency table
// (everything below is a preprocessor macro: changing a value requires
// rebuilding every translation unit that includes this header)

// to activate use of reads with indels
// (in dpu/src/task.c this macro gates the ODPD fallback that is run when the
// NODP score comes back as UINT_MAX)
#define USE_INDEL
// to activate use of mapq score
// (currently disabled: the define below is commented out)
//#define USE_MAPQ_SCORE

// various parameters/thresholds
// NOTE(review): the units and exact meaning of these values are not visible
// from this header - confirm against the code that consumes them before tuning.
#define DIST_PAIR_THRESHOLD 1
#define DIST_SINGLE_THRESHOLD 0
#define MAPQ_SCALING_FACTOR 2
#define READ_DIST_LOWER_BOUND 50
#define READ_DIST_UPPER_BOUND 2000
// Alias: the preprocessor rewrites every use of `depth_filter` to `depth_filter3`.
// NOTE(review): presumably selects one of several depth-filter implementations - confirm.
#define depth_filter depth_filter3

#endif /* __COMMON_H__ */
3 changes: 2 additions & 1 deletion dpu/inc/dout.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ void dout_init(unsigned int tid, dout_t *dout);
* @param seed_nr Recorded seed number.
* @param seq_nr Recorded sequence number.
* @param stats To update statistical report.
* @param nodp True if the result was from nodp, false if from odpd.
*/
void dout_add(dout_t *dout, uint32_t num, unsigned int score, uint32_t seed_nr, uint32_t seq_nr, dpu_tasklet_stats_t *stats);
void dout_add(dout_t *dout, uint32_t num, unsigned int score, uint32_t seed_nr, uint32_t seq_nr, dpu_tasklet_stats_t *stats, uint8_t nodp);

/**
* @brief locates a swap page for a given data out structure.
Expand Down
3 changes: 2 additions & 1 deletion dpu/src/dout.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void dout_init(unsigned int tid, dout_t *dout)
dout_clear(dout);
}

void dout_add(dout_t *dout, uint32_t num, unsigned int score, uint32_t seed_nr, uint32_t seq_nr, dpu_tasklet_stats_t *stats)
void dout_add(dout_t *dout, uint32_t num, unsigned int score, uint32_t seed_nr, uint32_t seq_nr, dpu_tasklet_stats_t *stats, uint8_t nodp)
{
dpu_result_out_t *new_out;
if (dout->nb_cached_out == MAX_LOCAL_RESULTS_PER_READ) {
Expand All @@ -49,6 +49,7 @@ void dout_add(dout_t *dout, uint32_t num, unsigned int score, uint32_t seed_nr,
new_out->score = score;
new_out->coord.seed_nr = seed_nr;
new_out->coord.seq_nr = seq_nr;
new_out->coord.nodp = nodp;

dout->nb_cached_out++;
dout->nb_results++;
Expand Down
2 changes: 1 addition & 1 deletion dpu/src/odpd_opt.S
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#define COST_SUB 10
#define COST_GAPO 11
#define COST_GAPE 1
#define COST_INIT 99
#define COST_INIT 999

#define LINE_SIZE ( 6*4 )
#define d0off ( 0*4 )
Expand Down
14 changes: 12 additions & 2 deletions dpu/src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@ __host dpu_compute_time_t DPU_COMPUTE_TIME_VAR;
/**
* @brief Maximum score allowed.
*/
#define MAX_SCORE (40)
// NOTE: both branches currently define the same value (40); the SIZE_READ
// split only has an effect if the two values are tuned independently.
#if SIZE_READ>120
#define MAX_SCORE 40
#else
#define MAX_SCORE 40
#endif

/**
* @brief Number of reference read to be fetch per mram read
Expand Down Expand Up @@ -121,6 +125,10 @@ static void compare_neighbours(sysname_t tasklet_id, uint32_t *mini, coords_and_
STATS_STORE_NODP_TIME(tasklet_stats, (end + acc - start));
STATS_INCR_NB_NODP_CALLS(*tasklet_stats);

bool nodp = true;

//TODO uncomment for indel
#ifdef USE_INDEL
if (score_nodp == UINT_MAX) {
STATS_GET_START_TIME(start, acc, end);

Expand All @@ -129,7 +137,9 @@ static void compare_neighbours(sysname_t tasklet_id, uint32_t *mini, coords_and_
STATS_GET_END_TIME(end, acc);
STATS_STORE_ODPD_TIME(tasklet_stats, (end + acc - start));
STATS_INCR_NB_ODPD_CALLS(*tasklet_stats);
nodp = false;
}
#endif

if (score > *mini) {
return;
Expand All @@ -148,7 +158,7 @@ static void compare_neighbours(sysname_t tasklet_id, uint32_t *mini, coords_and_
}

dout_add(dout, request->num, (unsigned int)score, cached_coords_and_nbr->coord.seed_nr, cached_coords_and_nbr->coord.seq_nr,
tasklet_stats);
tasklet_stats, nodp);
}

static void compute_request(sysname_t tasklet_id, coords_and_nbr_t *cached_coords_and_nbr, uint8_t *current_read_nbr,
Expand Down
2 changes: 1 addition & 1 deletion host/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ file(GLOB_RECURSE SOURCES src/*.c)
add_executable(upvc ${SOURCES})

target_include_directories(upvc PUBLIC "${DPU_HOST_INCLUDE_DIRECTORIES}" inc/ ../common/inc/)
target_link_libraries(upvc ${DPU_HOST_LIBRARIES} pthread)
target_link_libraries(upvc ${DPU_HOST_LIBRARIES} pthread m)

set(NB_DPU_MARK)
if (NB_DPU)
Expand Down
3 changes: 2 additions & 1 deletion host/inc/accumulateread.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
#define __ACCUMULATEREAD_H__

#include "common.h"
#include <stdbool.h>

/**
 * @brief Pairs a counter (nb_res) with a pointer to dpu_result_out_t entries (results).
 *
 * NOTE(review): presumably nb_res is the number of valid entries reachable
 * through results; ownership of the results buffer is not visible from this
 * header - confirm against accumulate_get_result().
 */
typedef struct {
nb_result_t nb_res;
dpu_result_out_t *results;
} acc_results_t;

acc_results_t *accumulate_get_buffer(unsigned int dpu_id, unsigned int pass_id);
acc_results_t accumulate_get_result(unsigned int pass_id);
acc_results_t accumulate_get_result(unsigned int pass_id, bool free_results);

void accumulate_read(unsigned int pass_id, unsigned int dpu_offset);

Expand Down
84 changes: 84 additions & 0 deletions host/inc/debug.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#ifndef __DEBUG_H__
#define __DEBUG_H__

/**
 * @file debug.h
 * @brief Compile-time filtered logging macros that write to stderr.
 *
 * Usage:  LOG_WARN("unexpected value %d\n", value);
 *
 * Each LOG_<LEVEL>(fmt, ...) macro prints an optional timestamp, an optional
 * (optionally colored) level prefix, then the printf-style message. A level
 * is compiled in only when VERBOSE >= V_<LEVEL>; otherwise the macro expands
 * to an empty statement and its arguments are NOT evaluated, so never put
 * side effects in log arguments.
 *
 * Every LOG_* macro expands to exactly one statement and must be followed by
 * a semicolon, which makes it safe inside unbraced if/else.
 */

#include <stdio.h>
#include <time.h>

/* Verbosity levels, from silent to most verbose. */
#define V_QUIET 0
#define V_FATAL 1
#define V_ERROR 2
#define V_WARN 3
#define V_INFO 4
#define V_DEBUG 5
#define V_TRACE 6

/*
 * Compile-time configuration.
 * The switches are tested with #if, so they must be the integers 1 / 0:
 * before C23 the preprocessor replaces an unknown identifier such as `true`
 * with 0 unless <stdbool.h> happens to have been included first, which would
 * silently disable the feature depending on include order.
 */
#define VERBOSE V_WARN
#define VERBOSE_COLORS 1
#define VERBOSE_LOG_LEVEL 1
#define VERBOSE_TIMESTAMP 1

/* ANSI escape sequences used to color the level prefix. */
#if VERBOSE_COLORS
#define VERBOSE_COLOR_START_FATAL "\033[41m"
#define VERBOSE_COLOR_START_ERROR "\033[31m"
#define VERBOSE_COLOR_START_WARN "\033[33m"
#define VERBOSE_COLOR_START_INFO "\033[32m"
#define VERBOSE_COLOR_START_DEBUG "\033[34m"
#define VERBOSE_COLOR_START_TRACE "\033[36m"
#define VERBOSE_COLOR_END "\033[0m"
#else
#define VERBOSE_COLOR_START_FATAL
#define VERBOSE_COLOR_START_ERROR
#define VERBOSE_COLOR_START_WARN
#define VERBOSE_COLOR_START_INFO
#define VERBOSE_COLOR_START_DEBUG
#define VERBOSE_COLOR_START_TRACE
#define VERBOSE_COLOR_END
#endif

/*
 * String-literal prefix "<color>LEVEL\t<reset>" that is concatenated with the
 * caller's format string. `level` is only used with # and ##, so it is never
 * macro-expanded (a -DDEBUG on the command line cannot corrupt it).
 */
#if VERBOSE_LOG_LEVEL
#define VERBOSE_PRINT_PREFIX(level) VERBOSE_COLOR_START_##level #level "\t" VERBOSE_COLOR_END
#else
#define VERBOSE_PRINT_PREFIX(level)
#endif

/*
 * Prints "(hh:mm:ss.mmm)" to stderr.
 * - The value comes from clock(), i.e. processor time used by the program,
 *   not wall-clock time.
 * - Computed in double / long long: a float cannot hold clock() values with
 *   millisecond resolution once more than a few seconds have elapsed.
 * - Expands to a self-contained compound statement so its locals cannot
 *   shadow identifiers used in the caller's log arguments; it can be used
 *   with or without a trailing semicolon.
 */
#if VERBOSE_TIMESTAMP
#define VERBOSE_PRINT_TIMESTAMP()                                                              \
    {                                                                                          \
        const double verbose_ts_sec_ = (double)clock() / CLOCKS_PER_SEC;                       \
        const long long verbose_ts_ms_ = (long long)(verbose_ts_sec_ * 1000.0);                \
        fprintf(stderr, "(%02lld:%02lld:%06.3f)", verbose_ts_ms_ / 3600000LL,                  \
            (verbose_ts_ms_ / 60000LL) % 60LL, (double)(verbose_ts_ms_ % 60000LL) / 1000.0);   \
    }
#else
#define VERBOSE_PRINT_TIMESTAMP()
#endif

#if VERBOSE >= V_TRACE
#define LOG_TRACE(string, ...)                                                   \
    do {                                                                         \
        VERBOSE_PRINT_TIMESTAMP()                                                \
        fprintf(stderr, VERBOSE_PRINT_PREFIX(TRACE) string, ##__VA_ARGS__);      \
    } while (0)
#else
#define LOG_TRACE(...) do { } while (0)
#endif

#if VERBOSE >= V_DEBUG
#define LOG_DEBUG(string, ...)                                                   \
    do {                                                                         \
        VERBOSE_PRINT_TIMESTAMP()                                                \
        fprintf(stderr, VERBOSE_PRINT_PREFIX(DEBUG) string, ##__VA_ARGS__);      \
    } while (0)
#else
#define LOG_DEBUG(...) do { } while (0)
#endif

#if VERBOSE >= V_INFO
#define LOG_INFO(string, ...)                                                    \
    do {                                                                         \
        VERBOSE_PRINT_TIMESTAMP()                                                \
        fprintf(stderr, VERBOSE_PRINT_PREFIX(INFO) string, ##__VA_ARGS__);       \
    } while (0)
#else
#define LOG_INFO(...) do { } while (0)
#endif

#if VERBOSE >= V_WARN
#define LOG_WARN(string, ...)                                                    \
    do {                                                                         \
        VERBOSE_PRINT_TIMESTAMP()                                                \
        fprintf(stderr, VERBOSE_PRINT_PREFIX(WARN) string, ##__VA_ARGS__);       \
    } while (0)
#else
#define LOG_WARN(...) do { } while (0)
#endif

#if VERBOSE >= V_ERROR
#define LOG_ERROR(string, ...)                                                   \
    do {                                                                         \
        VERBOSE_PRINT_TIMESTAMP()                                                \
        fprintf(stderr, VERBOSE_PRINT_PREFIX(ERROR) string, ##__VA_ARGS__);      \
    } while (0)
#else
#define LOG_ERROR(...) do { } while (0)
#endif

#if VERBOSE >= V_FATAL
#define LOG_FATAL(string, ...)                                                   \
    do {                                                                         \
        VERBOSE_PRINT_TIMESTAMP()                                                \
        fprintf(stderr, VERBOSE_PRINT_PREFIX(FATAL) string, ##__VA_ARGS__);      \
    } while (0)
#else
#define LOG_FATAL(...) do { } while (0)
#endif

#endif // __DEBUG_H__
36 changes: 32 additions & 4 deletions host/inc/genome.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,20 @@
#define __GENOME_H__

#include <stdint.h>
#include <pthread.h>

#define MAX_SEQ_GEN (24) // max number of chromosomes
#define MAX_SEQ_NAME_SIZE (8)

typedef struct {
uint32_t magic;
uint32_t version;
uint32_t nb_seq;
uint64_t pt_seq[MAX_SEQ_GEN];
uint64_t len_seq[MAX_SEQ_GEN];
uint32_t nb_seq; // nb of chromosomes (24)
uint64_t pt_seq[MAX_SEQ_GEN]; // offset of each chromosome in data
uint64_t len_seq[MAX_SEQ_GEN]; // length of chromosome in data
uint64_t fasta_file_size;
char seq_name[MAX_SEQ_GEN][MAX_SEQ_NAME_SIZE];
int8_t *data;
int8_t *data; //genome de reference 1B = 1 nucleotide
int32_t *mapping_coverage;
} genome_t;

Expand All @@ -30,4 +31,31 @@ void genome_free();

genome_t *genome_get();

/*
 * One entry of the table returned by get_frequency_table(): a float frequency
 * and two unsigned scores.
 * NOTE(review): how freq, score and unsure_score are accumulated and what
 * distinguishes score from unsure_score is not visible from this header -
 * confirm against the implementation.
 */
struct frequency_info {

float freq;
unsigned int score;
unsigned int unsure_score;
};

/*
 * The two structs below are declared with 1-byte packing, so no padding is
 * inserted: a variants_codependence_info is exactly 3 bytes
 * (int16_t key + uint8_t codependence_count).
 */
#pragma pack(push,1)
struct variants_codependence_info {
int16_t key;
uint8_t codependence_count;
};

/* Number of variants_codependence_info entries stored in each list chunk. */
#define COD_LIST_SIZE 4
/*
 * Chunk of a singly linked list: a pointer to the next chunk followed by
 * COD_LIST_SIZE packed entries.
 * NOTE(review): because of the packing, `key` in odd-indexed entries is not
 * 2-byte aligned; access it through the struct member only and avoid taking
 * its address - confirm this is acceptable on every target.
 */
struct variants_codependence_info_list {
struct variants_codependence_info_list* next_list;
struct variants_codependence_info content[COD_LIST_SIZE];
};
#pragma pack(pop)

/*
 * Records a codependence between the variant at the current position and
 * another variant located other_index_delta positions away.
 * NOTE(review): semantics inferred from parameter names; presumably `mutex`
 * protects the shared codependence storage - confirm against the definition.
 */
void add_codependence_info(struct variants_codependence_info_list **next_variants_info_list, int16_t other_index_delta, uint8_t current_letter, uint8_t other_letter, unsigned int genome_size, pthread_mutex_t *mutex);

/*
 * Accessors and destructors for the frequency / codependence tables.
 * Parameterless functions are declared with (void) so that they are real
 * prototypes: before C23 an empty () list disables argument checking.
 */
struct frequency_info **get_frequency_table(void);
struct variants_codependence_info_list **get_codependence_table(pthread_mutex_t *mutex);
void free_frequency_table(void);
void free_codependence_chunks(void);

#endif /* __GENOME_H__ */
1 change: 1 addition & 0 deletions host/inc/getread.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
int get_reads_in_buffer(unsigned int pass_id);

int8_t *get_reads_buffer(unsigned int pass_id);
float *get_reads_quality_buffer(unsigned int pass_id);

void get_reads(FILE *fpe1, FILE *fpe2, unsigned int pass_id);

Expand Down
16 changes: 16 additions & 0 deletions host/inc/mapping_file.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
* Copyright 2021 - A Moisson-Franckhauser & UPMEM
*/
/*
 * Include guard named after this file (mapping_file.h). The previous guard,
 * __SAM_H__, did not match the file name and would silently hide this header
 * (or the other one) if a sam.h using the same guard were ever included.
 */
#ifndef __MAPPING_FILE_H__
#define __MAPPING_FILE_H__

/* uint64_t / uint8_t / int8_t are used below; do not rely on transitive includes. */
#include <stdint.h>

#include "processread.h"

/*
 * Debug output of read mappings.
 * NOTE(review): behavior is inferred from the names only - presumably
 * open_mapping_file() must be called before any write_* function and
 * close_mapping_file() afterwards; confirm against the implementation.
 */
void open_mapping_file(void);
//TODO : either reuse this code or delete it
//void write_mapping_read(uint64_t genome_pos, uint8_t *code, int8_t *read);
void write_read_mapping_from_backtrack(char *chromosome_name, uint64_t genome_pos, backtrack_t *backtrack_end, int8_t *read, int read_id);
void write_read_mapping(char *chromosome_name, uint64_t genome_pos, uint8_t *code, uint8_t *read);
void close_mapping_file(void);

#endif /* __MAPPING_FILE_H__ */
2 changes: 2 additions & 0 deletions host/inc/parse_args.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ unsigned int get_nb_thread_for_simu();

bool get_index_with_dpus();

bool get_use_frequency_table();

/**
* @brief Parse and validate the argument of the application.
*/
Expand Down
14 changes: 14 additions & 0 deletions host/inc/processread.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@

#include <stdio.h>

/**
 * @brief One backtrack record: a type tag plus two integer indices.
 *
 * NOTE(review): presumably `type` holds one of the CODE_* values below and
 * ix / jx are positions in the two aligned sequences - confirm against the
 * implementation.
 */
typedef struct {
int type;
int ix;
int jx;
} backtrack_t;


/*
 * Operation codes.
 * NOTE(review): CODE_SUB (10) and CODE_DEL (11) have the same numeric values
 * as COST_SUB and COST_GAPO in dpu/src/odpd_opt.S; it is not visible here
 * whether that is intentional - confirm before renumbering.
 */
#define CODE_MATCH 0
#define CODE_SUB 10
#define CODE_DEL 11
#define CODE_INS 12
#define CODE_END 13
#define CODE_ERR 14

void process_read(FILE *fpe1, FILE *fpe2, int round, unsigned int pass_id);

void process_read_init();
Expand Down
Loading