simonsfoundation · AaronWatters · Jul 20, 2016 · Jul 20, 2016 · Jul 20, 2016 · Jul 20, 2016
diff --git a/.travis.yml b/.travis.yml
@@ -26,13 +26,15 @@ install:
   # install R components
   - sudo R -f inferelator_ng/R_code/packages.R
 script:
+  - export TRAVIS_TESTING=noninteractive
   - python -c "import os; print(repr(os.name))"
   - coverage run --source=inferelator_ng setup.py test
   #- nosetests
 after_success:
   - codecov
 after_failure:
   - pwd
+  - which Rscript
   - find .
   - cat ./inferelator_ng/tests/artifacts/run_mi.R
   - R -f ./inferelator_ng/tests/artifacts/run_mi.R
diff --git a/Inferelator/README b/Inferelator/README
@@ -0,0 +1,132 @@
+NOTE:
+
+This directory contains a subset of https://github.com/ChristophH/Inferelator
+for testing and comparison purposes.
+
+Call the inferelator script from the base directory (the one containing this 
+README) with a job config file as argument. 
+
+Example call: Rscript inferelator.R jobs/dream4_cfg.R
+
+
+
+--------------------------------------------------------------------------------
+Default parameters and a brief explanation of each one
+--------------------------------------------------------------------------------
+
+PARS$input.dir <- 'input/dream4'  # path to the input files
+
+PARS$exp.mat.file <- 'expression.tsv'  # required; see definition below
+PARS$tf.names.file <- 'tf_names.tsv'  # required; see definition below
+PARS$meta.data.file <- 'meta_data.tsv'  # assume all steady state if NULL
+PARS$priors.file <- 'gold_standard.tsv'  # no priors if NULL
+PARS$gold.standard.file <- 'gold_standard.tsv'  # no evaluation if NULL
+PARS$leave.out.file <- NULL  # file with list of conditions that will be ignored
+PARS$randomize.expression <- FALSE  # whether to scramble input expression
+
+PARS$job.seed <- 42  # random seed; can be NULL
+PARS$save.to.dir <- file.path(PARS$input.dir, date.time.str)  # output directory
+PARS$num.boots <- 20  # number of bootstraps; no bootstrapping with a value of 1
+PARS$max.preds <- 10  # max number of predictors based on CLR to pass to model
+                      # selection method
+PARS$mi.bins <- 10  # number of bins to use for mutual information calculation
+PARS$cores <- 8  # number of cpu cores
+
+PARS$delT.max <- 110  # max number of time units allowed between time series 
+                      # conditions
+PARS$delT.min <- 0  # min number of time units allowed between time series 
+                    # conditions
+PARS$tau <- 45  # constant related to half life of mRNA (see Core model)
+
+PARS$perc.tp <- 0  # percent of true priors that will be used; can be vector
+PARS$perm.tp <- 1  # number of permutations of true priors
+PARS$perc.fp <- 0  # percent of false priors (100 = as many false priors as 
+                   # there are true priors); can be vector
+PARS$perm.fp <- 1  # number of permutations of false priors
+PARS$pr.sel.mode <- 'random'  # prior selection mode: 'random' or 'tf'
+                              # if 'random', the true priors are randomly chosen
+                              # from all priors edges, if 'tf', 
+                              # PARS$perc.tp is interpreted as the percent of
+                              # TFs to use for true priors and all interactions
+                              # for the chosen TFs will be used
+
+PARS$eval.on.subset <- FALSE  # whether to evaluate only on the part of the 
+                              # network that has connections in the gold
+                              # standard; if TRUE false priors will only be 
+                              # drawn from that part of the network
+
+PARS$method <- 'BBSR'  # which method to use; either 'MEN' or 'BBSR'
+PARS$prior.weight <- 1  # the weight for the priors; has to be larger than 1
+                        # for priors to have an effect
+
+PARS$use.tfa <- FALSE  # whether to estimate transcription factor activities and
+                       # use those in the regression models
+                       # if TRUE, interactions in priors file shoud be signed,
+                       # i.e. -1 for repression and +1 for activation
+PARS$prior.ss <- FALSE # whether to also sub-sample from the prior matrix during
+                       # each bootstrap; if TRUE, priors are sampled randomly with 
+                       # replacement; if FALSE, all priors are used as is
+
+PARS$output.summary <- TRUE  # write a summary tsv and RData file of network
+
+PARS$output.report <- TRUE  # create html network report
+
+PARS$output.tf.plots <- TRUE  # create png files with plots of TFs and targets
+
+--------------------------------------------------------------------------------
+Required Input Files
+--------------------------------------------------------------------------------
+
+expression.tsv
+--------------
+expression values; must include row (genes) and column (conditions) names
+
+tf_names.tsv
+------------
+one TF name on each line; must be subset of the row names of the expression data
+
+
+
+--------------------------------------------------------------------------------
+Optional Input Files
+--------------------------------------------------------------------------------
+
+meta_data.tsv
+-------------
+the meta data describing the conditions; must include column names;
+has five columns:
+isTs: TRUE if the condition is part of a time-series, FALSE else
+is1stLast: "e" if not part of a time-series; "f" if first; "m" middle; "l" last
+prevCol: name of the preceding condition in time-series; NA if "e" or "f"
+del.t: time in minutes since prevCol; NA if "e" or "f"
+condName: name of the condition
+
+priors.tsv
+----------
+matrix of 0 and 1 indicating whether we have prior knowledge in 
+the interaction of one TF and a gene; one row for each gene, one column for 
+each TF; must include row (genes) and column (TF) names
+
+gold_standard.tsv
+-----------------
+needed for validation; matrix of 0 and 1 indicating whether there is an 
+interaction between one TF and a gene; one row for each gene, one column for 
+each TF; must include row (genes) and column (TF) names
+
+
+
+--------------------------------------------------------------------------------
+Output Files
+--------------------------------------------------------------------------------
+
+One or more betas_frac_tp_X_perm_X--frac_fp_X_perm_X_X.RData files. One file
+per true and false prior and prior weight combination. Each RData file contains
+two lists of length PARS$num.boots where every entry is a matrix of betas and
+confidence scores (rescaled betas) respectively.
+
+One or more combinedconf_frac_tp_X_perm_X--frac_fp_X_perm_X_X.RData files with
+one matrix each. The matrix is the rank-combined version of the confidence
+scores of all bootstraps.
+
+A params_and_input.RData file with data objects holding the user set parameters,
+and input and input-derived objects.