diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a00af99..93e43f3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,13 +7,13 @@ jobs: build: runs-on: ubuntu-latest steps: - - name: Set up Go 1.x - uses: actions/setup-go@v2 + - name: Set up Go + uses: actions/setup-go@v5 with: - go-version: ^1.15 + go-version: '1.24.x' - name: Check out code into the Go module directory - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Update pip run: pip install --upgrade pip @@ -31,13 +31,13 @@ jobs: name: lint runs-on: ubuntu-latest steps: - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v5 with: - go-version: 1.21 - - uses: actions/checkout@v3 + go-version: '1.24.x' + - uses: actions/checkout@v4 - name: golangci-lint - uses: golangci/golangci-lint-action@v3 + uses: golangci/golangci-lint-action@v7 with: # Optional: version of golangci-lint to use in form of v1.2 or v1.2.3 or `latest` to use the latest version - version: v1.54 + version: v2.10.1 # args: --timeout 2m diff --git a/.golangci.yml b/.golangci.yml index 0bbd141..29f0c3b 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,27 +1,30 @@ -run: - skip-files: - - ".*bindata.go$" - - ".*pb.go" - - ".*pb.gw.go" +version: "2" +run: timeout: 5m -issues: - exclude: - - "not declared by package utf8" - - "unicode/utf8/utf8.go" - linters: - # Disable all linters. 
- # Default: false - disable-all: true + default: none # Enable specific linter # https://golangci-lint.run/usage/linters/#enabled-by-default enable: - - gofmt - - goimports - misspell - - typecheck - - gosimple - - govet \ No newline at end of file + - govet + exclusions: + paths: + - ".*bindata.go$" + - ".*pb.go" + - ".*pb.gw.go" + rules: + - text: "not declared by package utf8" + linters: + - govet + - path: "unicode/utf8/utf8.go" + linters: + - govet + +formatters: + enable: + - gofmt + - goimports \ No newline at end of file diff --git a/Makefile b/Makefile index e8b4980..4e3c39c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ -SIFTER_VERSION=0.1.5 +SIFTER_VERSION=0.2.0 #hack to get around submodule weirdness in automated docker builds hub-build: @@ -30,5 +30,3 @@ test: .TEST .TEST: go test ./test -docs: - @go run docschema/main.go | ./docschema/schema-to-markdown.py > Playbook.md diff --git a/Playbook.md b/Playbook.md deleted file mode 100644 index d21bce5..0000000 --- a/Playbook.md +++ /dev/null @@ -1,1216 +0,0 @@ -# Introduction -SIFTER is an Extract Transform Load (ETL) platform that is designed to take -a variety of standard input sources, create a message streams and run a -set of transforms to create JSON schema validated output classes. -SIFTER is based based on implementing a Playbook that describes top level -Extractions, that can include downloads, file manipulation and finally reading -the contents of the files. Every extractor is meant to produce a stream of -MESSAGES for transformation. A message is a simple nested dictionary data structure. - -Example Message: - -``` -{ - "firstName" : "bob", - "age" : "25" - "friends" : [ "Max", "Alex"] -} -``` - -Once a stream of messages are produced, that can be run through a TRANSFORM -pipeline. A transform pipeline is an array of transform steps, each transform -step can represent a different way to alter the data. 
The array of transforms link -togeather into a pipe that makes multiple alterations to messages as they are -passed along. There are a number of different transform steps types that can -be done in a transform pipeline these include: - - - Projection - - Filtering - - Programmatic transformation - - Table based field translation - - Outputing the message as a JSON Schema checked object - - -*** -# Example Playbook -Our first task will be to convert a ZIP code TSV into a set of county level -entries. - -The input file looks like: - -``` -ZIP,COUNTYNAME,STATE,STCOUNTYFP,CLASSFP -36003,Autauga County,AL,01001,H1 -36006,Autauga County,AL,01001,H1 -36067,Autauga County,AL,01001,H1 -36066,Autauga County,AL,01001,H1 -36703,Autauga County,AL,01001,H1 -36701,Autauga County,AL,01001,H1 -36091,Autauga County,AL,01001,H1 -``` - -First is the header of the Playbook. This declares the -unique name of the playbook and it's output directory. - -``` -name: zipcode_map -outdir: ./ -docs: Converts zipcode TSV into graph elements -``` - -Next the configuration is declared. In this case the only input is the zipcode TSV. -There is a default value, so the playbook can be invoked without passing in -any parameters. However, to apply this playbook to a new input file, the -input parameter `zipcode` could be used to define the source file. - -``` -config: - schema: - type: Dir - default: ../covid19_datadictionary/gdcdictionary/schemas/ - zipcode: - type: File - default: ../data/ZIP-COUNTY-FIPS_2017-06.csv -``` - -The `inputs` section declares data input sources. In this playbook, there is -only one input, which is to run the table loader. -``` -inputs: - tableLoad: - input: "{{config.zipcode}}" - sep: "," -``` - -Tableload operaters of the input file that was originally passed in using the -`inputs` stanza. SIFTER string parsing is based on mustache template system. -To access the string passed in the template is `{{config.zipcode}}`. 
-The seperator in the file input file is a `,` so that is also passed in as a -parameter to the extractor. - - -The `tableLoad` extractor opens up the TSV and generates a one message for -every row in the file. It uses the header of the file to map the column values -into a dictionary. The first row would produce the message: - -``` -{ - "ZIP" : "36003", - "COUNTYNAME" : "Autauga County", - "STATE" : "AL", - "STCOUNTYFP" : "01001", - "CLASSFP" : "H1" -} -``` - -The stream of messages are then passed into the steps listed in the `transform` -section of the tableLoad extractor. - -For the current tranform, we want to produce a single entry per `STCOUNTYFP`, -however, the file has a line per `ZIP`. We need to run a `reduce` transform, -that collects rows togeather using a field key, which in this case is `"{{row.STCOUNTYFP}}"`, -and then runs a function `merge` that takes two messages, merges them togeather -and produces a single output message. - -The two messages: - -``` -{ "ZIP" : "36003", "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"} -{ "ZIP" : "36006", "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"} -``` - -Would be merged into the message: - -``` -{ "ZIP" : ["36003", "36006"], "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"} -``` - -The `reduce` transform step uses a block of python code to describe the function. -The `method` field names the function, in this case `merge` that will be used -as the reduce function. 
- -``` - zipReduce: - - from: zipcode - - reduce: - field: STCOUNTYFP - method: merge - python: > - def merge(x,y): - a = x.get('zipcodes', []) + [x['ZIP']] - b = y.get('zipcodes', []) + [y['ZIP']] - x['zipcodes'] = a + b - return x -``` - -The original messages produced by the loader have all of the information required -by the `summary_location` object type as described by the JSON schema that was linked -to in the header stanza. However, the data is all under the wrong field names. -To remap the data, we use a `project` tranformation that uses the template engine -to project data into new files in the message. The template engine has the current -message data in the value `row`. So the value -`FIPS:{{row.STCOUNTYFP}}` is mapped into the field `id`. - -``` - - project: - mapping: - id: "FIPS:{{row.STCOUNTYFP}}" - province_state: "{{row.STATE}}" - summary_locations: "{{row.STCOUNTYFP}}" - county: "{{row.COUNTYNAME}}" - submitter_id: "{{row.STCOUNTYFP}}" - type: summary_location - projects: [] -``` - -Using this projection, the message: - -``` -{ - "ZIP" : ["36003", "36006"], - "COUNTYNAME" : "Autauga County", - "STATE" : "AL", - "STCOUNTYFP" : "01001", - "CLASSFP" : "H1" -} -``` - -would become - -``` -{ - "id" : "FIPS:01001", - "province_state" : "AL", - "summary_locations" : "01001", - "county" : "Autauga County", - "submitter_id" : "01001", - "type" : "summary_location" - "projects" : [], - "ZIP" : ["36003", "36006"], - "COUNTYNAME" : "Autauga County", - "STATE" : "AL", - "STCOUNTYFP" : "01001", - "CLASSFP" : "H1" -} -``` - -Now that the data has been remapped, we pass the data into the 'objectCreate' -transformation, which will read in the schema for `summary_location`, check the -message to make sure it matches and then output it. - -``` - - objectCreate: - class: summary_location -``` - - -Outputs - -To create an output table, with two columns connecting -`ZIP` values to `STCOUNTYFP` values. 
The `STCOUNTYFP` is a county level FIPS -code, used by the census office. A single FIPS code my contain many ZIP codes, -and we can use this table later for mapping ids when loading the data into a database. - -``` -outputs: - zip2fips: - tableWrite: - from: - output: zip2fips - columns: - - ZIP - - STCOUNTYFP -``` - - -*** -# File Format -A Playbook is a YAML file, that links a schema to a series of extractors that -in turn, can run several transforms to emit objects that are checked against -the schema. - - -*** -## Playbook - -The Playbook represents a single ETL pipeline that takes multiple inputs -and turns them into multiple output streams. It can take a set of inputs -then run a sequential set of extraction steps. - - - - class - -> Type: *string* - - - name - -> Type: *string* - -: Unique name of the playbook - - - docs - -> Type: *string* - - - outdir - -> Type: *string* - - - config - -> Type: *object* of [ConfigVar](#configvar) - - -: Configuration for Playbook - - - inputs - -> Type: *object* of [Extractor](#extractor) - - -: Steps of the transformation - - - outputs - -> Type: *object* of [WriteConfig](#writeconfig) - - - - pipelines - -> Type: *object* - - -*** -## ConfigVar - - - name - -> Type: *string* - - - type - -> Type: *string* - - - default - -> Type: *string* - - -*** -# Extraction Steps -Every playbook consists of a series of extraction steps. An extraction step -can be a data extractor that runs a transform pipeline. - - -*** -## Extractor - -This object represents a single extractor step. It has a field for each possible -extractor type, but only one is supposed to be filed in at a time. 
- - - - description - -> Type: *string* - -: Human Readable description of step - - - xmlLoad - - of [XMLLoadStep](#xmlloadstep) - - - tableLoad - - of [TableLoadStep](#tableloadstep) - -: Run transform pipeline on a TSV or CSV - - - jsonLoad - - of [JSONLoadStep](#jsonloadstep) - -: Run a transform pipeline on a multi line json file - - - sqldumpLoad - - of [SQLDumpStep](#sqldumpstep) - -: Parse the content of a SQL dump to find insert and run a transform pipeline - - - gripperLoad - - of [GripperLoadStep](#gripperloadstep) - -: Use a GRIPPER server to get data and run a transform pipeline - - - avroLoad - - of [AvroLoadStep](#avroloadstep) - -: Load data from avro file - - - embedded - -> Type: *array* - - - glob - - of [GlobLoadStep](#globloadstep) - - - sqliteLoad - - of [SQLiteStep](#sqlitestep) - -An array of Extractors, each defining a different extraction step - -``` -- desc: Untar the input file - untar: - input: "{{inputs.tar}}" -- desc: Loading Patient List - tableLoad: - input: data_clinical_patient.txt - transform: - ... -- desc: Loading Sample List - tableLoad: - input: data_clinical_sample.txt - transform: - ... -- fileGlob: - files: [ data_RNA_Seq_expression_median.txt, data_RNA_Seq_V2_expression_median.txt ] - steps: - ... 
-``` - - -*** -## SQLDumpStep - - - input - -> Type: *string* - -: Path to the SQL dump file - - - tables - -> Type: *array* - -: Array of transforms for the different tables in the SQL dump - - -*** -## TableLoadStep - - - input - -> Type: *string* - -: TSV to be transformed - - - rowSkip - -> Type: *integer* - -: Number of header rows to skip - - - columns - -> Type: *array* - -: Manually set names of columns - - - extraColumns - -> Type: *string* - -: Columns beyond originally declared columns will be placed in this array - - - sep - -> Type: *string* - -: Separator \t for TSVs or , for CSVs - - -*** -## JSONLoadStep - - - input - -> Type: *string* - -: Path of multiline JSON file to transform - - - transform - -> Type: *array* of [Step](#step) - -: Transformation Pipeline - - - multiline - -> Type: *boolean* - -: Load file as a single multiline JSON object - -``` -- desc: Convert Census File - jsonLoad: - input: "{{inputs.census}}" - transform: - ... -``` - - -*** -## GripperLoadStep - -Use a GRIPPER server to obtain data - - - host - -> Type: *string* - -: GRIPPER URL - - - collection - -> Type: *string* - -: GRIPPER collection to target - - -*** -# Transform Pipelines -A tranform pipeline is a series of method to alter a message stream. 
- - -*** -## ObjectCreateStep - -Output a JSON schema described object - - - class - -> Type: *string* - -: Object class, should match declared class in JSON Schema - - - schema - -> Type: *string* - -: Directory with JSON schema files - - -*** -## MapStep - -Apply the sample function to every message - - - method - -> Type: *string* - -: Name of function to call - - - python - -> Type: *string* - -: Python code to be run - - - gpython - -> Type: *string* - -: Python code to be run using GPython - -The `python` section defines the code, and the `method` parameter defines -which function from the code to call -``` -- map: - #fix weird formatting of zip code - python: > - def f(x): - d = int(x['zipcode']) - x['zipcode'] = "%05d" % (int(d)) - return x - method: f -``` - - -*** -## ProjectStep - -Project templates into fields in the message - - - mapping - -> Type: *object* - -: New fields to be generated from template - - - rename - -> Type: *object* - -: Rename field (no template engine) - - -``` -- project: - mapping: - code: "{{row.project_id}}" - programs: "{{row.program.name}}" - submitter_id: "{{row.program.name}}" - projects: "{{row.project_id}}" - type: experiment -``` - - -*** -## LookupStep - -Use a two column file to make values from one value to another. 
- - - replace - -> Type: *string* - - - tsv - - of [TSVTable](#tsvtable) - - - json - - of [JSONTable](#jsontable) - - - table - -> Type: *object* - - - lookup - -> Type: *string* - - - copy - -> Type: *object* - -Starting with a table that maps state names to the two character state code: - -``` -North Dakota ND -Ohio OH -Oklahoma OK -Oregon OR -Pennsylvania PA -``` - -The transform: - -``` - - tableReplace: - input: "{{inputs.stateTable}}" - field: sub_region_1 -``` - -Would change the message: - -``` -{ "sub_region_1" : "Oregon" } -``` - -to - -``` -{ "sub_region_1" : "OR" } -``` - - -*** -## RegexReplaceStep - -Use a regular expression based replacement to alter a field - - - field - -> Type: *string* - - - regex - -> Type: *string* - - - replace - -> Type: *string* - - - dst - -> Type: *string* - - -``` -- regexReplace: - col: "{{row.attributes.Parent}}" - regex: "^transcript:" - replace: "" - dst: transcript_id -``` - - -*** -## ReduceStep - - - field - -> Type: *string* - - - method - -> Type: *string* - - - python - -> Type: *string* - - - gpython - -> Type: *string* - - - init - -> Type: *object* - -``` - - reduce: - field: "{{row.STCOUNTYFP}}" - method: merge - python: > - def merge(x,y): - a = x.get('zipcodes', []) + [x['ZIP']] - b = y.get('zipcodes', []) + [y['ZIP']] - x['zipcodes'] = a + b - return x -``` - - -*** -## FilterStep - - - field - -> Type: *string* - - - value - -> Type: *string* - - - match - -> Type: *string* - - - check - -> Type: *string* - -: How to check value, 'exists' or 'hasValue' - - - method - -> Type: *string* - - - python - -> Type: *string* - - - gpython - -> Type: *string* - - - steps - -> Type: *array* of [Step](#step) - - -Match based filtering: - -``` - - filter: - col: "{{row.tax_id}}" - match: "9606" - steps: - - tableWrite: -``` - -Code based filters: - -``` -- filter: - python: > - def f(x): - if 'FIPS' in x and len(x['FIPS']) > 0 and len(x['date']) > 0: - return True - return False - method: f - steps: - - 
objectCreate: - class: summary_report -``` - - -*** -## DebugStep - -Print out messages - - - label - -> Type: *string* - -``` -- debug: {} -``` - - -*** -## FieldProcessStep - -Table an array field from a message, split it into a series of -messages and run on child transform pipeline. The `mapping` field -allows you to take data from the parent message and map it into the -child messages. - - - - field - -> Type: *string* - - - mapping - -> Type: *object* - - - itemField - -> Type: *string* - -: If processing an array of non-dict elements, create a dict as {itemField:element} - -``` -- fieldProcess: - col: portions - mapping: - samples: "{{row.id}}" -``` - - -*** -## FieldParseStep - -Take a param style string and parse it into independent elements in the message - - - field - -> Type: *string* - - - sep - -> Type: *string* - - - assign - -> Type: *string* - - -The messages - -``` -{ "attributes" : "ID=CDS:ENSP00000419345;Parent=transcript:ENST00000486405;protein_id=ENSP00000419345" } -``` - -After the transform: - -``` - - fieldParse: - col: attributes - sep: ";" -``` - -Becomes: -``` -{ - "attributes" : "ID=CDS:ENSP00000419345;Parent=transcript:ENST00000486405;protein_id=ENSP00000419345", - "ID" : "CDS:ENSP00000419345", - "Parent" : "transcript:ENST00000486405", - "protein_id" : "ENSP00000419345" -} -``` - - -*** -## AccumulateStep - - - field - -> Type: *string* - -: Field to use for group definition - - - dest - -> Type: *string* - -## AvroLoadStep - - - input - -> Type: *string* - -: Path of avro object file to transform - -## CleanStep - - - fields - -> Type: *array* - -: List of valid fields that will be left. 
All others will be removed - - - removeEmpty - -> Type: *boolean* - - - storeExtra - -> Type: *string* - -## CommandLineTemplate - - - template - -> Type: *string* - - - outputs - -> Type: *array* - - - inputs - -> Type: *array* - -## DistinctStep - - - value - -> Type: *string* - - - steps - -> Type: *array* of [Step](#step) - -## EdgeRule - - - prefixFilter - -> Type: *boolean* - - - blankFilter - -> Type: *boolean* - - - toPrefix - -> Type: *string* - - - sep - -> Type: *string* - - - idTemplate - -> Type: *string* - -## EmitStep - - - name - -> Type: *string* - -## GlobLoadStep - - - storeFilename - -> Type: *string* - - - input - -> Type: *string* - -: Path of avro object file to transform - - - xmlLoad - - of [XMLLoadStep](#xmlloadstep) - - - tableLoad - - of [TableLoadStep](#tableloadstep) - -: Run transform pipeline on a TSV or CSV - - - jsonLoad - - of [JSONLoadStep](#jsonloadstep) - -: Run a transform pipeline on a multi line json file - - - avroLoad - - of [AvroLoadStep](#avroloadstep) - -: Load data from avro file - -## GraphBuildStep - - - schema - -> Type: *string* - - - class - -> Type: *string* - - - idPrefix - -> Type: *string* - - - idTemplate - -> Type: *string* - - - idField - -> Type: *string* - - - filePrefix - -> Type: *string* - - - sep - -> Type: *string* - - - fields - -> Type: *object* of [EdgeRule](#edgerule) - - - - flat - -> Type: *boolean* - -## HashStep - - - field - -> Type: *string* - - - value - -> Type: *string* - - - method - -> Type: *string* - -## JSONTable - - - input - -> Type: *string* - - - value - -> Type: *string* - - - key - -> Type: *string* - -## SQLiteStep - - - input - -> Type: *string* - -: Path to the SQLite file - - - query - -> Type: *string* - -: SQL select statement based input - -## SnakeFileWriter - - - from - -> Type: *string* - - - commands - -> Type: *array* of [CommandLineTemplate](#commandlinetemplate) - -## Step - - - from - -> Type: *string* - - - fieldParse - - of [FieldParseStep](#fieldparsestep) - 
-: fieldParse to run - - - fieldType - -> Type: *object* - -: Change type of a field (ie string -> integer) - - - objectCreate - - of [ObjectCreateStep](#objectcreatestep) - -: Create a JSON schema based object - - - emit - - of [EmitStep](#emitstep) - -: Write to unstructured JSON file - - - filter - - of [FilterStep](#filterstep) - - - clean - - of [CleanStep](#cleanstep) - - - debug - - of [DebugStep](#debugstep) - -: Print message contents to stdout - - - regexReplace - - of [RegexReplaceStep](#regexreplacestep) - - - project - - of [ProjectStep](#projectstep) - -: Run a projection mapping message - - - map - - of [MapStep](#mapstep) - -: Apply a single function to all records - - - reduce - - of [ReduceStep](#reducestep) - - - distinct - - of [DistinctStep](#distinctstep) - - - fieldProcess - - of [FieldProcessStep](#fieldprocessstep) - -: Take an array field from a message and run in child transform - - - lookup - - of [LookupStep](#lookupstep) - - - hash - - of [HashStep](#hashstep) - - - graphBuild - - of [GraphBuildStep](#graphbuildstep) - - - accumulate - - of [AccumulateStep](#accumulatestep) - -## TSVTable - - - input - -> Type: *string* - - - sep - -> Type: *string* - - - value - -> Type: *string* - - - key - -> Type: *string* - - - header - -> Type: *array* - -## TableWriter - - - from - -> Type: *string* - - - output - -> Type: *string* - -: Name of file to create - - - columns - -> Type: *array* - -: Columns to be written into table file - - - sep - -> Type: *string* - -## WriteConfig - - - tableWrite - - of [TableWriter](#tablewriter) - - - snakefile - - of [SnakeFileWriter](#snakefilewriter) - -## XMLLoadStep - - - input - -> Type: *string* - diff --git a/README.md b/README.md index 794811a..1fae78f 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ More detailed descriptions can be found in out [Playbook manual](Playbook.md) class: sifter name: census_2010 -config: +params: census: ../data/census_2010_byzip.json date: "2010-01-01" schema: 
../covid19_datadictionary/gdcdictionary/schemas/ @@ -39,7 +39,7 @@ config: inputs: censusData: jsonLoad: - input: "{{config.census}}" + input: "{{params.census}}" pipelines: transform: @@ -54,13 +54,13 @@ pipelines: method: f - project: mapping: - submitter_id: "{{row.geo_id}}:{{inputs.date}}" + submitter_id: "{{row.geo_id}}:{{params.date}}" type: census_report - date: "{{config.date}}" + date: "{{params.date}}" summary_location: "{{row.zipcode}}" - objectValidate: title: census_report - schema: "{{config.schema}}" + schema: "{{params.schema}}" ``` diff --git a/cmd/graphplan/main.go b/cmd/graphplan/main.go deleted file mode 100644 index 56b93f8..0000000 --- a/cmd/graphplan/main.go +++ /dev/null @@ -1,67 +0,0 @@ -package graphplan - -import ( - "path/filepath" - - "github.com/bmeg/sifter/graphplan" - "github.com/bmeg/sifter/logger" - "github.com/bmeg/sifter/playbook" - "github.com/spf13/cobra" -) - -var outScriptDir = "" -var outDataDir = "./" -var objectExclude = []string{} -var verbose bool = false - -// Cmd is the declaration of the command line -var Cmd = &cobra.Command{ - Use: "graph-plan", - Short: "Scan directory to plan operations", - Args: cobra.MinimumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - - if verbose { - logger.Init(true, false) - } - - scriptPath, _ := filepath.Abs(args[0]) - - /* - if outScriptDir != "" { - baseDir, _ = filepath.Abs(outScriptDir) - } else if len(args) > 1 { - return fmt.Errorf("for multiple input directories, based dir must be defined") - } - - _ = baseDir - */ - outScriptDir, _ = filepath.Abs(outScriptDir) - outDataDir, _ = filepath.Abs(outDataDir) - - outDataDir, _ = filepath.Rel(outScriptDir, outDataDir) - - pb := playbook.Playbook{} - - if sifterErr := playbook.ParseFile(scriptPath, &pb); sifterErr == nil { - if len(pb.Pipelines) > 0 || len(pb.Inputs) > 0 { - err := graphplan.NewGraphBuild( - &pb, outScriptDir, outDataDir, objectExclude, - ) - if err != nil { - logger.Error("Parse Error", "error", err) 
- } - } - } - - return nil - }, -} - -func init() { - flags := Cmd.Flags() - flags.BoolVarP(&verbose, "verbose", "v", verbose, "Verbose logging") - flags.StringVarP(&outScriptDir, "dir", "C", outScriptDir, "Change Directory for script base") - flags.StringVarP(&outDataDir, "out", "o", outDataDir, "Change output Directory") - flags.StringArrayVarP(&objectExclude, "exclude", "x", objectExclude, "Object Exclude") -} diff --git a/cmd/inspect/main.go b/cmd/inspect/main.go index 4375f45..bd2cbb8 100644 --- a/cmd/inspect/main.go +++ b/cmd/inspect/main.go @@ -53,23 +53,18 @@ var Cmd = &cobra.Command{ out := map[string]any{} cf := map[string]string{} - for _, f := range pb.GetConfigFields() { + for _, f := range pb.GetRequiredParams() { cf[f.Name] = f.Name //f.Type } out["configFields"] = cf - ins := pb.GetConfigFields() + ins := pb.GetRequiredParams() out["config"] = ins outputs := map[string]any{} - sinks, _ := pb.GetOutputs(task) - for k, v := range sinks { - outputs[k] = v - } - - emitters, _ := pb.GetEmitters(task) - for k, v := range emitters { + pouts, _ := pb.GetOutputs(task) + for k, v := range pouts { outputs[k] = v } diff --git a/cmd/root.go b/cmd/root.go index 2e889eb..d23161c 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -3,10 +3,8 @@ package cmd import ( "os" - "github.com/bmeg/sifter/cmd/graphplan" "github.com/bmeg/sifter/cmd/inspect" "github.com/bmeg/sifter/cmd/run" - "github.com/bmeg/sifter/cmd/scan" "github.com/spf13/cobra" ) @@ -20,8 +18,6 @@ var RootCmd = &cobra.Command{ func init() { RootCmd.AddCommand(run.Cmd) RootCmd.AddCommand(inspect.Cmd) - RootCmd.AddCommand(graphplan.Cmd) - RootCmd.AddCommand(scan.Cmd) } var genBashCompletionCmd = &cobra.Command{ diff --git a/cmd/run/main.go b/cmd/run/main.go index f439127..8b3f016 100644 --- a/cmd/run/main.go +++ b/cmd/run/main.go @@ -11,9 +11,9 @@ import ( ) var outDir string = "" -var inputFile string = "" +var paramsFile string = "" var verbose bool = false -var cmdInputs map[string]string +var cmdParams 
map[string]string // Cmd is the declaration of the command line var Cmd = &cobra.Command{ @@ -26,15 +26,15 @@ var Cmd = &cobra.Command{ logger.Init(true, false) } - inputs := map[string]string{} - if inputFile != "" { - if err := playbook.ParseStringFile(inputFile, &inputs); err != nil { + params := map[string]string{} + if paramsFile != "" { + if err := playbook.ParseStringFile(paramsFile, ¶ms); err != nil { logger.Error("%s", err) return err } } - for k, v := range cmdInputs { - inputs[k] = v + for k, v := range cmdParams { + params[k] = v logger.Info("Input Params", k, v) } for _, playFile := range args { @@ -46,11 +46,11 @@ var Cmd = &cobra.Command{ } pb := playbook.Playbook{} playbook.ParseBytes(yaml, "./playbook.yaml", &pb) - if err := Execute(pb, "./", "./", outDir, inputs); err != nil { + if err := Execute(pb, "./", "./", outDir, params); err != nil { return err } } else { - if err := ExecuteFile(playFile, "./", outDir, inputs); err != nil { + if err := ExecuteFile(playFile, "./", outDir, params); err != nil { return err } } @@ -63,6 +63,6 @@ var Cmd = &cobra.Command{ func init() { flags := Cmd.Flags() flags.BoolVarP(&verbose, "verbose", "v", verbose, "Verbose logging") - flags.StringToStringVarP(&cmdInputs, "config", "c", cmdInputs, "Config variable") - flags.StringVarP(&inputFile, "configFile", "f", inputFile, "Config file") + flags.StringToStringVarP(&cmdParams, "param", "p", cmdParams, "Parameter variable") + flags.StringVarP(¶msFile, "params-file", "f", paramsFile, "Parameter file") } diff --git a/cmd/run/run.go b/cmd/run/run.go index a5607a6..42651b2 100644 --- a/cmd/run/run.go +++ b/cmd/run/run.go @@ -22,7 +22,7 @@ func ExecuteFile(playFile string, workDir string, outDir string, inputs map[stri return Execute(pb, baseDir, workDir, outDir, inputs) } -func Execute(pb playbook.Playbook, baseDir string, workDir string, outDir string, inputs map[string]string) error { +func Execute(pb playbook.Playbook, baseDir string, workDir string, outDir string, 
params map[string]string) error { if outDir == "" { outDir = pb.GetDefaultOutDir() @@ -32,7 +32,7 @@ func Execute(pb playbook.Playbook, baseDir string, workDir string, outDir string os.MkdirAll(outDir, 0777) } - nInputs, err := pb.PrepConfig(inputs, workDir) + nInputs, err := pb.PrepConfig(params, workDir) if err != nil { return err } diff --git a/cmd/scan/main.go b/cmd/scan/main.go deleted file mode 100644 index f1fbe48..0000000 --- a/cmd/scan/main.go +++ /dev/null @@ -1,217 +0,0 @@ -package scan - -import ( - "encoding/json" - "fmt" - "io/fs" - "os" - "path/filepath" - "strings" - - "github.com/bmeg/sifter/playbook" - "github.com/bmeg/sifter/task" - "github.com/spf13/cobra" -) - -var jsonOut = false -var objectsOnly = false -var baseDir = "" - -type Entry struct { - ObjectType string `json:"objectType"` - SifterFile string `json:"sifterFile"` - Outfile string `json:"outFile"` -} - -var ObjectCommand = &cobra.Command{ - Use: "objects", - Short: "Scan for outputs", - Args: cobra.MinimumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - - scanDir := args[0] - - outputs := []Entry{} - - PathWalker(scanDir, func(pb *playbook.Playbook) { - for pname, p := range pb.Pipelines { - emitName := "" - for _, s := range p { - if s.Emit != nil { - emitName = s.Emit.Name - } - } - if emitName != "" { - for _, s := range p { - outdir := pb.GetDefaultOutDir() - outname := fmt.Sprintf("%s.%s.%s.json.gz", pb.Name, pname, emitName) - outpath := filepath.Join(outdir, outname) - o := Entry{SifterFile: pb.GetPath(), Outfile: outpath} - if s.ObjectValidate != nil { - //outpath, _ = filepath.Rel(baseDir, outpath) - //fmt.Printf("%s\t%s\n", s.ObjectValidate.Title, outpath) - o.ObjectType = s.ObjectValidate.Title - } - if objectsOnly { - if o.ObjectType != "" { - outputs = append(outputs, o) - } - } else { - outputs = append(outputs, o) - } - } - } - } - }) - - if jsonOut { - j := json.NewEncoder(os.Stdout) - j.SetIndent("", " ") - j.Encode(outputs) - } else { - for _, i := 
range outputs { - fmt.Printf("%s\t%s\n", i.ObjectType, i.Outfile) - } - } - - return nil - - }, -} - -type ScriptEntry struct { - Name string `json:"name"` - Path string `json:"path"` - Inputs []string `json:"inputs"` - Outputs []string `json:"outputs"` -} - -func removeDuplicates(s []string) []string { - t := map[string]bool{} - - for _, i := range s { - t[i] = true - } - out := []string{} - for k := range t { - out = append(out, k) - } - return out -} - -func relPathArray(basedir string, paths []string) []string { - out := []string{} - for _, i := range paths { - if o, err := filepath.Rel(baseDir, i); err == nil { - out = append(out, o) - } - } - return out -} - -var ScriptCommand = &cobra.Command{ - Use: "scripts", - Short: "Scan for scripts", - Args: cobra.MinimumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - - scanDir := args[0] - - scripts := []ScriptEntry{} - - if baseDir == "" { - baseDir, _ = os.Getwd() - } - baseDir, _ = filepath.Abs(baseDir) - //fmt.Printf("basedir: %s\n", baseDir) - - userInputs := map[string]string{} - - PathWalker(scanDir, func(pb *playbook.Playbook) { - path := pb.GetPath() - scriptDir := filepath.Dir(path) - - config, _ := pb.PrepConfig(userInputs, baseDir) - - task := task.NewTask(pb.Name, scriptDir, baseDir, pb.GetDefaultOutDir(), config) - sourcePath, _ := filepath.Abs(path) - - cmdPath, _ := filepath.Rel(baseDir, sourcePath) - - inputs := []string{} - outputs := []string{} - for _, p := range pb.GetConfigFields() { - if p.IsDir() || p.IsFile() { - inputs = append(inputs, config[p.Name]) - } - } - //inputs = append(inputs, sourcePath) - - sinks, _ := pb.GetOutputs(task) - for _, v := range sinks { - outputs = append(outputs, v...) 
- } - - emitters, _ := pb.GetEmitters(task) - for _, v := range emitters { - outputs = append(outputs, v) - } - - //for _, e := range pb.Inputs { - //} - - s := ScriptEntry{ - Path: cmdPath, - Name: pb.Name, - Outputs: relPathArray(baseDir, removeDuplicates(outputs)), - Inputs: relPathArray(baseDir, removeDuplicates(inputs)), - } - scripts = append(scripts, s) - }) - - if jsonOut { - e := json.NewEncoder(os.Stdout) - e.SetIndent("", " ") - e.Encode(scripts) - } else { - for _, i := range scripts { - fmt.Printf("%s\n", i) - } - } - - return nil - }, -} - -// Cmd is the declaration of the command line -var Cmd = &cobra.Command{ - Use: "scan", - Short: "Scan for scripts or objects", -} - -func init() { - Cmd.AddCommand(ObjectCommand) - Cmd.AddCommand(ScriptCommand) - - objFlags := ObjectCommand.Flags() - objFlags.BoolVarP(&objectsOnly, "objects", "s", objectsOnly, "Objects Only") - objFlags.BoolVarP(&jsonOut, "json", "j", jsonOut, "Output JSON") - - scriptFlags := ScriptCommand.Flags() - scriptFlags.StringVarP(&baseDir, "base", "b", baseDir, "Base Dir") - scriptFlags.BoolVarP(&jsonOut, "json", "j", jsonOut, "Output JSON") - -} - -func PathWalker(baseDir string, userFunc func(*playbook.Playbook)) { - filepath.Walk(baseDir, - func(path string, info fs.FileInfo, err error) error { - if strings.HasSuffix(path, ".yaml") { - pb := playbook.Playbook{} - if parseErr := playbook.ParseFile(path, &pb); parseErr == nil { - userFunc(&pb) - } - } - return nil - }) -} diff --git a/config/config.go b/config/config.go index f1b3ac2..5f3fc48 100644 --- a/config/config.go +++ b/config/config.go @@ -2,33 +2,43 @@ package config import "strings" -type Config map[string]*string +type Params map[string]Param -type Type string - -const ( - Unknown Type = "" - File Type = "File" - Dir Type = "Dir" -) +type Param struct { + Type string `json:"type"` + Default any `json:"default,omitempty"` +} -type Variable struct { +type ParamRequest struct { + Type string `json:"type"` Name string 
`json:"name"` - Type Type } type Configurable interface { - GetConfigFields() []Variable + GetRequiredParams() []ParamRequest } -func (in *Variable) IsFile() bool { - return in.Type == File +func (in *Param) IsFile() bool { + return strings.ToLower(in.Type) == "file" } -func (in *Variable) IsDir() bool { - return in.Type == Dir +func (in *Param) IsDir() bool { + t := strings.ToLower(in.Type) + return t == "path" || t == "dir" } func TrimPrefix(s string) string { - return strings.TrimPrefix(s, "config.") + if strings.HasPrefix(s, "params.") { + return strings.TrimPrefix(s, "params.") + } + return s +} + +func (in *ParamRequest) IsFile() bool { + return strings.ToLower(in.Type) == "file" +} + +func (in *ParamRequest) IsDir() bool { + t := strings.ToLower(in.Type) + return t == "path" || t == "dir" } diff --git a/docs/css/darcula.css b/docs/assets/css/darcula.css similarity index 100% rename from docs/css/darcula.css rename to docs/assets/css/darcula.css diff --git a/docs/css/dark.css b/docs/assets/css/dark.css similarity index 100% rename from docs/css/dark.css rename to docs/assets/css/dark.css diff --git a/docs/css/flexboxgrid.css b/docs/assets/css/flexboxgrid.css similarity index 100% rename from docs/css/flexboxgrid.css rename to docs/assets/css/flexboxgrid.css diff --git a/docs/css/funnel.css b/docs/assets/css/funnel.css similarity index 100% rename from docs/css/funnel.css rename to docs/assets/css/funnel.css diff --git a/docs/css/highlight.min.css b/docs/assets/css/highlight.min.css similarity index 100% rename from docs/css/highlight.min.css rename to docs/assets/css/highlight.min.css diff --git a/docs/css/html5reset.css b/docs/assets/css/html5reset.css similarity index 100% rename from docs/css/html5reset.css rename to docs/assets/css/html5reset.css diff --git a/docs/css/hybrid.css b/docs/assets/css/hybrid.css similarity index 100% rename from docs/css/hybrid.css rename to docs/assets/css/hybrid.css diff --git a/docs/css/monokai-sublime.css 
b/docs/assets/css/monokai-sublime.css similarity index 100% rename from docs/css/monokai-sublime.css rename to docs/assets/css/monokai-sublime.css diff --git a/docs/css/poole.css b/docs/assets/css/poole.css similarity index 100% rename from docs/css/poole.css rename to docs/assets/css/poole.css diff --git a/docs/css/syntax.css b/docs/assets/css/syntax.css similarity index 100% rename from docs/css/syntax.css rename to docs/assets/css/syntax.css diff --git a/docs/css/theme.css b/docs/assets/css/theme.css similarity index 100% rename from docs/css/theme.css rename to docs/assets/css/theme.css diff --git a/docs/sifter_example.png b/docs/assets/sifter_example.png similarity index 100% rename from docs/sifter_example.png rename to docs/assets/sifter_example.png diff --git a/docs/categories/index.xml b/docs/categories/index.xml deleted file mode 100644 index 4b26c88..0000000 --- a/docs/categories/index.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - Categories on Sifter - https://bmeg.github.io/sifter/categories/ - Recent content in Categories on Sifter - Hugo -- gohugo.io - en-us - - - diff --git a/docs/docs/.nav.yml b/docs/docs/.nav.yml new file mode 100644 index 0000000..1d7fa65 --- /dev/null +++ b/docs/docs/.nav.yml @@ -0,0 +1,11 @@ + +title: Sifter Documentation + +nav: + - index.md + - example.md + - schema.md + - config.md + - inputs + - transforms + - outputs \ No newline at end of file diff --git a/docs/docs/config.md b/docs/docs/config.md new file mode 100644 index 0000000..391e21c --- /dev/null +++ b/docs/docs/config.md @@ -0,0 +1,34 @@ +--- +title: Parameters +--- + +## Parameters Variables + +Playbooks can be parameterized. They are defined in the `params` section of the playbook YAML file. 
+ +### Configuration Syntax +```yaml +params: + variableName: + type: File # one of: File, Path, String, Number + default: "path/to/default" +``` + +### Supported Types +- `File`: Represents a file path +- `Dir`: Represents a directory path + +### Example Configuration +```yaml +params: + inputDir: + type: Dir + default: "/data/input" + outputDir: + type: Dir + default: "/data/output" + schemaFile: + type: File + default: "/config/schema.json" +``` + diff --git a/docs/docs/developers/source_mapping.md b/docs/docs/developers/source_mapping.md new file mode 100644 index 0000000..335e52f --- /dev/null +++ b/docs/docs/developers/source_mapping.md @@ -0,0 +1,48 @@ +# SIFTER Project Documentation to Source Code Mapping + +## Inputs + +| Documentation File | Source Code File | +|-------------------|------------------| +| docs/docs/inputs/avro.md | extractors/avro_load.go | +| docs/docs/inputs/embedded.md | extractors/embedded.go | +| docs/docs/inputs/glob.md | extractors/glob_load.go | +| docs/docs/inputs/json.md | extractors/json_load.go | +| docs/docs/inputs/plugin.md | extractors/plugin_load.go | +| docs/docs/inputs/sqldump.md | extractors/sqldump_step.go | +| docs/docs/inputs/sqlite.md | extractors/sqlite_load.go | +| docs/docs/inputs/table.md | extractors/tabular_load.go | +| docs/docs/inputs/xml.md | extractors/xml_step.go | + +## Transforms + +| Documentation File | Source Code File | +|-------------------|------------------| +| docs/docs/transforms/accumulate.md | transform/accumulate.go | +| docs/docs/transforms/clean.md | transform/clean.go | +| docs/docs/transforms/debug.md | transform/debug.go | +| docs/docs/transforms/distinct.md | transform/distinct.go | +| docs/docs/transforms/fieldParse.md | transform/field_parse.go | +| docs/docs/transforms/fieldProcess.md | transform/field_process.go | +| docs/docs/transforms/fieldType.md | transform/field_type.go | +| docs/docs/transforms/filter.md | transform/filter.go | +| docs/docs/transforms/flatmap.md | 
transform/flat_map.go | +| docs/docs/transforms/from.md | transform/from.go | +| docs/docs/transforms/hash.md | transform/hash.go | +| docs/docs/transforms/lookup.md | transform/lookup.go | +| docs/docs/transforms/map.md | transform/mapping.go | +| docs/docs/transforms/objectValidate.md | transform/object_validate.go | +| docs/docs/transforms/plugin.md | transform/plugin.go | +| docs/docs/transforms/project.md | transform/project.go | +| docs/docs/transforms/reduce.md | transform/reduce.go | +| docs/docs/transforms/regexReplace.md | transform/regex.go | +| docs/docs/transforms/split.md | transform/split.go | +| docs/docs/transforms/uuid.md | transform/uuid.go | + +## Outputs + +| Documentation File | Source Code File | +|-------------------|------------------| +| docs/docs/outputs/graphBuild.md | playbook/output_graph.go | +| docs/docs/outputs/json.md | playbook/output_json.go | +| docs/docs/outputs/tableWrite.md | playbook/output_table.go | \ No newline at end of file diff --git a/website/content/docs/example.md b/docs/docs/example.md similarity index 82% rename from website/content/docs/example.md rename to docs/docs/example.md index d1d29b0..d506f6b 100644 --- a/website/content/docs/example.md +++ b/docs/docs/example.md @@ -1,11 +1,3 @@ ---- -title: Example -menu: - main: - identifier: example - weight: 3 ---- - # Example Pipeline Our first task will be to convert a ZIP code TSV into a set of county level @@ -13,7 +5,7 @@ entries. The input file looks like: -``` +```csv ZIP,COUNTYNAME,STATE,STCOUNTYFP,CLASSFP 36003,Autauga County,AL,01001,H1 36006,Autauga County,AL,01001,H1 @@ -27,44 +19,50 @@ ZIP,COUNTYNAME,STATE,STCOUNTYFP,CLASSFP First is the header of the pipeline. This declares the unique name of the pipeline and it's output directory. -``` +```yaml name: zipcode_map outdir: ./ docs: Converts zipcode TSV into graph elements ``` -Next the configuration is declared. In this case the only input is the zipcode TSV. 
-There is a default value, so the pipeline can be invoked without passing in +Next the parameters are declared. In this case the only parameter is the path to the +zipcode TSV. There is a default value, so the pipeline can be invoked without passing in any parameters. However, to apply this pipeline to a new input file, the -input parameter `zipcode` could be used to define the source file. +input parameter `zipcode` could be used to define the source file. +Path and File Parameters can be relative to the directory that the playbook file is in. -``` -config: - schema: ../covid19_datadictionary/gdcdictionary/schemas/ - zipcode: ../data/ZIP-COUNTY-FIPS_2017-06.csv +```yaml +params: + schema: + type: path + default: ../covid19_datadictionary/gdcdictionary/schemas/ + zipcode: + type: path + default: ../data/ZIP-COUNTY-FIPS_2017-06.csv ``` The `inputs` section declares data input sources. In this pipeline, there is only one input, which is to run the table loader. -``` +```yaml inputs: - tableLoad: - input: "{{config.zipcode}}" - sep: "," + zipcode: + table: + path: "{{params.zipcode}}" + sep: "," ``` Tableload operaters of the input file that was originally passed in using the `inputs` stanza. SIFTER string parsing is based on mustache template system. -To access the string passed in the template is `{{config.zipcode}}`. +To access the string passed in the template is `{{params.zipcode}}`. The seperator in the file input file is a `,` so that is also passed in as a parameter to the extractor. -The `tableLoad` extractor opens up the TSV and generates a one message for +The `table` extractor opens up the TSV and generates a one message for every row in the file. It uses the header of the file to map the column values into a dictionary. The first row would produce the message: -``` +```json { "ZIP" : "36003", "COUNTYNAME" : "Autauga County", @@ -85,14 +83,14 @@ and produces a single output message. 
The two messages: -``` +```json { "ZIP" : "36003", "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"} { "ZIP" : "36006", "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"} ``` Would be merged into the message: -``` +```json { "ZIP" : ["36003", "36006"], "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"} ``` @@ -100,7 +98,7 @@ The `reduce` transform step uses a block of python code to describe the function The `method` field names the function, in this case `merge` that will be used as the reduce function. -``` +```yaml zipReduce: - from: zipcode - reduce: @@ -122,7 +120,7 @@ to project data into new files in the message. The template engine has the curre message data in the value `row`. So the value `FIPS:{{row.STCOUNTYFP}}` is mapped into the field `id`. -``` +```yaml - project: mapping: id: "FIPS:{{row.STCOUNTYFP}}" @@ -136,7 +134,7 @@ message data in the value `row`. So the value Using this projection, the message: -``` +```json { "ZIP" : ["36003", "36006"], "COUNTYNAME" : "Autauga County", @@ -148,7 +146,7 @@ Using this projection, the message: would become -``` +```json { "id" : "FIPS:01001", "province_state" : "AL", @@ -165,13 +163,14 @@ would become } ``` -Now that the data has been remapped, we pass the data into the 'objectCreate' -transformation, which will read in the schema for `summary_location`, check the +Now that the data has been remapped, we pass the data into the 'objectValidate' +step, which will open the schema directory and find the class titled `summary_location`, check the message to make sure it matches and then output it. -``` - - objectCreate: - class: summary_location +```yaml + - objectValidate: + title: summary_location + schema: {{params.schema}} ``` @@ -182,12 +181,12 @@ To create an output table, with two columns connecting code, used by the census office. 
A single FIPS code my contain many ZIP codes, and we can use this table later for mapping ids when loading the data into a database. -``` +```yaml outputs: zip2fips: tableWrite: - from: - output: zip2fips + from: zipReduce + path: zip2fips.tsv columns: - ZIP - STCOUNTYFP diff --git a/docs/docs/example/index.html b/docs/docs/example/index.html deleted file mode 100644 index af38dec..0000000 --- a/docs/docs/example/index.html +++ /dev/null @@ -1,507 +0,0 @@ - - - - - - - - - - - Example · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

Example Pipeline

-

Our first task will be to convert a ZIP code TSV into a set of county level -entries.

-

The input file looks like:

-
ZIP,COUNTYNAME,STATE,STCOUNTYFP,CLASSFP
-36003,Autauga County,AL,01001,H1
-36006,Autauga County,AL,01001,H1
-36067,Autauga County,AL,01001,H1
-36066,Autauga County,AL,01001,H1
-36703,Autauga County,AL,01001,H1
-36701,Autauga County,AL,01001,H1
-36091,Autauga County,AL,01001,H1
-

First is the header of the pipeline. This declares the -unique name of the pipeline and it’s output directory.

-
name: zipcode_map
-outdir: ./
-docs: Converts zipcode TSV into graph elements
-

Next the configuration is declared. In this case the only input is the zipcode TSV. -There is a default value, so the pipeline can be invoked without passing in -any parameters. However, to apply this pipeline to a new input file, the -input parameter zipcode could be used to define the source file.

-
config:
-  schema: ../covid19_datadictionary/gdcdictionary/schemas/
-  zipcode: ../data/ZIP-COUNTY-FIPS_2017-06.csv
-

The inputs section declares data input sources. In this pipeline, there is -only one input, which is to run the table loader.

-
inputs:
-  tableLoad:
-    input: "{{config.zipcode}}"
-    sep: ","
-

Tableload operaters of the input file that was originally passed in using the -inputs stanza. SIFTER string parsing is based on mustache template system. -To access the string passed in the template is {{config.zipcode}}. -The seperator in the file input file is a , so that is also passed in as a -parameter to the extractor.

-

The tableLoad extractor opens up the TSV and generates a one message for -every row in the file. It uses the header of the file to map the column values -into a dictionary. The first row would produce the message:

-
{
-    "ZIP" : "36003",
-    "COUNTYNAME" : "Autauga County",
-    "STATE" : "AL",
-    "STCOUNTYFP" : "01001",
-    "CLASSFP" : "H1"
-}
-

The stream of messages are then passed into the steps listed in the transform -section of the tableLoad extractor.

-

For the current tranform, we want to produce a single entry per STCOUNTYFP, -however, the file has a line per ZIP. We need to run a reduce transform, -that collects rows togeather using a field key, which in this case is "{{row.STCOUNTYFP}}", -and then runs a function merge that takes two messages, merges them togeather -and produces a single output message.

-

The two messages:

-
{ "ZIP" : "36003", "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"}
-{ "ZIP" : "36006", "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"}
-

Would be merged into the message:

-
{ "ZIP" : ["36003", "36006"], "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"}
-

The reduce transform step uses a block of python code to describe the function. -The method field names the function, in this case merge that will be used -as the reduce function.

-
  zipReduce:
-    - from: zipcode
-    - reduce:
-        field: STCOUNTYFP
-        method: merge
-        python: >
-          def merge(x,y):
-            a = x.get('zipcodes', []) + [x['ZIP']]
-            b = y.get('zipcodes', []) + [y['ZIP']]
-            x['zipcodes'] = a + b
-            return x
-

The original messages produced by the loader have all of the information required -by the summary_location object type as described by the JSON schema that was linked -to in the header stanza. However, the data is all under the wrong field names. -To remap the data, we use a project tranformation that uses the template engine -to project data into new files in the message. The template engine has the current -message data in the value row. So the value -FIPS:{{row.STCOUNTYFP}} is mapped into the field id.

-
  - project:
-      mapping:
-        id: "FIPS:{{row.STCOUNTYFP}}"
-        province_state: "{{row.STATE}}"
-        summary_locations: "{{row.STCOUNTYFP}}"
-        county: "{{row.COUNTYNAME}}"
-        submitter_id: "{{row.STCOUNTYFP}}"
-        type: summary_location
-        projects: []
-

Using this projection, the message:

-
{
-  "ZIP" : ["36003", "36006"],
-  "COUNTYNAME" : "Autauga County",
-  "STATE" : "AL",
-  "STCOUNTYFP" : "01001",
-  "CLASSFP" : "H1"
-}
-

would become

-
{
-  "id" : "FIPS:01001",
-  "province_state" : "AL",
-  "summary_locations" : "01001",
-  "county" : "Autauga County",
-  "submitter_id" : "01001",
-  "type" : "summary_location"
-  "projects" : [],
-  "ZIP" : ["36003", "36006"],
-  "COUNTYNAME" : "Autauga County",
-  "STATE" : "AL",
-  "STCOUNTYFP" : "01001",
-  "CLASSFP" : "H1"
-}
-

Now that the data has been remapped, we pass the data into the ‘objectCreate’ -transformation, which will read in the schema for summary_location, check the -message to make sure it matches and then output it.

-
  - objectCreate:
-        class: summary_location
-

Outputs

-

To create an output table, with two columns connecting -ZIP values to STCOUNTYFP values. The STCOUNTYFP is a county level FIPS -code, used by the census office. A single FIPS code my contain many ZIP codes, -and we can use this table later for mapping ids when loading the data into a database.

-
outputs:
-  zip2fips:
-    tableWrite:
-      from: 
-      output: zip2fips
-      columns:
-        - ZIP
-        - STCOUNTYFP
-
-
- -
- - diff --git a/docs/docs/index.html b/docs/docs/index.html deleted file mode 100644 index 6e55370..0000000 --- a/docs/docs/index.html +++ /dev/null @@ -1,528 +0,0 @@ - - - - - - - - - - - Overview · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

Sifter pipelines

-

Sifter pipelines process steams of nested JSON messages. Sifter comes with a number of -file extractors that operate as inputs to these pipelines. The pipeline engine -connects togeather arrays of transform steps into directed acylic graph that is processed -in parallel.

-

Example Message:

-
{
-  "firstName" : "bob",
-  "age" : "25"
-  "friends" : [ "Max", "Alex"]
-}
-

Once a stream of messages are produced, that can be run through a transform -pipeline. A transform pipeline is an array of transform steps, each transform -step can represent a different way to alter the data. The array of transforms link -togeather into a pipe that makes multiple alterations to messages as they are -passed along. There are a number of different transform steps types that can -be done in a transform pipeline these include:

- -

Script structure

-

Pipeline File

-

An sifter pipeline file is in YAML format and describes an entire processing pipelines. -If is composed of the following sections: config, inputs, pipelines, outputs. In addition, -for tracking, the file will also include name and class entries.

-

-class: sifter
-name: <script name>
-outdir: <where output files should go, relative to this file>
-
-config:
-  <config key>: <config value>
-  <config key>: <config value> 
-  # values that are referenced in pipeline parameters for 
-  # files will be treated like file paths and be 
-  # translated to full paths
-
-inputs:
-  <input name>:
-    <input driver>:
-      <driver config>
-
-pipelines:
-  <pipeline name>:
-    # all pipelines must start with a from step
-    - from: <name of input or pipeline> 
-    - <transform name>:
-       <transform parameters>
-
-outputs:
-  <output name>:
-    <output driver>:
-      <driver config>
-
-

Each sifter file starts with a set of field to let the software know this is a sifter script, and not some random YAML file. There is also a name field for the script. This name will be used for output file creation and logging. Finally, there is an outdir that defines the directory where all output files will be placed. All paths are relative to the script file, so the outdir set to my-results will create the directory my-results in the same directory as the script file, regardless of where the sifter command is invoked.

-
class : sifter
-name: <name of script>
-outdir: <where files should be stored>
-

Config and templating

-

The config section is a set of defined keys that are used throughout the rest of the script.

-

Example config:

-
config:
-  sqlite:  ../../source/chembl/chembl_33/chembl_33_sqlite/chembl_33.db
-  uniprot2ensembl: ../../tables/uniprot2ensembl.tsv
-  schema: ../../schema/
-

Various fields in the script file will be be parsed using a Mustache template engine. For example, to access the various values within the config block, the template {{config.sqlite}}.

-

Inputs

-

The input block defines the various data extractors that will be used to open resources and create streams of JSON messages for processing. The possible input engines include:

- -

For any other file types, there is also a plugin option to allow the user to call their own code for opening files.

-

Pipeline

-

The pipelines defined a set of named processing pipelines that can be used to transform data. Each pipeline starts with a from statement that defines where data comes from. It then defines a linear set of transforms that are chained togeather to do processing. Pipelines may used emit steps to output messages to disk. The possible data transform steps include:

- -

Additionally, users are able to define their one transform step types using the plugin step.

-

Example script

-
class: sifter
-
-name: go
-outdir: ../../output/go/
-
-config:
-  oboFile: ../../source/go/go.obo
-  schema: ../../schema
-
-inputs:
-  oboData:
-    plugin:
-      commandLine: ../../util/obo_reader.py {{config.oboFile}}
-
-pipelines:
-  transform:
-    - from: oboData
-    - project:
-        mapping:
-          submitter_id: "{{row.id[0]}}"
-          case_id: "{{row.id[0]}}"
-          id: "{{row.id[0]}}"
-          go_id: "{{row.id[0]}}"
-          project_id: "gene_onotology"
-          namespace: "{{row.namespace[0]}}"
-          name: "{{row.name[0]}}"
-    - map: 
-        method: fix
-        gpython: | 
-          def fix(row):
-            row['definition'] = row['def'][0].strip('"')
-            if 'xref' not in row:
-              row['xref'] = []
-            if 'synonym' not in row:
-              row['synonym'] = []
-            return row
-    - objectValidate:
-        title: GeneOntologyTerm
-        schema: "{{config.schema}}"
-    - emit:
-        name: term
-
-
- -
- - diff --git a/website/content/docs.md b/docs/docs/index.md similarity index 81% rename from website/content/docs.md rename to docs/docs/index.md index d45d044..2601927 100644 --- a/website/content/docs.md +++ b/docs/docs/index.md @@ -1,17 +1,13 @@ - --- -title: Overview -menu: - main: - identifier: overview - weight: 1 +title: Sifter --- -# Sifter pipelines -Sifter pipelines process steams of nested JSON messages. Sifter comes with a number of +# Sifter + +Sifter is a stream based processing engine. It comes with a number of file extractors that operate as inputs to these pipelines. The pipeline engine -connects togeather arrays of transform steps into directed acylic graph that is processed +connects togeather several processing data into directed acylic graph that is processed in parallel. Example Message: @@ -43,7 +39,7 @@ be done in a transform pipeline these include: # Pipeline File An sifter pipeline file is in YAML format and describes an entire processing pipelines. -If is composed of the following sections: `config`, `inputs`, `pipelines`, `outputs`. In addition, +If is composed of the following sections: `params`, `inputs`, `pipelines`, `outputs`. In addition, for tracking, the file will also include `name` and `class` entries. ```yaml @@ -52,9 +48,12 @@ class: sifter name: - - - - - - - - -
- - - -
-

avroLoad

-

Load an AvroFile

-

Parameters

- - - - - - - - - - - - - -
nameDescription
inputPath to input file
- -
- -
- - diff --git a/website/content/docs/inputs/embedded.md b/docs/docs/inputs/embedded.md similarity index 100% rename from website/content/docs/inputs/embedded.md rename to docs/docs/inputs/embedded.md diff --git a/docs/docs/inputs/embedded/index.html b/docs/docs/inputs/embedded/index.html deleted file mode 100644 index b1fd8ac..0000000 --- a/docs/docs/inputs/embedded/index.html +++ /dev/null @@ -1,387 +0,0 @@ - - - - - - - - - - - embedded · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

embedded

-

Load data from embedded structure

-

Example

-
inputs:
-  data:
-    embedded:
-      - { "name" : "Alice", "age": 28 }
-      - { "name" : "Bob", "age": 27 }
-
-
- -
- - diff --git a/website/content/docs/inputs/glob.md b/docs/docs/inputs/glob.md similarity index 59% rename from website/content/docs/inputs/glob.md rename to docs/docs/inputs/glob.md index 5c20fdc..451d5ea 100644 --- a/website/content/docs/inputs/glob.md +++ b/docs/docs/inputs/glob.md @@ -16,10 +16,10 @@ as input. |-------|--------| | storeFilename | Store value of filename in parameter each row | | input | Path of avro object file to transform | -| xmlLoad | xmlLoad configutation | -| tableLoad | Run transform pipeline on a TSV or CSV | -| jsonLoad | Run a transform pipeline on a multi line json file | -| avroLoad | Load data from avro file | +| xml | xmlLoad configutation | +| table | Run transform pipeline on a TSV or CSV | +| json | Run a transform pipeline on a multi line json file | +| avro | Load data from avro file | ## Example @@ -27,7 +27,7 @@ as input. inputs: pubmedRead: glob: - input: "{{config.baseline}}/*.xml.gz" - xmlLoad: {} + path: "{{params.baseline}}/*.xml.gz" + xml: {} ``` \ No newline at end of file diff --git a/docs/docs/inputs/glob/index.html b/docs/docs/inputs/glob/index.html deleted file mode 100644 index 5039085..0000000 --- a/docs/docs/inputs/glob/index.html +++ /dev/null @@ -1,423 +0,0 @@ - - - - - - - - - - - glob · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

glob

-

Scan files using * based glob statement and open all files -as input.

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameDescription
storeFilenameStore value of filename in parameter each row
inputPath of avro object file to transform
xmlLoadxmlLoad configutation
tableLoadRun transform pipeline on a TSV or CSV
jsonLoadRun a transform pipeline on a multi line json file
avroLoadLoad data from avro file
-

Example

-
inputs:
-  pubmedRead:
-    glob:
-      input: "{{config.baseline}}/*.xml.gz"
-      xmlLoad: {}
-
-
- -
- - diff --git a/docs/docs/inputs/gripperload/index.html b/docs/docs/inputs/gripperload/index.html deleted file mode 100644 index 2d720cb..0000000 --- a/docs/docs/inputs/gripperload/index.html +++ /dev/null @@ -1,350 +0,0 @@ - - - - - - - - - - - gripperLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/docs/docs/inputs/index.html b/docs/docs/inputs/index.html deleted file mode 100644 index f689cfb..0000000 --- a/docs/docs/inputs/index.html +++ /dev/null @@ -1,380 +0,0 @@ - - - - - - - - - - - Inputs · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

Every playbook consists of a series of inputs.

- -
- -
- - diff --git a/docs/docs/inputs/index.md b/docs/docs/inputs/index.md new file mode 100644 index 0000000..f858b3c --- /dev/null +++ b/docs/docs/inputs/index.md @@ -0,0 +1,56 @@ +--- +title: Inputs +menu: + main: + identifier: inputs + weight: 4 +--- + +Every playbook has a section of **input loaders** – components that read raw data (files, APIs, databases, etc.) and convert it into Python objects for downstream steps. +An *input* can accept user‑supplied values passed by the **params** section. + +## Common input types + +* `table` – extracts data from tabular files (TSV/CSV) +* `avro` – loads an Avro file (see `docs/docs/inputs/avro.md`) +* `json`, `csv`, `sql`, etc. + +## Example – `table` + +The `table` loader is a good starting point because it demonstrates the typical parameter set required by most inputs. See the full specification in `docs/docs/inputs/table.md`: + +```yaml +params: + gafFile: + type: File + default: ../../source/go/goa_human.gaf.gz + +inputs: + gafLoad: + tableLoad: + path: "{{params.gafFile}}" + columns: + - db + - id + - symbol + - qualifier + - goID + - reference + - evidenceCode + - from + - aspect + - name + - synonym + - objectType + - taxon + - date + - assignedBy + - extension + - geneProduct +``` + +When you run the playbook you can override any of these parameters, e.g.: + +```bash +sifter run gatplaybook.yaml --param gafFile=/tmp/mydata.tsv +``` diff --git a/website/content/docs/inputs/jsonLoad.md b/docs/docs/inputs/json.md similarity index 77% rename from website/content/docs/inputs/jsonLoad.md rename to docs/docs/inputs/json.md index 42a4a2b..cbbe45c 100644 --- a/website/content/docs/inputs/jsonLoad.md +++ b/docs/docs/inputs/json.md @@ -1,19 +1,19 @@ --- -title: jsonLoad +title: json menu: main: parent: inputs weight: 100 --- -# jsonLoad +# json Load data from a JSON file. Default behavior expects a single dictionary per line. Each line is a seperate entry. 
The `multiline` parameter reads all of the lines of the files and returns a single object. ## Parameters | name | Description | | --- | --- | -| input | Path of JSON file to transform | +| path | Path of JSON file to transform | | multiline | Load file as a single multiline JSON object | @@ -22,6 +22,6 @@ Load data from a JSON file. Default behavior expects a single dictionary per lin ```yaml inputs: caseData: - jsonLoad: - input: "{{config.casesJSON}}" + json: + path: "{{params.casesJSON}}" ``` \ No newline at end of file diff --git a/docs/docs/inputs/jsonload/index.html b/docs/docs/inputs/jsonload/index.html deleted file mode 100644 index 012d9ba..0000000 --- a/docs/docs/inputs/jsonload/index.html +++ /dev/null @@ -1,405 +0,0 @@ - - - - - - - - - - - jsonLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

jsonLoad

-

Load data from a JSON file. Default behavior expects a single dictionary per line. Each line is a seperate entry. The multiline parameter reads all of the lines of the files and returns a single object.

-

Parameters

- - - - - - - - - - - - - - - - - -
nameDescription
inputPath of JSON file to transform
multilineLoad file as a single multiline JSON object
-

Example

-
inputs:
-  caseData:
-    jsonLoad:
-      input: "{{config.casesJSON}}"
-
-
- -
- - diff --git a/website/content/docs/inputs/plugin.md b/docs/docs/inputs/plugin.md similarity index 96% rename from website/content/docs/inputs/plugin.md rename to docs/docs/inputs/plugin.md index 2036c9b..c829887 100644 --- a/website/content/docs/inputs/plugin.md +++ b/docs/docs/inputs/plugin.md @@ -15,7 +15,7 @@ Run user program for customized data extraction. inputs: oboData: plugin: - commandLine: ../../util/obo_reader.py {{config.oboFile}} + commandLine: ../../util/obo_reader.py {{params.oboFile}} ``` The plugin program is expected to output JSON messages, one per line, to STDOUT that will then diff --git a/docs/docs/inputs/plugin/index.html b/docs/docs/inputs/plugin/index.html deleted file mode 100644 index 3c35ee2..0000000 --- a/docs/docs/inputs/plugin/index.html +++ /dev/null @@ -1,437 +0,0 @@ - - - - - - - - - - - input plugin · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

plugin

-

Run user program for customized data extraction.

-

Example

-
inputs:
-  oboData:
-    plugin:
-      commandLine: ../../util/obo_reader.py {{config.oboFile}}
-

The plugin program is expected to output JSON messages, one per line, to STDOUT that will then -be passed to the transform pipelines.

-

Example Plugin

-

The obo_reader.py plugin, it reads a OBO file, such as the kind the describe the GeneOntology, and emits the -records as single line JSON messages.

-
 #!/usr/bin/env python
-
-import re
-import sys
-import json
-
-re_section = re.compile(r'^\[(.*)\]')
-re_field = re.compile(r'^(\w+): (.*)$')
-
-def obo_parse(handle):
-    rec = None
-    for line in handle:
-        res = re_section.search(line)
-        if res:
-            if rec is not None:
-                yield rec
-            rec = None
-            if res.group(1) == "Term":
-                rec = {"type": res.group(1)}
-        else:
-            if rec is not None:
-                res = re_field.search(line)
-                if res:
-                    key = res.group(1)
-                    val = res.group(2)
-                    val = re.split(" ! | \(|\)", val)
-                    val = ":".join(val[0:3])
-                    if key in rec:
-                        rec[key].append(val)
-                    else:
-                        rec[key] = [val]
-
-    if rec is not None:
-        yield rec
-
-
-def unquote(s):
-    res = re.search(r'"(.*)"', s)
-    if res:
-        return res.group(1)
-    return s
-
-
-with open(sys.argv[1]) as handle:
-    for rec in obo_parse(handle):
-        print(json.dumps(rec))
-
-
- -
- - diff --git a/website/content/docs/inputs/sqldump.md b/docs/docs/inputs/sqldump.md similarity index 85% rename from website/content/docs/inputs/sqldump.md rename to docs/docs/inputs/sqldump.md index 1a958d0..174a966 100644 --- a/website/content/docs/inputs/sqldump.md +++ b/docs/docs/inputs/sqldump.md @@ -13,7 +13,7 @@ Scan file produced produced from sqldump. | Name | Type | Description | |-------|---|--------| -| input | string | Path to the SQL dump file | +| path | string | Path to the SQL dump file | | tables | []string | Names of tables to read out | ## Example @@ -22,7 +22,7 @@ Scan file produced produced from sqldump. inputs: database: sqldumpLoad: - input: "{{config.sql}}" + path: "{{params.sql}}" tables: - cells - cell_tissues diff --git a/docs/docs/inputs/sqldump/index.html b/docs/docs/inputs/sqldump/index.html deleted file mode 100644 index 44b2a7c..0000000 --- a/docs/docs/inputs/sqldump/index.html +++ /dev/null @@ -1,416 +0,0 @@ - - - - - - - - - - - sqldump · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

sqlDump

-

Scan file produced from sqldump.

-

Parameters

- - - - - - - - - - - - - - - - - - - - -
NameTypeDescription
inputstringPath to the SQL dump file
tables[]stringNames of tables to read out
-

Example

-
inputs:
-  database:
-    sqldumpLoad:
-      input: "{{config.sql}}"
-      tables:
-        - cells
-        - cell_tissues
-        - dose_responses
-        - drugs
-        - drug_annots
-        - experiments
-        - profiles
-
-
- -
- - diff --git a/website/content/docs/inputs/sqliteLoad.md b/docs/docs/inputs/sqlite.md similarity index 83% rename from website/content/docs/inputs/sqliteLoad.md rename to docs/docs/inputs/sqlite.md index b0f323f..0fb66fb 100644 --- a/website/content/docs/inputs/sqliteLoad.md +++ b/docs/docs/inputs/sqlite.md @@ -1,12 +1,12 @@ --- -title: sqliteLoad +title: sqlite menu: main: parent: inputs weight: 100 --- -# sqliteLoad +# sqlite Extract data from an sqlite file @@ -14,7 +14,7 @@ Extract data from an sqlite file | Name | Type | Description | |-------|---|--------| -| input | string | Path to the SQLite file | +| path | string | Path to the SQLite file | | query | string | SQL select statement based input | ## Example @@ -24,7 +24,7 @@ Extract data from an sqlite file inputs: sqlQuery: sqliteLoad: - input: "{{config.sqlite}}" + path: "{{params.sqlite}}" query: "select * from drug_mechanism as a LEFT JOIN MECHANISM_REFS as b on a.MEC_ID=b.MEC_ID LEFT JOIN TARGET_COMPONENTS as c on a.TID=c.TID LEFT JOIN COMPONENT_SEQUENCES as d on c.COMPONENT_ID=d.COMPONENT_ID LEFT JOIN MOLECULE_DICTIONARY as e on a.MOLREGNO=e.MOLREGNO" ``` \ No newline at end of file diff --git a/docs/docs/inputs/sqliteload/index.html b/docs/docs/inputs/sqliteload/index.html deleted file mode 100644 index fd759fb..0000000 --- a/docs/docs/inputs/sqliteload/index.html +++ /dev/null @@ -1,410 +0,0 @@ - - - - - - - - - - - sqliteLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

sqliteLoad

-

Extract data from an sqlite file

-

Parameters

- - - - - - - - - - - - - - - - - - - - -
NameTypeDescription
inputstringPath to the SQLite file
querystringSQL select statement based input
-

Example

-

-inputs:
-  sqlQuery:
-    sqliteLoad:
-      input: "{{config.sqlite}}"
-      query: "select * from drug_mechanism as a LEFT JOIN MECHANISM_REFS as b on a.MEC_ID=b.MEC_ID LEFT JOIN TARGET_COMPONENTS as c on a.TID=c.TID LEFT JOIN COMPONENT_SEQUENCES as d on c.COMPONENT_ID=d.COMPONENT_ID LEFT JOIN MOLECULE_DICTIONARY as e on a.MOLREGNO=e.MOLREGNO"
-
-
- -
- - diff --git a/website/content/docs/inputs/tableLoad.md b/docs/docs/inputs/table.md similarity index 82% rename from website/content/docs/inputs/tableLoad.md rename to docs/docs/inputs/table.md index 57eaf28..ce754b1 100644 --- a/website/content/docs/inputs/tableLoad.md +++ b/docs/docs/inputs/table.md @@ -1,12 +1,12 @@ --- -title: tableLoad +title: table menu: main: parent: inputs weight: 100 --- -# tableLoad +# table Extract data from tabular file, includiong TSV and CSV files. @@ -14,7 +14,7 @@ Extract data from tabular file, includiong TSV and CSV files. | Name | Type | Description | |-------|---|--------| -| input | string | File to be transformed | +| path | string | File to be transformed | | rowSkip | int | Number of header rows to skip | | columns | []string | Manually set names of columns | | extraColumns | string | Columns beyond originally declared columns will be placed in this array | @@ -25,13 +25,15 @@ Extract data from tabular file, includiong TSV and CSV files. ```yaml -config: - gafFile: ../../source/go/goa_human.gaf.gz +params: + gafFile: + default: ../../source/go/goa_human.gaf.gz + type: File inputs: gafLoad: tableLoad: - input: "{{config.gafFile}}" + path: "{{params.gafFile}}" columns: - db - id diff --git a/docs/docs/inputs/tableload/index.html b/docs/docs/inputs/tableload/index.html deleted file mode 100644 index 4341148..0000000 --- a/docs/docs/inputs/tableload/index.html +++ /dev/null @@ -1,445 +0,0 @@ - - - - - - - - - - - tableLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

tableLoad

-

Extract data from tabular files, including TSV and CSV files.

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameTypeDescription
inputstringFile to be transformed
rowSkipintNumber of header rows to skip
columns[]stringManually set names of columns
extraColumnsstringColumns beyond originally declared columns will be placed in this array
sepstringSeparator \t for TSVs or , for CSVs
-

Example

-

-config:
-  gafFile: ../../source/go/goa_human.gaf.gz
-
-inputs:
-  gafLoad:
-    tableLoad:
-      input: "{{config.gafFile}}"
-      columns:
-        - db
-        - id
-        - symbol
-        - qualifier
-        - goID
-        - reference
-        - evidenceCode
-        - from
-        - aspect
-        - name
-        - synonym
-        - objectType
-        - taxon
-        - date
-        - assignedBy
-        - extension
-        - geneProduct
-
-
- -
- - diff --git a/website/content/docs/inputs/xmlLoad.md b/docs/docs/inputs/xml.md similarity index 67% rename from website/content/docs/inputs/xmlLoad.md rename to docs/docs/inputs/xml.md index ed8c306..66cc092 100644 --- a/website/content/docs/inputs/xmlLoad.md +++ b/docs/docs/inputs/xml.md @@ -1,19 +1,19 @@ --- -title: xmlLoad +title: xml menu: main: parent: inputs weight: 100 --- -# xmlLoad +# xml Load an XML file ## Parameters | name | Description | | --- | --- | -| input | Path to input file | +| path | Path to input file | ## Example @@ -21,5 +21,5 @@ Load an XML file inputs: loader: xmlLoad: - input: "{{config.xmlPath}}" + path: "{{params.xmlPath}}" ``` \ No newline at end of file diff --git a/docs/docs/inputs/xmlload/index.html b/docs/docs/inputs/xmlload/index.html deleted file mode 100644 index 0266f3f..0000000 --- a/docs/docs/inputs/xmlload/index.html +++ /dev/null @@ -1,401 +0,0 @@ - - - - - - - - - - - xmlLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

xmlLoad

-

Load an XML file

-

Parameters

- - - - - - - - - - - - - -
nameDescription
inputPath to input file
-

Example

-
inputs:
-  loader:
-    xmlLoad:
-      input: "{{config.xmlPath}}"
-
-
- -
- - diff --git a/website/content/docs/transforms/graphBuild.md b/docs/docs/outputs/graphBuild.md similarity index 77% rename from website/content/docs/transforms/graphBuild.md rename to docs/docs/outputs/graphBuild.md index d2bef50..15bdd89 100644 --- a/website/content/docs/transforms/graphBuild.md +++ b/docs/docs/outputs/graphBuild.md @@ -6,7 +6,7 @@ menu: weight: 100 --- -# graphBuild +# Output: graphBuild Build graph elements from JSON objects using the JSON Schema graph extensions. @@ -14,6 +14,6 @@ Build graph elements from JSON objects using the JSON Schema graph extensions. # example ```yaml - graphBuild: - schema: "{{config.allelesSchema}}" + schema: "{{params.allelesSchema}}" title: Allele ``` \ No newline at end of file diff --git a/docs/docs/outputs/json.md b/docs/docs/outputs/json.md new file mode 100644 index 0000000..d6e93e2 --- /dev/null +++ b/docs/docs/outputs/json.md @@ -0,0 +1,26 @@ +--- +title: json +menu: + main: + parent: transforms + weight: 100 +--- + +# Output: json + +Send data to output file. The naming of the file is `outdir`/`path` + +## Parameters + +| name | Type | Description | +| --- | --- | --- | +| path | string | Path to output file | + +## example + +```yaml +output: + outfile: + json: + path: protein_compound_association.ndjson +``` \ No newline at end of file diff --git a/website/content/docs/transforms/tableWrite.md b/docs/docs/outputs/tableWrite.md similarity index 100% rename from website/content/docs/transforms/tableWrite.md rename to docs/docs/outputs/tableWrite.md diff --git a/docs/docs/playbook/index.html b/docs/docs/playbook/index.html deleted file mode 100644 index 4fa4897..0000000 --- a/docs/docs/playbook/index.html +++ /dev/null @@ -1,382 +0,0 @@ - - - - - - - - - - - Sifter Pipeline File · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

Pipeline File

-

A sifter pipeline file is in YAML format and describes an entire processing pipeline. -It is composed of the following sections: config, inputs, pipelines, outputs. In addition, -for tracking, the file will also include name and class entries.

-

-class: sifter
-name: <script name>
-outdir: <where output files should go, relative to this file>
-
-config:
-  <config key>: <config value>
-  <config key>: <config value> 
-  # values that are referenced in pipeline parameters for 
-  # files will be treated like file paths and be 
-  # translated to full paths
-
-inputs:
-  <input name>:
-    <input driver>:
-      <driver config>
-
-pipelines:
-  <pipeline name>:
-    # all pipelines must start with a from step
-    - from: <name of input or pipeline> 
-    - <transform name>:
-       <transform parameters>
-
-outputs:
-  <output name>:
-    <output driver>:
-      <driver config>
-
-
- -
- - diff --git a/docs/docs/schema.md b/docs/docs/schema.md new file mode 100644 index 0000000..6faf8bd --- /dev/null +++ b/docs/docs/schema.md @@ -0,0 +1,149 @@ +--- +title: Schema +--- + +# Sifter Playbook Schema + +This document provides a comprehensive description of the Sifter Playbook format, its input methods (extractors), and its transformation steps. + +## Playbook Structure + +A Playbook is a YAML file that defines an ETL pipeline. + +| Field | Type | Description | +| :--- | :--- | :--- | +| `class` | string | Should be `sifter`. | +| `name` | string | Unique name of the playbook. | +| `docs` | string | Documentation string for the playbook. | +| `outdir` | string | Default output directory for emitted files. | +| `params` | map | Configuration variables with optional defaults and types (`File`, `Dir`). | +| `inputs` | map | Named extractor definitions. | +| `outputs` | map | Named outputs definitions. | +| `pipelines` | map | Named transformation pipelines (arrays of steps). | + +--- + +## Parameters (`params`) + + +Parameters allow playbooks to be parameterized. They are defined in the `params` section of the playbook YAML file. + +### Params Syntax +```yaml +params: + variableName: + type: File # or Dir + default: "path/to/default" +``` + +### Supported Types +- `File`: Represents a file path +- `Dir`: Represents a directory path + +```yaml +params: + inputDir: + type: Dir + default: "./data/input" + outputDir: + type: Dir + default: "./data/output" + schemaFile: + type: File + default: "./config/schema.json" +``` + + +## Input Methods (Extractors) + +Extractors produce a stream of messages from various sources. + +### `table` +Loads data from a delimited file (TSV/CSV). +- `path`: Path to the file. +- `rowSkip`: Number of header rows to skip. +- `columns`: Optional list of column names. +- `extraColumns`: Field name to store any columns beyond the declared ones. +- `sep`: Separator (default `\t` for TSVs, `,` for CSVs). 
+ +### `json` +Loads data from a JSON file (standard or line-delimited). +- `path`: Path to the file. +- `multiline`: Load file as a single multiline JSON object. + +### `avro` +Loads data from an Avro object file. +- `path`: Path to the file. + +### `xml` +Loads and parses XML data. +- `path`: Path to the file. +- `level`: Depth level to start breaking XML into discrete messages. + +### `sqlite` +Loads data from a SQLite database. +- `path`: Path to the database file. +- `query`: SQL SELECT statement. + +### `transpose` +Loads a TSV and transposes it (making rows from columns). +- `path`: Path to the file. +- `rowSkip`: Rows to skip. +- `sep`: Separator. +- `useDB`: Use a temporary disk database for large transpositions. + +### `plugin` (Extractor) +Runs an external command that produces JSON messages to stdout. +- `commandLine`: The command to execute. + +### `embedded` (Extractor) +Load data from embedded structure. +- No parameters required. + +### `glob` (Extractor) +Scan files using `*` based glob statement and open all files as input. +- `path`: Path of avro object file to transform. +- `storeFilename`: Store value of filename in parameter each row. +- `xml`: xmlLoad data. +- `table`: Run transform pipeline on a TSV or CSV. +- `json`: Run a transform pipeline on a multi line json file. +- `avro`: Load data from avro file. + +--- + +## Transformation Steps + +Transformation pipelines are arrays of steps. Each step can be one of the following: + +### Core Processing +- `from`: Start a pipeline from a named input or another pipeline. +- `emit`: Write messages to a JSON file. Fields: `name`, `useName` (bool). +- `objectValidate`: Validate messages against a JSON schema. Fields: `title`, `schema` (directory), `uri`. +- `debug`: Print message contents to stdout. Fields: `label`, `format`. +- `plugin` (Transform): Pipe messages through an external script via stdin/stdout. Fields: `commandLine`. 
+ +### Mapping and Projection +- `project`: Map templates into new fields. Fields: `mapping` (key-template pairs), `rename` (simple rename). +- `map`: Apply a Python/GPython function to each record. Fields: `method` (function name), `python` (code string), `gpython` (path or code). +- `flatMap`: Similar to `map`, but flattens list responses into multiple messages. +- `fieldParse`: Parse a string field (e.g. `key1=val1;key2=val2`) into individual keys. Fields: `field`, `sep`. +- `fieldType`: Cast fields to specific types (`int`, `float`, `list`). Represented as a map of `fieldName: type`. + +### Filtering and Cleaning +- `filter`: Drop messages based on criteria. Fields: `field`, `value`, `match`, `check` (`exists`/`hasValue`/`not`), or `python`/`gpython` code. +- `clean`: Remove fields. Fields: `fields` (list of kept fields), `removeEmpty` (bool), `storeExtra` (target field for extras). +- `dropNull`: Remove fields with `null` values from a message. +- `distinct`: Only emit messages with a unique value once. Field: `value` (template). + +### Grouping and Lookups +- `reduce`: Merge messages sharing a key. Fields: `field` (key), `method`, `python`/`gpython`, `init` (initial data). +- `accumulate`: Group all messages sharing a key into a list. Fields: `field` (key), `dest` (target list field). +- `lookup`: Join data from external files (TSV/JSON). Fields: `tsv`, `json`, `replace`, `lookup`, `copy` (mapping of fields to copy). +- `intervalIntersect`: Match genomic intervals. Fields: `match` (CHR), `start`, `end`, `field` (dest), `json` (source file). + +### Specialized +- `hash`: Generate a hash of a field. Fields: `field` (dest), `value` (template), `method` (`md5`, `sha1`, `sha256`). +- `uuid`: Generate a UUID. Fields: `field`, `value` (seed), `namespace`. +- `graphBuild`: Convert messages into graph vertices and edges using schema definitions. Fields: `schema`, `title`. +- `tableWrite`: Write specific fields to a delimited output file. 
Fields: `output`, `columns`, `sep`, `header`, `skipColumnHeader`. +- `split`: Split a single message into multiple based on a list field. diff --git a/website/content/docs/transforms/accumulate.md b/docs/docs/transforms/accumulate.md similarity index 100% rename from website/content/docs/transforms/accumulate.md rename to docs/docs/transforms/accumulate.md diff --git a/docs/docs/transforms/accumulate/index.html b/docs/docs/transforms/accumulate/index.html deleted file mode 100644 index 06bda8c..0000000 --- a/docs/docs/transforms/accumulate/index.html +++ /dev/null @@ -1,407 +0,0 @@ - - - - - - - - - - - accumulate · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

accumulate

-

Gather sequential rows into a single record, based on matching a field

-

Parameters

- - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
fieldstring (field path)Field used to match rows
deststringfield to store accumulated records
-

Example

-
  - accumulate:
-      field: model_id
-      dest: rows   
-
-
- -
- - diff --git a/website/content/docs/transforms/clean.md b/docs/docs/transforms/clean.md similarity index 100% rename from website/content/docs/transforms/clean.md rename to docs/docs/transforms/clean.md diff --git a/docs/docs/transforms/clean/index.html b/docs/docs/transforms/clean/index.html deleted file mode 100644 index 69bd418..0000000 --- a/docs/docs/transforms/clean/index.html +++ /dev/null @@ -1,413 +0,0 @@ - - - - - - - - - - - clean · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

clean

-

Remove fields that don't appear in the designated list.

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
fields[] stringFields to keep
removeEmptyboolFields with empty values will also be removed
storeExtrastringField name to store removed fields
-

Example

-
    - clean:
-        fields:
-          - id
-          - synonyms
-
-
- -
- - diff --git a/website/content/docs/transforms/debug.md b/docs/docs/transforms/debug.md similarity index 83% rename from website/content/docs/transforms/debug.md rename to docs/docs/transforms/debug.md index 5c87eae..e8479aa 100644 --- a/website/content/docs/transforms/debug.md +++ b/docs/docs/transforms/debug.md @@ -1,9 +1,5 @@ --- title: debug -menu: - main: - parent: transforms - weight: 100 --- # debug diff --git a/docs/docs/transforms/debug/index.html b/docs/docs/transforms/debug/index.html deleted file mode 100644 index 58cd159..0000000 --- a/docs/docs/transforms/debug/index.html +++ /dev/null @@ -1,405 +0,0 @@ - - - - - - - - - - - debug · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

debug

-

Print out copy of stream to logging

-

Parameters

- - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
labelstringLabel for log output
formatboolUse multiline spaced output
-

Example

-
    - debug: {}
-
-
- -
- - diff --git a/website/content/docs/transforms/distinct.md b/docs/docs/transforms/distinct.md similarity index 100% rename from website/content/docs/transforms/distinct.md rename to docs/docs/transforms/distinct.md diff --git a/docs/docs/transforms/distinct/index.html b/docs/docs/transforms/distinct/index.html deleted file mode 100644 index 092a2e0..0000000 --- a/docs/docs/transforms/distinct/index.html +++ /dev/null @@ -1,401 +0,0 @@ - - - - - - - - - - - distinct · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

distinct

-

Using templated value, allow only the first record for each distinct key

-

Parameters

- - - - - - - - - - - - - - - -
nameTypeDescription
valuestringKey used for distinct value
-

Example

-
    - distinct:
-        value: "{{row.key}}"
-
-
- -
- - diff --git a/docs/docs/transforms/emit/index.html b/docs/docs/transforms/emit/index.html deleted file mode 100644 index a0db035..0000000 --- a/docs/docs/transforms/emit/index.html +++ /dev/null @@ -1,401 +0,0 @@ - - - - - - - - - - - emit · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

emit

-

Send data to output file. The naming of the file is outdir/script name.pipeline name.emit name.json.gz

-

Parameters

- - - - - - - - - - - - - - - -
nameTypeDescription
namestringName of emit value
-

example

-
    - emit:
-        name: protein_compound_association
-
-
- -
- - diff --git a/docs/docs/transforms/fieldParse.md b/docs/docs/transforms/fieldParse.md new file mode 100644 index 0000000..4be41ff --- /dev/null +++ b/docs/docs/transforms/fieldParse.md @@ -0,0 +1,26 @@ +--- +title: fieldParse +menu: + main: + parent: transforms + weight: 100 +--- + +# fieldParse + +Parse a string field (e.g. `key1=val1;key2=val2`) into individual keys. + +## Parameters + +| Name | Type | Description | +| --- | --- | --- | +| field | string | The field containing the string to be parsed | +| sep | string | Separator character used to split the string | + +## Example + +```yaml + - fieldParse: + field: attributes + sep: ";" +``` diff --git a/website/content/docs/transforms/fieldProcess.md b/docs/docs/transforms/fieldProcess.md similarity index 100% rename from website/content/docs/transforms/fieldProcess.md rename to docs/docs/transforms/fieldProcess.md diff --git a/website/content/docs/transforms/fieldType.md b/docs/docs/transforms/fieldType.md similarity index 100% rename from website/content/docs/transforms/fieldType.md rename to docs/docs/transforms/fieldType.md diff --git a/docs/docs/transforms/fieldparse/index.html b/docs/docs/transforms/fieldparse/index.html deleted file mode 100644 index 122316b..0000000 --- a/docs/docs/transforms/fieldparse/index.html +++ /dev/null @@ -1,379 +0,0 @@ - - - - - - - - - - - fieldParse · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/docs/docs/transforms/fieldprocess/index.html b/docs/docs/transforms/fieldprocess/index.html deleted file mode 100644 index c2085b0..0000000 --- a/docs/docs/transforms/fieldprocess/index.html +++ /dev/null @@ -1,415 +0,0 @@ - - - - - - - - - - - fieldProcess · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

fieldProcess

-

Create stream of objects based on the contents of a field. If the selected field is an array -each of the items in the array will become an independent row.

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
fieldstringName of field to be processed
mappingmap[string]stringProject templated values into child element
itemFieldstringIf processing an array of non-dict elements, create a dict as {itemField:element}
-

example

-
    - fieldProcess:
-        field: portions
-        mapping:
-          sample: "{{row.sample_id}}"
-          project_id: "{{row.project_id}}"
-
-
- -
- - diff --git a/docs/docs/transforms/fieldtype/index.html b/docs/docs/transforms/fieldtype/index.html deleted file mode 100644 index fb7cf6f..0000000 --- a/docs/docs/transforms/fieldtype/index.html +++ /dev/null @@ -1,391 +0,0 @@ - - - - - - - - - - - fieldType · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

fieldType

-

Set field to specific type, ie cast as float or integer

-

example

-

-    - fieldType:
-        t_depth: int
-        t_ref_count: int
-        t_alt_count: int
-        n_depth: int
-        n_ref_count: int
-        n_alt_count: int
-        start: int
-
-
- -
- - diff --git a/website/content/docs/transforms/filter.md b/docs/docs/transforms/filter.md similarity index 100% rename from website/content/docs/transforms/filter.md rename to docs/docs/transforms/filter.md diff --git a/docs/docs/transforms/filter/index.html b/docs/docs/transforms/filter/index.html deleted file mode 100644 index f47fe08..0000000 --- a/docs/docs/transforms/filter/index.html +++ /dev/null @@ -1,437 +0,0 @@ - - - - - - - - - - - filter · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

filter

-

Filter rows in stream using a number of different methods

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
fieldstring (field path)Field used to match rows
valuestring (template string)Template string to match against
matchstringString to match against
checkstringHow to check value, 'exists' or 'hasValue'
methodstringMethod name
pythonstringPython code string
gpythonstringPython code string run using (https://github.com/go-python/gpython)
-

Example

-

Field based match

-
    - filter:
-        field: table
-        match: source_statistics
-

Check based match

-
    - filter:
-        field: uniprot
-        check: hasValue
-
-
- -
- - diff --git a/docs/docs/transforms/flatmap.md b/docs/docs/transforms/flatmap.md new file mode 100644 index 0000000..e4a096a --- /dev/null +++ b/docs/docs/transforms/flatmap.md @@ -0,0 +1,46 @@ +--- +title: flatMap +menu: + main: + parent: transforms + weight: 15 +--- + +# flatMap + +Flatten an array field into separate messages, each containing a single element of the array. + +## Parameters + +| Parameter | Type | Description | +|-----------|--------|------------| +| `field` | string | Path to the array field to be flattened (e.g., `{{row.samples}}`). | +| `dest` | string | Optional name of the field to store the flattened element (defaults to the same field name). | +| `keep` | bool | If `true`, keep the original array alongside the flattened messages. | + +## Example + +```yaml +- flatMap: + field: "{{row.samples}}" + dest: sample +``` + +Given an input message: + +```json +{ "id": "P001", "samples": ["S1", "S2", "S3"] } +``` + +The step emits three messages: + +```json +{ "id": "P001", "sample": "S1" } +{ "id": "P001", "sample": "S2" } +{ "id": "P001", "sample": "S3" } +``` + +## See also + +- [filter](filter.md) – conditionally emit messages. +- [map](map.md) – apply a function to each flattened message. diff --git a/docs/docs/transforms/flatmap/index.html b/docs/docs/transforms/flatmap/index.html deleted file mode 100644 index 9e38b55..0000000 --- a/docs/docs/transforms/flatmap/index.html +++ /dev/null @@ -1,379 +0,0 @@ - - - - - - - - - - - flatMap · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/docs/docs/transforms/from.md b/docs/docs/transforms/from.md new file mode 100644 index 0000000..da38940 --- /dev/null +++ b/docs/docs/transforms/from.md @@ -0,0 +1,25 @@ +--- +title: from +menu: + main: + parent: transforms + weight: 100 +--- + +# from + +Start a pipeline from a named input or another pipeline. + +## Parameters + +| Name | Type | Description | +| --- | --- | --- | +| source | string | Name of the input or pipeline to start from | + +## Example + +```yaml +pipelines: + profileProcess: + - from: profileReader +``` \ No newline at end of file diff --git a/docs/docs/transforms/from/index.html b/docs/docs/transforms/from/index.html deleted file mode 100644 index bad5bb8..0000000 --- a/docs/docs/transforms/from/index.html +++ /dev/null @@ -1,393 +0,0 @@ - - - - - - - - - - - from · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

from

-

Parameters

-

Name of data source

-

Example

-

-
-inputs:
-  profileReader:
-    tableLoad:
-      input: "{{config.profiles}}"
-
-pipelines:
-  profileProcess:
-    - from: profileReader
-
-
- -
- - diff --git a/docs/docs/transforms/graphbuild/index.html b/docs/docs/transforms/graphbuild/index.html deleted file mode 100644 index 0f02b57..0000000 --- a/docs/docs/transforms/graphbuild/index.html +++ /dev/null @@ -1,385 +0,0 @@ - - - - - - - - - - - graphBuild · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

graphBuild

-

Build graph elements from JSON objects using the JSON Schema graph extensions.

-

example

-
      - graphBuild:
-          schema: "{{config.allelesSchema}}"
-          title: Allele
-
-
- -
- - diff --git a/website/content/docs/transforms/hash.md b/docs/docs/transforms/hash.md similarity index 100% rename from website/content/docs/transforms/hash.md rename to docs/docs/transforms/hash.md diff --git a/docs/docs/transforms/hash/index.html b/docs/docs/transforms/hash/index.html deleted file mode 100644 index 546781c..0000000 --- a/docs/docs/transforms/hash/index.html +++ /dev/null @@ -1,412 +0,0 @@ - - - - - - - - - - - hash · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

hash

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
fieldstringField to store hash value
valuestringTemplated string of value to be hashed
methodstringHashing method: sha1/sha256/md5
-

example

-
   - hash:
-      value: "{{row.contents}}"
-      field: contents-sha1
-      method: sha1
-
-
- -
- - diff --git a/docs/docs/transforms/index.html b/docs/docs/transforms/index.html deleted file mode 100644 index a514d38..0000000 --- a/docs/docs/transforms/index.html +++ /dev/null @@ -1,380 +0,0 @@ - - - - - - - - - - - Pipeline Steps · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

Transforms alter the data

- -
- -
- - diff --git a/website/content/docs/transforms/lookup.md b/docs/docs/transforms/lookup.md similarity index 94% rename from website/content/docs/transforms/lookup.md rename to docs/docs/transforms/lookup.md index c96b8e6..7395665 100644 --- a/website/content/docs/transforms/lookup.md +++ b/docs/docs/transforms/lookup.md @@ -25,12 +25,12 @@ Using key from current row, get values from a reference source ### JSON file based lookup -The JSON file defined by `config.doseResponseFile` is opened and loaded into memory, using the `experiment_id` field as a primary key. +The JSON file defined by `params.doseResponseFile` is opened and loaded into memory, using the `experiment_id` field as a primary key. ```yaml - lookup: json: - input: "{{config.doseResponseFile}}" + input: "{{params.doseResponseFile}}" key: experiment_id lookup: "{{row.experiment_id}}" copy: diff --git a/docs/docs/transforms/lookup/index.html b/docs/docs/transforms/lookup/index.html deleted file mode 100644 index 77e6db6..0000000 --- a/docs/docs/transforms/lookup/index.html +++ /dev/null @@ -1,467 +0,0 @@ - - - - - - - - - - - lookup · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

lookup

-

Using key from current row, get values from a reference source

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
replacestring (field path)Field to replace
lookupstring (template string)Key to use for looking up data
copymap[string]stringCopy values from record that was found by lookup. The Key/Value record uses the Key as the destination field and copies the field from the retrieved records using the field named in Value
tsvTSVTableTSV translation table file
jsonJSONTableJSON data file
tableLookupTableInline lookup table
pipelinePipelineLookupUse output of a pipeline as a lookup table
-

Example

-

JSON file based lookup

-

The JSON file defined by config.doseResponseFile is opened and loaded into memory, using the experiment_id field as a primary key.

-
    - lookup:
-        json:
-          input: "{{config.doseResponseFile}}"
-          key: experiment_id
-        lookup: "{{row.experiment_id}}"
-        copy:
-          curve: curve
-

Pipeline output lookup

-

Prepare a table in the pipelines tableGen. Then in recordProcess use that table, indexed by the field primary_key and lookup the value {{row.table_id}} to copy in the contents of the other_data field from the table and add it to the row as my_data.

-

-pipelines:
-
-  tableGen:
-    - from: dataFile
-    #some set of transforms to prepair data
-    #records look like { "primary_key" : "bob", "other_data": "red" }
-
-  recordProcess:
-    - from: recordFile
-    - lookup:
-        pipeline:
-          from: tableGen
-          key: primary_key
-        lookup: "{{row.table_id}}"
-        copy:
-          my_data: other_data
-

Example data:

-

tableGen

-
{ "primary_key" : "bob", "other_data": "red" }
-{ "primary_key" : "alice", "other_data": "blue" }
-

recordProcess input

-
{"id" : "record_1", "table_id":"alice" }
-{"id" : "record_2", "table_id":"bob" }
-

recordProcess output

-
{"id" : "record_1", "table_id":"alice", "my_data" : "blue" }
-{"id" : "record_2", "table_id":"bob", "my_data" : "red" }
-
-
- -
- - diff --git a/website/content/docs/transforms/map.md b/docs/docs/transforms/map.md similarity index 100% rename from website/content/docs/transforms/map.md rename to docs/docs/transforms/map.md diff --git a/docs/docs/transforms/map/index.html b/docs/docs/transforms/map/index.html deleted file mode 100644 index b93585f..0000000 --- a/docs/docs/transforms/map/index.html +++ /dev/null @@ -1,421 +0,0 @@ - - - - - - - - - - - map · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

map

-

Run function on every row

-

Parameters

- - - - - - - - - - - - - - - - - - - - - -
nameDescription
methodName of function to call
pythonPython code to be run
gpythonPython code to be run using GPython
-

Example

-
    - map:
-        method: response
-        gpython: |
-          def response(x):
-            s = sorted(x["curve"].items(), key=lambda x:float(x[0]))
-            x['dose_um'] = []
-            x['response'] = []
-            for d, r in s:
-              try:
-                dn = float(d)
-                rn = float(r)
-                x['dose_um'].append(dn)
-                x['response'].append(rn)
-              except ValueError:
-                pass
-            return x          
-
-
- -
- - diff --git a/website/content/docs/transforms/objectValidate.md b/docs/docs/transforms/objectValidate.md similarity index 91% rename from website/content/docs/transforms/objectValidate.md rename to docs/docs/transforms/objectValidate.md index 331c2d4..0210ed9 100644 --- a/website/content/docs/transforms/objectValidate.md +++ b/docs/docs/transforms/objectValidate.md @@ -22,5 +22,5 @@ Use JSON schema to validate row contents ``` - objectValidate: title: Aliquot - schema: "{{config.schema}}" + schema: "{{params.schema}}" ``` \ No newline at end of file diff --git a/docs/docs/transforms/objectvalidate/index.html b/docs/docs/transforms/objectvalidate/index.html deleted file mode 100644 index e2b6b43..0000000 --- a/docs/docs/transforms/objectvalidate/index.html +++ /dev/null @@ -1,407 +0,0 @@ - - - - - - - - - - - objectValidate · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

objectValidate

-

Use JSON schema to validate row contents

-

parameters

- - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
titlestringTitle of object to use for validation
schemastringPath to JSON schema definition
-

example

-
    - objectValidate:
-        title: Aliquot
-        schema: "{{config.schema}}"
-
-
- -
- - diff --git a/website/content/docs/transforms/plugin.md b/docs/docs/transforms/plugin.md similarity index 100% rename from website/content/docs/transforms/plugin.md rename to docs/docs/transforms/plugin.md diff --git a/docs/docs/transforms/plugin/index.html b/docs/docs/transforms/plugin/index.html deleted file mode 100644 index d286eb5..0000000 --- a/docs/docs/transforms/plugin/index.html +++ /dev/null @@ -1,425 +0,0 @@ - - - - - - - - - - - transform plugin · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

plugin

-

Invoke external program for data processing

-

Parameters

- - - - - - - - - - - - - -
nameDescription
commandLineCommand line program to be called
-

The command line can be written in any language. Sifter and the -plugin communicate via NDJSON. Sifter streams the input to the program via -STDIN and the plugin returns results via STDOUT. Any loggin or additional -data must be sent to STDERR, or it will interupt the stream of messages. -The command line code is executed using the base directory of the -sifter file as the working directory.

-

Example

-
    - plugin:
-        commandLine: "../../util/calc_fingerprint.py"
-

In this case, the plugin code is

-
#!/usr/bin/env python
-
-import sys
-import json
-from rdkit import Chem
-from rdkit.Chem import AllChem
-
-for line in sys.stdin:
-    row = json.loads(line)
-    if "canonical_smiles" in row:
-        smiles = row["canonical_smiles"]
-        m = Chem.MolFromSmiles(smiles)
-        try:
-            fp = AllChem.GetMorganFingerprintAsBitVect(m, radius=2)
-            fingerprint = list(fp)
-            row["morgan_fingerprint_2"] = fingerprint
-        except:
-            pass
-    print(json.dumps(row))
-
-
- -
- - diff --git a/website/content/docs/transforms/project.md b/docs/docs/transforms/project.md similarity index 100% rename from website/content/docs/transforms/project.md rename to docs/docs/transforms/project.md diff --git a/docs/docs/transforms/project/index.html b/docs/docs/transforms/project/index.html deleted file mode 100644 index b87ac10..0000000 --- a/docs/docs/transforms/project/index.html +++ /dev/null @@ -1,408 +0,0 @@ - - - - - - - - - - - project · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

project

-

Populate row with templated values

-

parameters

- - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
mappingmap[string]anyNew fields to be generated from template
renamemap[string]stringRename field (no template engine)
-

Example

-
    - project:
-        mapping:
-          type: sample
-          id: "{{row.sample_id}}"
-
-
- -
- - diff --git a/website/content/docs/transforms/reduce.md b/docs/docs/transforms/reduce.md similarity index 100% rename from website/content/docs/transforms/reduce.md rename to docs/docs/transforms/reduce.md diff --git a/docs/docs/transforms/reduce/index.html b/docs/docs/transforms/reduce/index.html deleted file mode 100644 index 5f9a11a..0000000 --- a/docs/docs/transforms/reduce/index.html +++ /dev/null @@ -1,428 +0,0 @@ - - - - - - - - - - - reduce · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

reduce

-

Using key from rows, reduce matched records into a single entry

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
fieldstring (field path)Field used to match rows
methodstringMethod name
pythonstringPython code string
gpythonstringPython code string run using (https://github.com/go-python/gpython)
initmap[string]anyData to use for first reduce
-

Example

-
    - reduce:
-        field: dataset_name
-        method: merge
-        init: { "compounds" : [] }
-        gpython: |
-
-          def merge(x,y):
-            x["compounds"] = list(set(y["compounds"]+x["compounds"]))
-            return x
-
-
- -
- - diff --git a/website/content/docs/transforms/regexReplace.md b/docs/docs/transforms/regexReplace.md similarity index 100% rename from website/content/docs/transforms/regexReplace.md rename to docs/docs/transforms/regexReplace.md diff --git a/docs/docs/transforms/regexreplace/index.html b/docs/docs/transforms/regexreplace/index.html deleted file mode 100644 index 1d20965..0000000 --- a/docs/docs/transforms/regexreplace/index.html +++ /dev/null @@ -1,379 +0,0 @@ - - - - - - - - - - - regexReplace · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/content/docs/transforms/split.md b/docs/docs/transforms/split.md similarity index 100% rename from website/content/docs/transforms/split.md rename to docs/docs/transforms/split.md diff --git a/docs/docs/transforms/split/index.html b/docs/docs/transforms/split/index.html deleted file mode 100644 index 597538a..0000000 --- a/docs/docs/transforms/split/index.html +++ /dev/null @@ -1,407 +0,0 @@ - - - - - - - - - - - split · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

split

-

Split a field using string sep

-

Parameters

- - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
fieldstringField to the split
sepstringString to use for splitting
-

Example

-
    - split:
-        field: methods
-        sep: ";"
-
-
- -
- - diff --git a/docs/docs/transforms/tablewrite/index.html b/docs/docs/transforms/tablewrite/index.html deleted file mode 100644 index 6158a1a..0000000 --- a/docs/docs/transforms/tablewrite/index.html +++ /dev/null @@ -1,379 +0,0 @@ - - - - - - - - - - - tableWrite · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/docs/docs/transforms/uuid.md b/docs/docs/transforms/uuid.md new file mode 100644 index 0000000..d47dee1 --- /dev/null +++ b/docs/docs/transforms/uuid.md @@ -0,0 +1,27 @@ +--- +title: uuid +menu: + main: + parent: transforms + weight: 100 +--- + +# uuid + +Generate a UUID for a field. + +## Parameters + +| Name | Type | Description | +| --- | --- | --- | +| field | string | Destination field name for the UUID | +| value | string | Seed value used to generate the UUID | +| namespace | string | UUID namespace (optional) | + +## Example + +```yaml + - uuid: + field: id + value: "{{row.name}}" +``` \ No newline at end of file diff --git a/docs/docs/transforms/uuid/index.html b/docs/docs/transforms/uuid/index.html deleted file mode 100644 index 28de436..0000000 --- a/docs/docs/transforms/uuid/index.html +++ /dev/null @@ -1,379 +0,0 @@ - - - - - - - - - - - uuid · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index 120ebf1..0000000 --- a/docs/index.html +++ /dev/null @@ -1,71 +0,0 @@ - - - - - - - - - - - - - Sifter - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-
- - -
-
-

SIFTER

-

Sifter is a Extract Tranform Load (ETL) engine. It can be used to -Extract from a number of different data resources, including TSV files, SQLDump -files and external databases. It includes a pipeline description language to -define a set of Transform steps to create object messages that can be -validated using a JSON schema data.

-

Example of sifter code

- -
-
-
- - - - diff --git a/website/content/_index.md b/docs/index.md similarity index 88% rename from website/content/_index.md rename to docs/index.md index 5c37e23..6ad7988 100644 --- a/website/content/_index.md +++ b/docs/index.md @@ -7,5 +7,3 @@ files and external databases. It includes a pipeline description language to define a set of Transform steps to create object messages that can be validated using a JSON schema data. - -![Example of sifter code](sifter_example.png) \ No newline at end of file diff --git a/docs/index.xml b/docs/index.xml deleted file mode 100644 index d3388c1..0000000 --- a/docs/index.xml +++ /dev/null @@ -1,263 +0,0 @@ - - - - Sifter - https://bmeg.github.io/sifter/ - Recent content on Sifter - Hugo -- gohugo.io - en-us - - - accumulate - https://bmeg.github.io/sifter/docs/transforms/accumulate/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/accumulate/ - accumulate Gather sequential rows into a single record, based on matching a field Parameters name Type Description field string (field path) Field used to match rows dest string field to store accumulated records Example - accumulate: field: model_id dest: rows - - - avroLoad - https://bmeg.github.io/sifter/docs/inputs/avroload/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/inputs/avroload/ - avroLoad Load an AvroFile Parameters name Description input Path to input file - - - clean - https://bmeg.github.io/sifter/docs/transforms/clean/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/clean/ - clean Remove fields that don&rsquo;t appear in the desingated list. 
Parameters name Type Description fields [] string Fields to keep removeEmpty bool Fields with empty values will also be removed storeExtra string Field name to store removed fields Example - clean: fields: - id - synonyms - - - debug - https://bmeg.github.io/sifter/docs/transforms/debug/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/debug/ - debug Print out copy of stream to logging Parameters name Type Description label string Label for log output format bool Use multiline spaced output Example - debug: {} - - - distinct - https://bmeg.github.io/sifter/docs/transforms/distinct/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/distinct/ - distinct Using templated value, allow only the first record for each distinct key Parameters name Type Description value string Key used for distinct value Example - distinct: value: &#34;{{row.key}}&#34; - - - embedded - https://bmeg.github.io/sifter/docs/inputs/embedded/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/inputs/embedded/ - embedded Load data from embedded structure Example inputs: data: embedded: - { &#34;name&#34; : &#34;Alice&#34;, &#34;age&#34;: 28 } - { &#34;name&#34; : &#34;Bob&#34;, &#34;age&#34;: 27 } - - - emit - https://bmeg.github.io/sifter/docs/transforms/emit/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/emit/ - emit Send data to output file. The naming of the file is outdir/script name.pipeline name.emit name.json.gz Parameters name Type Description name string Name of emit value example - emit: name: protein_compound_association - - - Example - https://bmeg.github.io/sifter/docs/example/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/example/ - Example Pipeline Our first task will be to convert a ZIP code TSV into a set of county level entries. 
The input file looks like: ZIP,COUNTYNAME,STATE,STCOUNTYFP,CLASSFP 36003,Autauga County,AL,01001,H1 36006,Autauga County,AL,01001,H1 36067,Autauga County,AL,01001,H1 36066,Autauga County,AL,01001,H1 36703,Autauga County,AL,01001,H1 36701,Autauga County,AL,01001,H1 36091,Autauga County,AL,01001,H1 First is the header of the pipeline. This declares the unique name of the pipeline and it&rsquo;s output directory. name: zipcode_map outdir: ./ docs: Converts zipcode TSV into graph elements Next the configuration is declared. - - - fieldParse - https://bmeg.github.io/sifter/docs/transforms/fieldparse/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/fieldparse/ - - - - fieldProcess - https://bmeg.github.io/sifter/docs/transforms/fieldprocess/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/fieldprocess/ - fieldProcess Create stream of objects based on the contents of a field. If the selected field is an array each of the items in the array will become an independent row. Parameters name Type Description field string Name of field to be processed mapping map[string]string Project templated values into child element itemField string If processing an array of non-dict elements, create a dict as {itemField:element} example - fieldProcess: field: portions mapping: sample: &#34;{{row. 
- - - fieldType - https://bmeg.github.io/sifter/docs/transforms/fieldtype/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/fieldtype/ - fieldType Set field to specific type, ie cast as float or integer example - fieldType: t_depth: int t_ref_count: int t_alt_count: int n_depth: int n_ref_count: int n_alt_count: int start: int - - - filter - https://bmeg.github.io/sifter/docs/transforms/filter/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/filter/ - filter Filter rows in stream using a number of different methods Parameters name Type Description field string (field path) Field used to match rows value string (template string) Template string to match against match string String to match against check string How to check value, &rsquo;exists&rsquo; or &lsquo;hasValue&rsquo; method string Method name python string Python code string gpython string Python code string run using (https://github.com/go-python/gpython) Example Field based match - filter: field: table match: source_statistics Check based match - - - flatMap - https://bmeg.github.io/sifter/docs/transforms/flatmap/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/flatmap/ - - - - from - https://bmeg.github.io/sifter/docs/transforms/from/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/from/ - from Parmeters Name of data source Example inputs: profileReader: tableLoad: input: &#34;{{config.profiles}}&#34; pipelines: profileProcess: - from: profileReader - - - glob - https://bmeg.github.io/sifter/docs/inputs/glob/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/inputs/glob/ - glob Scan files using * based glob statement and open all files as input. 
Parameters Name Description storeFilename Store value of filename in parameter each row input Path of avro object file to transform xmlLoad xmlLoad configutation tableLoad Run transform pipeline on a TSV or CSV jsonLoad Run a transform pipeline on a multi line json file avroLoad Load data from avro file Example inputs: pubmedRead: glob: input: &#34;{{config.baseline}}/*.xml.gz&#34; xmlLoad: {} - - - graphBuild - https://bmeg.github.io/sifter/docs/transforms/graphbuild/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/graphbuild/ - graphBuild Build graph elements from JSON objects using the JSON Schema graph extensions. example - graphBuild: schema: &#34;{{config.allelesSchema}}&#34; title: Allele - - - hash - https://bmeg.github.io/sifter/docs/transforms/hash/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/hash/ - hash Parameters name Type Description field string Field to store hash value value string Templated string of value to be hashed method string Hashing method: sha1/sha256/md5 example - hash: value: &#34;{{row.contents}}&#34; field: contents-sha1 method: sha1 - - - input plugin - https://bmeg.github.io/sifter/docs/inputs/plugin/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/inputs/plugin/ - plugin Run user program for customized data extraction. Example inputs: oboData: plugin: commandLine: ../../util/obo_reader.py {{config.oboFile}} The plugin program is expected to output JSON messages, one per line, to STDOUT that will then be passed to the transform pipelines. Example Plugin The obo_reader.py plugin, it reads a OBO file, such as the kind the describe the GeneOntology, and emits the records as single line JSON messages. #!/usr/bin/env python import re import sys import json re_section = re. 
- - - Inputs - https://bmeg.github.io/sifter/docs/inputs/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/inputs/ - Every playbook consists of a series of inputs. - - - jsonLoad - https://bmeg.github.io/sifter/docs/inputs/jsonload/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/inputs/jsonload/ - jsonLoad Load data from a JSON file. Default behavior expects a single dictionary per line. Each line is a seperate entry. The multiline parameter reads all of the lines of the files and returns a single object. Parameters name Description input Path of JSON file to transform multiline Load file as a single multiline JSON object Example inputs: caseData: jsonLoad: input: &#34;{{config.casesJSON}}&#34; - - - lookup - https://bmeg.github.io/sifter/docs/transforms/lookup/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/lookup/ - lookup Using key from current row, get values from a reference source Parameters name Type Description replace string (field path) Field to replace lookup string (template string) Key to use for looking up data copy map[string]string Copy values from record that was found by lookup. The Key/Value record uses the Key as the destination field and copies the field from the retrieved records using the field named in Value tsv TSVTable TSV translation table file json JSONTable JSON data file table LookupTable Inline lookup table pipeline PipelineLookup Use output of a pipeline as a lookup table Example JSON file based lookup The JSON file defined by config. 
- - - map - https://bmeg.github.io/sifter/docs/transforms/map/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/map/ - map Run function on every row Parameters name Description method Name of function to call python Python code to be run gpython Python code to be run using GPython Example - map: method: response gpython: | def response(x): s = sorted(x[&#34;curve&#34;].items(), key=lambda x:float(x[0])) x[&#39;dose_um&#39;] = [] x[&#39;response&#39;] = [] for d, r in s: try: dn = float(d) rn = float(r) x[&#39;dose_um&#39;].append(dn) x[&#39;response&#39;].append(rn) except ValueError: pass return x - - - objectValidate - https://bmeg.github.io/sifter/docs/transforms/objectvalidate/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/objectvalidate/ - objectValidate Use JSON schema to validate row contents parameters name Type Description title string Title of object to use for validation schema string Path to JSON schema definition example - objectValidate: title: Aliquot schema: &#34;{{config.schema}}&#34; - - - Overview - https://bmeg.github.io/sifter/docs/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/ - Sifter pipelines Sifter pipelines process steams of nested JSON messages. Sifter comes with a number of file extractors that operate as inputs to these pipelines. The pipeline engine connects togeather arrays of transform steps into directed acylic graph that is processed in parallel. Example Message: { &#34;firstName&#34; : &#34;bob&#34;, &#34;age&#34; : &#34;25&#34; &#34;friends&#34; : [ &#34;Max&#34;, &#34;Alex&#34;] } Once a stream of messages are produced, that can be run through a transform pipeline. 
- - - Pipeline Steps - https://bmeg.github.io/sifter/docs/transforms/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/ - Transforms alter the data - - - project - https://bmeg.github.io/sifter/docs/transforms/project/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/project/ - project Populate row with templated values parameters name Type Description mapping map[string]any New fields to be generated from template rename map[string]string Rename field (no template engine) Example - project: mapping: type: sample id: &#34;{{row.sample_id}}&#34; - - - reduce - https://bmeg.github.io/sifter/docs/transforms/reduce/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/reduce/ - reduce Using key from rows, reduce matched records into a single entry Parameters name Type Description field string (field path) Field used to match rows method string Method name python string Python code string gpython string Python code string run using (https://github.com/go-python/gpython) init map[string]any Data to use for first reduce Example - reduce: field: dataset_name method: merge init: { &#34;compounds&#34; : [] } gpython: | def merge(x,y): x[&#34;compounds&#34;] = list(set(y[&#34;compounds&#34;]+x[&#34;compounds&#34;])) return x - - - regexReplace - https://bmeg.github.io/sifter/docs/transforms/regexreplace/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/regexreplace/ - - - - split - https://bmeg.github.io/sifter/docs/transforms/split/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/split/ - split Split a field using string sep Parameters name Type Description field string Field to the split sep string String to use for splitting Example - split: field: methods sep: &#34;;&#34; - - - sqldump - https://bmeg.github.io/sifter/docs/inputs/sqldump/ - Mon, 01 Jan 0001 00:00:00 +0000 - 
https://bmeg.github.io/sifter/docs/inputs/sqldump/ - sqlDump Scan file produced produced from sqldump. Parameters Name Type Description input string Path to the SQL dump file tables []string Names of tables to read out Example inputs: database: sqldumpLoad: input: &#34;{{config.sql}}&#34; tables: - cells - cell_tissues - dose_responses - drugs - drug_annots - experiments - profiles - - - sqliteLoad - https://bmeg.github.io/sifter/docs/inputs/sqliteload/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/inputs/sqliteload/ - sqliteLoad Extract data from an sqlite file Parameters Name Type Description input string Path to the SQLite file query string SQL select statement based input Example inputs: sqlQuery: sqliteLoad: input: &#34;{{config.sqlite}}&#34; query: &#34;select * from drug_mechanism as a LEFT JOIN MECHANISM_REFS as b on a.MEC_ID=b.MEC_ID LEFT JOIN TARGET_COMPONENTS as c on a.TID=c.TID LEFT JOIN COMPONENT_SEQUENCES as d on c.COMPONENT_ID=d.COMPONENT_ID LEFT JOIN MOLECULE_DICTIONARY as e on a.MOLREGNO=e.MOLREGNO&#34; - - - tableLoad - https://bmeg.github.io/sifter/docs/inputs/tableload/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/inputs/tableload/ - tableLoad Extract data from tabular file, includiong TSV and CSV files. 
Parameters Name Type Description input string File to be transformed rowSkip int Number of header rows to skip columns []string Manually set names of columns extraColumns string Columns beyond originally declared columns will be placed in this array sep string Separator \t for TSVs or , for CSVs Example config: gafFile: ../../source/go/goa_human.gaf.gz inputs: gafLoad: tableLoad: input: &#34;{{config.gafFile}}&#34; columns: - db - id - symbol - qualifier - goID - reference - evidenceCode - from - aspect - name - synonym - objectType - taxon - date - assignedBy - extension - geneProduct - - - tableWrite - https://bmeg.github.io/sifter/docs/transforms/tablewrite/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/tablewrite/ - - - - transform plugin - https://bmeg.github.io/sifter/docs/transforms/plugin/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/plugin/ - plugin Invoke external program for data processing Parameters name Description commandLine Command line program to be called The command line can be written in any language. Sifter and the plugin communicate via NDJSON. Sifter streams the input to the program via STDIN and the plugin returns results via STDOUT. Any loggin or additional data must be sent to STDERR, or it will interupt the stream of messages. The command line code is executed using the base directory of the sifter file as the working directory. 
- - - uuid - https://bmeg.github.io/sifter/docs/transforms/uuid/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/transforms/uuid/ - - - - xmlLoad - https://bmeg.github.io/sifter/docs/inputs/xmlload/ - Mon, 01 Jan 0001 00:00:00 +0000 - https://bmeg.github.io/sifter/docs/inputs/xmlload/ - xmlLoad Load an XML file Parameters name Description input Path to input file Example inputs: loader: xmlLoad: input: &#34;{{config.xmlPath}}&#34; - - - diff --git a/docs/sitemap.xml b/docs/sitemap.xml deleted file mode 100644 index 1295605..0000000 --- a/docs/sitemap.xml +++ /dev/null @@ -1,85 +0,0 @@ - - - - https://bmeg.github.io/sifter/ - - https://bmeg.github.io/sifter/docs/transforms/accumulate/ - - https://bmeg.github.io/sifter/docs/inputs/avroload/ - - https://bmeg.github.io/sifter/categories/ - - https://bmeg.github.io/sifter/docs/transforms/clean/ - - https://bmeg.github.io/sifter/docs/transforms/debug/ - - https://bmeg.github.io/sifter/docs/transforms/distinct/ - - https://bmeg.github.io/sifter/docs/ - - https://bmeg.github.io/sifter/docs/inputs/embedded/ - - https://bmeg.github.io/sifter/docs/transforms/emit/ - - https://bmeg.github.io/sifter/docs/example/ - - https://bmeg.github.io/sifter/docs/transforms/fieldparse/ - - https://bmeg.github.io/sifter/docs/transforms/fieldprocess/ - - https://bmeg.github.io/sifter/docs/transforms/fieldtype/ - - https://bmeg.github.io/sifter/docs/transforms/filter/ - - https://bmeg.github.io/sifter/docs/transforms/flatmap/ - - https://bmeg.github.io/sifter/docs/transforms/from/ - - https://bmeg.github.io/sifter/docs/inputs/glob/ - - https://bmeg.github.io/sifter/docs/transforms/graphbuild/ - - https://bmeg.github.io/sifter/docs/transforms/hash/ - - https://bmeg.github.io/sifter/docs/inputs/plugin/ - - https://bmeg.github.io/sifter/docs/inputs/ - - https://bmeg.github.io/sifter/docs/inputs/jsonload/ - - https://bmeg.github.io/sifter/docs/transforms/lookup/ - - https://bmeg.github.io/sifter/docs/transforms/map/ - - 
https://bmeg.github.io/sifter/docs/transforms/objectvalidate/ - - https://bmeg.github.io/sifter/docs/ - - https://bmeg.github.io/sifter/docs/transforms/ - - https://bmeg.github.io/sifter/docs/transforms/project/ - - https://bmeg.github.io/sifter/docs/transforms/reduce/ - - https://bmeg.github.io/sifter/docs/transforms/regexreplace/ - - https://bmeg.github.io/sifter/docs/transforms/split/ - - https://bmeg.github.io/sifter/docs/inputs/sqldump/ - - https://bmeg.github.io/sifter/docs/inputs/sqliteload/ - - https://bmeg.github.io/sifter/docs/inputs/tableload/ - - https://bmeg.github.io/sifter/docs/transforms/tablewrite/ - - https://bmeg.github.io/sifter/tags/ - - https://bmeg.github.io/sifter/docs/transforms/plugin/ - - https://bmeg.github.io/sifter/docs/transforms/uuid/ - - https://bmeg.github.io/sifter/docs/inputs/xmlload/ - - diff --git a/docs/tags/index.xml b/docs/tags/index.xml deleted file mode 100644 index 41241ef..0000000 --- a/docs/tags/index.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - Tags on Sifter - https://bmeg.github.io/sifter/tags/ - Recent content in Tags on Sifter - Hugo -- gohugo.io - en-us - - - diff --git a/evaluate/template.go b/evaluate/template.go index ba0f859..91f8fa7 100644 --- a/evaluate/template.go +++ b/evaluate/template.go @@ -20,8 +20,8 @@ func init() { }) } -func ExpressionString(expression string, config map[string]string, row map[string]interface{}) (string, error) { - d := map[string]interface{}{"config": config} +func ExpressionString(expression string, params map[string]string, row map[string]interface{}) (string, error) { + d := map[string]interface{}{"params": params} if row != nil { d["row"] = row } diff --git a/examples/cbio.yaml b/examples/cbio.yaml index 79e52f6..ea36b31 100644 --- a/examples/cbio.yaml +++ b/examples/cbio.yaml @@ -1,127 +1,144 @@ -class: PlayBook +class: sifter name: CBioPortal -inputs: - tar: - type: File - geneTable: - type: File - -schema: bmeg-dictionary/gdcdictionary/schemas +params: + tar: "" + geneTable: 
"" + schema: bmeg-dictionary/gdcdictionary/schemas -steps: - - desc: Untar - untar: - input: "{{inputs.tar}}" - - desc: Loading Patient List +inputs: + untar: + plugin: + commandLine: tar -xzf {{params.tar}} + + patientReader: tableLoad: input: data_clinical_patient.txt - transform: - - debug: {} - - project: - mapping: - id : "{{row.PATIENT_ID}}" - submitter_id : "{{row.PATIENT_ID}}" - type: "case" - experiments: - submitter_id: "TCGA" - - objectCreate: - class: case - - desc: Loading Sample List + + sampleReader: tableLoad: input: data_clinical_sample.txt - transform: - - fork: - transform: - - - - project: - mapping: - id : "{{row.SAMPLE_ID}}" - submitter_id : "{{row.SAMPLE_ID}}" - cases: - submitter_id: "{{row.PATIENT_ID}}" - type: "sample" - - objectCreate: - class: sample - - - - project: - mapping: - id : "{{row.SAMPLE_ID}}-0000" - submitter_id : "{{row.SAMPLE_ID}}-0000" - samples: - submitter_id: "{{row.SAMPLE_ID}}" - type: "aliquot" - - objectCreate: - class: aliquot - - fileGlob: - files: [ data_RNA_Seq_expression_median.txt, data_RNA_Seq_V2_expression_median.txt ] - limit: 1 - inputName: rnaFile - steps: - - desc: Transpose RNA file - transposeFile: - input: "{{inputs.rnaFile}}" - output: data_RNA_Seq_expression_median_transpose.txt - - - desc: Loading RNA File - tableLoad: - input: data_RNA_Seq_expression_median_transpose.txt - rowSkip: 1 - transform: - - project: - mapping: - id: "gexp:{{row.Entrez_Gene_Id}}" #after the transpose, the index column header is `Entrez_Gene_Id` - aliquot_id: "{{row.Entrez_Gene_Id}}-0000" - - tableProject: - input: "{{inputs.geneTable}}" - - map: - method: nodeMap - python: > - def nodeMap(x): - values = {} - for k, v in x.items(): - if k != "id" and k != "aliquot_id" and k != "Entrez_Gene_Id": - values[k] = v - return { - "id" : x["id"], - "aliquot_id" : x["aliquot_id"], - "metric" : "OTHER", - "values": values - } - - objectCreate: - class: gene_expression - - desc: Loading Mutations + + rnaReader: + transposeLoad: + 
input: data_RNA_Seq_expression_median.txt + rowSkip: 1 + + mutationReader: tableLoad: input: data_mutations_extended.txt - transform: - - alleleID: - dst: allele_id - prefix: "Allele:" - genome: GRCh37 - chromosome: "{{row.Chromosome}}" - start: "{{row.Start_Position}}" - end: "{{row.End_Position}}" - reference_bases: "{{row.Reference_Allele}}" - alternate_bases: "{{row.Tumor_Seq_Allele1}}" - - project: - mapping: - aliquot: "{{row.Tumor_Sample_Barcode}}-0000" - ref: "{{row.Reference_Allele}}" - alt: "{{row.Tumor_Seq_Allele1}}" - ensembl_transcript: "{{row.Transcript_ID}}" - - objectCreate: - class: somatic_variant - - project: - mapping: - genome: "{{row.NCBI_Build}}" - chromosome: "{{row.Chromosome}}" - start: "{{row.Start_Position}}" - end: "{{row.End_Position}}" - strand: "{{row.Strand}}" - reference_bases: "{{row.Reference_Allele}}" - alternate_bases: "{{row.Tumor_Seq_Allele1}}" - hugo_symbol: "{{row.Hugo_Symbol}}" - effect: "{{row.Variant_Classification}}" - - objectCreate: - class: allele + +pipelines: + cases: + - from: patientReader + - project: + mapping: + id: "{{row.PATIENT_ID}}" + submitter_id: "{{row.PATIENT_ID}}" + type: "case" + experiments: + submitter_id: "TCGA" + - objectValidate: + title: Case + schema: "{{params.schema}}" + - emit: + name: case + + samples: + - from: sampleReader + - project: + mapping: + id: "{{row.SAMPLE_ID}}" + submitter_id: "{{row.SAMPLE_ID}}" + cases: + submitter_id: "{{row.PATIENT_ID}}" + type: "sample" + - objectValidate: + title: Sample + schema: "{{params.schema}}" + - emit: + name: sample + + aliquots: + - from: sampleReader + - project: + mapping: + id: "{{row.SAMPLE_ID}}-0000" + submitter_id: "{{row.SAMPLE_ID}}-0000" + samples: + submitter_id: "{{row.SAMPLE_ID}}" + type: "aliquot" + - objectValidate: + title: Aliquot + schema: "{{params.schema}}" + - emit: + name: aliquot + + gene_expression: + - from: rnaReader + - project: + mapping: + id: "gexp:{{row.Entrez_Gene_Id}}" + aliquot_id: "{{row.Entrez_Gene_Id}}-0000" + 
- lookup: + tsv: + input: "{{params.geneTable}}" + lookup: "{{row.Entrez_Gene_Id}}" + - map: + method: nodeMap + python: | + def nodeMap(x): + values = {} + for k, v in x.items(): + if k not in ["id", "aliquot_id", "Entrez_Gene_Id"]: + values[k] = v + return { + "id": x["id"], + "aliquot_id": x["aliquot_id"], + "metric": "OTHER", + "values": values + } + - emit: + name: gene_expression + + mutations: + - from: mutationReader + - map: + method: alleleID + python: | + import hashlib + def alleleID(row): + s = "GRCh37" + row["Chromosome"] + str(row["Start_Position"]) + str(row["End_Position"]) + row["Reference_Allele"] + row["Tumor_Seq_Allele1"] + row["allele_id"] = "Allele:" + hashlib.sha1(s.encode()).hexdigest() + return row + - project: + mapping: + aliquot: "{{row.Tumor_Sample_Barcode}}-0000" + ref: "{{row.Reference_Allele}}" + alt: "{{row.Tumor_Seq_Allele1}}" + ensembl_transcript: "{{row.Transcript_ID}}" + - objectValidate: + title: SomaticVariant + schema: "{{params.schema}}" + - emit: + name: somatic_variant + + alleles: + - from: mutationReader + - project: + mapping: + genome: "{{row.NCBI_Build}}" + chromosome: "{{row.Chromosome}}" + start: "{{row.Start_Position}}" + end: "{{row.End_Position}}" + strand: "{{row.Strand}}" + reference_bases: "{{row.Reference_Allele}}" + alternate_bases: "{{row.Tumor_Seq_Allele1}}" + hugo_symbol: "{{row.Hugo_Symbol}}" + effect: "{{row.Variant_Classification}}" + - objectValidate: + title: Allele + schema: "{{params.schema}}" + - emit: + name: allele diff --git a/examples/gdc-convert.yaml b/examples/gdc-convert.yaml index 1488240..c5c16ee 100644 --- a/examples/gdc-convert.yaml +++ b/examples/gdc-convert.yaml @@ -1,85 +1,98 @@ -class: Playbook -name: GDCCpnvert +class: sifter +name: GDCConvert -inputs: {} +params: + schema: + type: path + default: bmeg-dictionary/gdcdictionary/schemas -schema: bmeg-dictionary/gdcdictionary/schemas - -steps: - - desc: Scrape GDC Projects - script: - dockerImage: bmeg/sifter-gdc-scan - command: 
[/opt/gdc-scan.py, projects] - - desc: Scrape GDC Cases - script: - dockerImage: bmeg/sifter-gdc-scan - command: [/opt/gdc-scan.py, cases] - - desc: Loading ProjectData +inputs: + projects_scrape: + plugin: + commandLine: docker run --rm bmeg/sifter-gdc-scan /opt/gdc-scan.py projects + cases_scrape: + plugin: + commandLine: docker run --rm bmeg/sifter-gdc-scan /opt/gdc-scan.py cases + projects_data: jsonLoad: input: out.projects.json - transform: - - fork: - transform: - - - - project: - mapping: - code: "{{row.project_id}}" - programs: "{{row.program.name}}" - - objectCreate: - class: project - - - - project: - mapping: - code: "{{row.project_id}}" - programs: "{{row.program.name}}" - submitter_id: "{{row.program.name}}" - projects: "{{row.project_id}}" - type: experiment - - objectCreate: - class: experiment - - desc: Loading CaseData + cases_data: jsonLoad: input: out.case.json - transform: - - project: - mapping: - studies: "{{row.project.project_id}}" - experiments: "exp:{{row.project.project_id}}" - type: case - - objectCreate: - class: case - - fieldProcess: - field: samples - mapping: - cases: "{{row.id}}" - steps: - #- debug: {} - - project: - mapping: - type: sample - id: "{{row.sample_id}}" - - objectCreate: - class: sample - - fieldProcess: - field: portions - mapping: - samples: "{{row.id}}" - steps: - - fieldProcess: - field: analytes - mapping: - samples: "{{row.samples}}" - steps: - - fieldProcess: - field: aliquots - mapping: - samples: "{{row.samples}}" - steps: - - project: - mapping: - type: aliquot - id: "{{row.aliquot_id}}" - - objectCreate: - class: aliquot + +pipelines: + projects: + - from: projects_data + - project: + mapping: + code: "{{row.project_id}}" + programs: "{{row.program.name}}" + - objectValidate: + title: project + schema: "{{params.schema}}" + - emit: + name: project + + experiments: + - from: projects_data + - project: + mapping: + code: "{{row.project_id}}" + programs: "{{row.program.name}}" + submitter_id: 
"{{row.program.name}}" + projects: "{{row.project_id}}" + type: experiment + - objectValidate: + title: experiment + schema: "{{params.schema}}" + - emit: + name: experiment + + cases: + - from: cases_data + - project: + mapping: + studies: "{{row.project.project_id}}" + experiments: "exp:{{row.project.project_id}}" + type: case + - objectValidate: + title: case + schema: "{{params.schema}}" + - emit: + name: case + + samples: + - from: cases_data + - fieldProcess: + field: samples + - project: + mapping: + type: sample + id: "{{row.sample_id}}" + - objectValidate: + title: sample + schema: "{{params.schema}}" + - emit: + name: sample + + aliquots: + - from: cases_data + - fieldProcess: + field: samples + - fieldProcess: + field: portions + - fieldProcess: + field: analytes + - fieldProcess: + field: aliquots + - project: + mapping: + type: aliquot + id: "{{row.aliquot_id}}" + - objectValidate: + title: aliquot + schema: "{{params.schema}}" + - emit: + name: aliquot diff --git a/examples/gene-table.yaml b/examples/gene-table.yaml index 973a2e5..5126308 100644 --- a/examples/gene-table.yaml +++ b/examples/gene-table.yaml @@ -1,19 +1,14 @@ -class: Playbook +class: sifter +name: gene-table - -desc: > - This takes a Gene TSV, filters rows, selects columns and outputs a 2 - column TSV into the working directory +params: + geneTSV: ftp://ftp.ncbi.nih.gov/gene/DATA/gene2ensembl.gz inputs: - geneTSV: - type: File - default: ftp://ftp.ncbi.nih.gov/gene/DATA/gene2ensembl.gz - -steps: - - tableLoad: - input: "{{inputs.geneTSV}}" + geneReader: + tableLoad: + input: "{{params.geneTSV}}" columns: - tax_id - GeneID @@ -22,13 +17,15 @@ steps: - Ensembl_rna_identifier - protein_accession.version - Ensembl_protein_identifier - transform: - - filter: - field: row.tax_id - match: "9606" - steps: - - tableWrite: - output: "gene.table" - columns: - - GeneID - - Ensembl_gene_identifier + +pipelines: + transform: + - from: geneReader + - filter: + field: tax_id + match: "9606" + - 
tableWrite: + output: gene.table + columns: + - GeneID + - Ensembl_gene_identifier diff --git a/examples/genome.yaml b/examples/genome.yaml index 564a7c7..510ee81 100644 --- a/examples/genome.yaml +++ b/examples/genome.yaml @@ -1,22 +1,15 @@ -class: Playbook +class: sifter name: RefGenome -inputs: - gtf: - type: File - default: ftp://ftp.ensembl.org/pub/grch37/release-96/gff3/homo_sapiens/Homo_sapiens.GRCh37.87.gff3.gz - - schema: - type: Directory - default: ./bmeg-dictionary/gdcdictionary/schemas +params: + gtfPath: ftp://ftp.ensembl.org/pub/grch37/release-96/gff3/homo_sapiens/Homo_sapiens.GRCh37.87.gff3.gz + schema: ./bmeg-dictionary/gdcdictionary/schemas -schema: "{{inputs.schema}}" - -steps: - - desc: GTF Seq +inputs: + gtfReader: tableLoad: - input: "{{inputs.gtf}}" + input: "{{params.gtfPath}}" columns: - seqid - source @@ -27,60 +20,73 @@ steps: - strand - phase - attributes - transform: - - fieldMap: - col: attributes - sep: ";" - - fieldType: - start: int - end: int - - filter: - field: row.type - match: exon - steps: - - regexReplace: - col: "{{row.attributes.Parent}}" - regex: "^transcript:" - replace: "" - dst: transcript_id - - project: - mapping: - exon_id : "{{row.attributes.exon_id}}" - - map: - method: mapList - python: > - def mapList(x): - x['transcript_id'] = [x['transcript_id']] - return x - - reduce: - field: "{{row.exon_id}}" - method: merge - python: > - def merge(x,y): - x['transcript_id'] = x['transcript_id'] + y['transcript_id'] - return x - - objectCreate: - class: exon - - filter: - field: row.type - match: gene - steps: - - project: - mapping: - gene_id : "{{row.attributes.gene_id}}" - - objectCreate: - class: gene - - filter: - field: row.type - match: mRNA - steps: - - regexReplace: - col: "{{row.attributes.Parent}}" - regex: "^gene:" - replace: "" - dst: gene_id - - project: - mapping: - transcript_id : "{{row.attributes.transcript_id}}" - - objectCreate: - class: transcript + +pipelines: + transform: + - from: gtfReader + - 
fieldParse: + field: attributes + sep: ";" + - fieldType: + start: integer + end: integer + + exons: + - from: transform + - filter: + field: type + match: exon + - regexReplace: + field: Parent + regex: "^transcript:" + replace: "" + dst: transcript_id + - project: + mapping: + exon_id: "{{row.exon_id}}" + transcript_id: ["{{row.transcript_id}}"] + - reduce: + field: exon_id + method: merge + python: | + def merge(x, y): + x['transcript_id'] = x['transcript_id'] + y['transcript_id'] + return x + - objectValidate: + title: exon + schema: "{{params.schema}}" + - emit: + name: exon + + genes: + - from: transform + - filter: + field: type + match: gene + - project: + mapping: + gene_id: "{{row.gene_id}}" + - objectValidate: + title: gene + schema: "{{params.schema}}" + - emit: + name: gene + + transcripts: + - from: transform + - filter: + field: type + match: mRNA + - regexReplace: + field: Parent + regex: "^gene:" + replace: "" + dst: gene_id + - project: + mapping: + transcript_id: "{{row.transcript_id}}" + - objectValidate: + title: transcript + schema: "{{params.schema}}" + - emit: + name: transcript diff --git a/examples/hugo-ensembl.yaml b/examples/hugo-ensembl.yaml index 8f75ae9..295f68d 100644 --- a/examples/hugo-ensembl.yaml +++ b/examples/hugo-ensembl.yaml @@ -1,29 +1,25 @@ -class: Playbook +class: sifter +name: hugo-ensembl - -desc: > - This takes a Gene TSV, filters rows, selects columns and outputs a 2 - column TSV into the working directory +params: + hugoJSON: ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/json/locus_types/gene_with_protein_product.json inputs: - hugoJSON: - type: File - default: ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/json/locus_types/gene_with_protein_product.json + hugoReader: + jsonLoad: + input: "{{params.hugoJSON}}" -steps: - - jsonLoad: - input: "{{inputs.hugoJSON}}" - transform: - - fieldProcess: - field: response.docs - steps: - - filter: - field: "ensembl_gene_id" - exists: True - steps: - - tableWrite: - output: 
"hugo-ensembl.table" - columns: - - symbol - - ensembl_gene_id +pipelines: + transform: + - from: hugoReader + - fieldProcess: + field: response.docs + - filter: + field: ensembl_gene_id + check: exists + - tableWrite: + output: hugo-ensembl.table + columns: + - symbol + - ensembl_gene_id diff --git a/examples/vcfload.yaml b/examples/vcfload.yaml deleted file mode 100644 index 3901cc2..0000000 --- a/examples/vcfload.yaml +++ /dev/null @@ -1,27 +0,0 @@ - -class: Playbook -name: VCFLoad - -inputs: - clinvar_vcf: - type: File - default: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar_20190422.vcf.gz - dbsnp_vcf: - type: File - default: ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/VCF/ - -steps: - - desc: Loading VCF - vcfLoad: - # VCF from ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar_20190422.vcf.gz - input: "{{inputs.clinvar_vcf}}" - label: Annotation - edgeLabel: inAllele - idTemplate: "Clinvar:{{row.ID}}" - infoMap: - CLNDN: disease - - desc: DB SNP VCF - # VCF from ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/VCF/ - vcfLoad: - emitAllele: true - input: "{{inputs.dbsnp_vcf}}" diff --git a/extractors/avro_load.go b/extractors/avro_load.go index dc2975d..3308e44 100644 --- a/extractors/avro_load.go +++ b/extractors/avro_load.go @@ -13,13 +13,13 @@ import ( ) type AvroLoadStep struct { - Input string `json:"input" jsonschema_description:"Path of avro object file to transform"` + Path string `json:"path" jsonschema_description:"Path of avro object file to transform"` } func (ml *AvroLoadStep) Start(task task.RuntimeTask) (chan map[string]interface{}, error) { logger.Debug("Starting Avro Load") - input, err := evaluate.ExpressionString(ml.Input, task.GetConfig(), nil) + input, err := evaluate.ExpressionString(ml.Path, task.GetConfig(), nil) if err != nil { return nil, err } @@ -57,10 +57,10 @@ func (ml *AvroLoadStep) Start(task task.RuntimeTask) (chan map[string]interface{ return procChan, nil } -func (ml *AvroLoadStep) GetConfigFields() 
[]config.Variable { - out := []config.Variable{} - for _, s := range evaluate.ExpressionIDs(ml.Input) { - out = append(out, config.Variable{Type: "File", Name: config.TrimPrefix(s)}) +func (ml *AvroLoadStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} + for _, s := range evaluate.ExpressionIDs(ml.Path) { + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } return out } diff --git a/extractors/glob_load.go b/extractors/glob_load.go index 8c57c01..2f5d2eb 100644 --- a/extractors/glob_load.go +++ b/extractors/glob_load.go @@ -14,12 +14,12 @@ import ( type GlobLoadStep struct { StoreFilename string `json:"storeFilename"` StoreFilepath string `json:"storeFilepath"` - Input string `json:"input" jsonschema_description:"Path of avro object file to transform"` + Path string `json:"path" jsonschema_description:"Path of avro object file to transform"` Parallelize bool `json:"parallelize"` - XMLLoad *XMLLoadStep `json:"xmlLoad"` - TableLoad *TableLoadStep `json:"tableLoad" jsonschema_description:"Run transform pipeline on a TSV or CSV"` - JSONLoad *JSONLoadStep `json:"jsonLoad" jsonschema_description:"Run a transform pipeline on a multi line json file"` - AvroLoad *AvroLoadStep `json:"avroLoad" jsonschema_description:"Load data from avro file"` + XMLLoad *XMLLoadStep `json:"xml"` + TableLoad *TableLoadStep `json:"table" jsonschema_description:"Run transform pipeline on a TSV or CSV"` + JSONLoad *JSONLoadStep `json:"json" jsonschema_description:"Run a transform pipeline on a multi line json file"` + AvroLoad *AvroLoadStep `json:"avro" jsonschema_description:"Load data from avro file"` } type fileSource struct { @@ -28,7 +28,7 @@ type fileSource struct { } func (gl *GlobLoadStep) Start(task task.RuntimeTask) (chan map[string]interface{}, error) { - input, err := evaluate.ExpressionString(gl.Input, task.GetConfig(), nil) + input, err := evaluate.ExpressionString(gl.Path, task.GetConfig(), nil) if err != nil { return 
nil, err } @@ -53,15 +53,15 @@ func (gl *GlobLoadStep) Start(task task.RuntimeTask) (chan map[string]interface{ var a Source if gl.XMLLoad != nil { t := *gl.XMLLoad - t.Input = f + t.Path = f a = &t } else if gl.JSONLoad != nil { t := *gl.JSONLoad - t.Input = f + t.Path = f a = &t } else if gl.TableLoad != nil { t := *gl.TableLoad - t.Input = f + t.Path = f a = &t } sources <- fileSource{source: a, file: f} @@ -97,10 +97,10 @@ func (gl *GlobLoadStep) Start(task task.RuntimeTask) (chan map[string]interface{ return nil, fmt.Errorf("not found") } -func (gl *GlobLoadStep) GetConfigFields() []config.Variable { - out := []config.Variable{} - for _, s := range evaluate.ExpressionIDs(gl.Input) { - out = append(out, config.Variable{Type: "File", Name: config.TrimPrefix(s)}) +func (gl *GlobLoadStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} + for _, s := range evaluate.ExpressionIDs(gl.Path) { + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } return out } diff --git a/extractors/interface.go b/extractors/interface.go index 33d2892..9655dc8 100644 --- a/extractors/interface.go +++ b/extractors/interface.go @@ -14,16 +14,16 @@ type Source interface { type Extractor struct { Description string `json:"description" jsonschema_description:"Human Readable description of step"` - XMLLoad *XMLLoadStep `json:"xmlLoad"` - TableLoad *TableLoadStep `json:"tableLoad" jsonschema_description:"Run transform pipeline on a TSV or CSV"` - JSONLoad *JSONLoadStep `json:"jsonLoad" jsonschema_description:"Run a transform pipeline on a multi line json file"` - SQLDumpLoad *SQLDumpStep `json:"sqldumpLoad" jsonschema_description:"Parse the content of a SQL dump to find insert and run a transform pipeline"` - GripperLoad *GripperLoadStep `json:"gripperLoad" jsonschema_description:"Use a GRIPPER server to get data and run a transform pipeline"` - AvroLoad *AvroLoadStep `json:"avroLoad" jsonschema_description:"Load data from avro 
file"` + XMLLoad *XMLLoadStep `json:"xml"` + TableLoad *TableLoadStep `json:"table" jsonschema_description:"Run transform pipeline on a TSV or CSV"` + JSONLoad *JSONLoadStep `json:"json" jsonschema_description:"Run a transform pipeline on a multi line json file"` + SQLDumpLoad *SQLDumpStep `json:"sqldump" jsonschema_description:"Parse the content of a SQL dump to find insert and run a transform pipeline"` + GripperLoad *GripperLoadStep `json:"gripper" jsonschema_description:"Use a GRIPPER server to get data and run a transform pipeline"` + AvroLoad *AvroLoadStep `json:"avro" jsonschema_description:"Load data from avro file"` Embedded *EmbeddedLoader `json:"embedded"` Glob *GlobLoadStep `json:"glob"` - SQLiteLoad *SQLiteStep `json:"sqliteLoad"` - TransposeLoad *TransposeLoadStep `json:"transposeLoad"` + SQLiteLoad *SQLiteStep `json:"sqlite"` + TransposeLoad *TransposeLoadStep `json:"transpose"` Plugin *PluginLoadStep `json:"plugin"` //Untar *UntarStep `json:"untar" jsonschema_description:"Untar a file"` //FileGlob *FileGlobStep `json:"fileGlob" jsonschema_description:"Scan a directory and run a ETL pipeline on each of the files"` @@ -44,15 +44,15 @@ func (ex *Extractor) Start(t task.RuntimeTask) (chan map[string]interface{}, err return nil, fmt.Errorf(("Extractor not defined")) } -func (ex *Extractor) GetConfigFields() []config.Variable { - out := []config.Variable{} +func (ex *Extractor) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} v := reflect.ValueOf(ex).Elem() for i := 0; i < v.NumField(); i++ { f := v.Field(i) x := f.Interface() if z, ok := x.(config.Configurable); ok { if !f.IsNil() { - out = append(out, z.GetConfigFields()...) + out = append(out, z.GetRequiredParams()...) 
} } } diff --git a/extractors/json_load.go b/extractors/json_load.go index 6f4ccb6..d2faa8f 100644 --- a/extractors/json_load.go +++ b/extractors/json_load.go @@ -15,14 +15,14 @@ import ( ) type JSONLoadStep struct { - Input string `json:"input" jsonschema_description:"Path of multiline JSON file to transform"` + Path string `json:"path" jsonschema_description:"Path of multiline JSON file to transform"` Transform transform.Pipe `json:"transform" jsonschema_description:"Transformation Pipeline"` Multiline bool `json:"multiline" jsonschema_description:"Load file as a single multiline JSON object"` } func (ml *JSONLoadStep) Start(task task.RuntimeTask) (chan map[string]interface{}, error) { logger.Debug("Starting JSON Load") - input, err := evaluate.ExpressionString(ml.Input, task.GetConfig(), nil) + input, err := evaluate.ExpressionString(ml.Path, task.GetConfig(), nil) if err != nil { return nil, err } @@ -67,10 +67,10 @@ func (ml *JSONLoadStep) Start(task task.RuntimeTask) (chan map[string]interface{ return procChan, nil } -func (ml *JSONLoadStep) GetConfigFields() []config.Variable { - out := []config.Variable{} - for _, s := range evaluate.ExpressionIDs(ml.Input) { - out = append(out, config.Variable{Type: "File", Name: config.TrimPrefix(s)}) +func (ml *JSONLoadStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} + for _, s := range evaluate.ExpressionIDs(ml.Path) { + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } return out } diff --git a/extractors/plugin_load.go b/extractors/plugin_load.go index 74893cb..f6c38e5 100644 --- a/extractors/plugin_load.go +++ b/extractors/plugin_load.go @@ -81,10 +81,10 @@ func (ml *PluginLoadStep) Start(task task.RuntimeTask) (chan map[string]interfac return procChan, nil } -func (ml *PluginLoadStep) GetConfigFields() []config.Variable { - out := []config.Variable{} +func (ml *PluginLoadStep) GetRequiredParams() []config.ParamRequest { + out := 
[]config.ParamRequest{} for _, s := range evaluate.ExpressionIDs(ml.CommandLine) { - out = append(out, config.Variable{Type: "File", Name: config.TrimPrefix(s)}) + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } return out } diff --git a/extractors/sqldump_step.go b/extractors/sqldump_step.go index dd06014..cd346b0 100644 --- a/extractors/sqldump_step.go +++ b/extractors/sqldump_step.go @@ -15,13 +15,13 @@ import ( ) type SQLDumpStep struct { - Input string `json:"input" jsonschema_description:"Path to the SQL dump file"` + Path string `json:"path" jsonschema_description:"Path to the SQL dump file"` Tables []string `json:"tables" jsonschema_description:"Array of transforms for the different tables in the SQL dump"` } func (ml *SQLDumpStep) Start(task task.RuntimeTask) (chan map[string]interface{}, error) { - input, err := evaluate.ExpressionString(ml.Input, task.GetConfig(), nil) + input, err := evaluate.ExpressionString(ml.Path, task.GetConfig(), nil) if err != nil { return nil, err } @@ -114,10 +114,10 @@ func (ml *SQLDumpStep) Start(task task.RuntimeTask) (chan map[string]interface{} return out, nil } -func (ml *SQLDumpStep) GetConfigFields() []config.Variable { - out := []config.Variable{} - for _, s := range evaluate.ExpressionIDs(ml.Input) { - out = append(out, config.Variable{Type: "File", Name: config.TrimPrefix(s)}) +func (ml *SQLDumpStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} + for _, s := range evaluate.ExpressionIDs(ml.Path) { + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } return out } diff --git a/extractors/sqlite_load.go b/extractors/sqlite_load.go index de38204..4877d42 100644 --- a/extractors/sqlite_load.go +++ b/extractors/sqlite_load.go @@ -64,10 +64,10 @@ func (ml *SQLiteStep) Start(task task.RuntimeTask) (chan map[string]interface{}, return procChan, nil } -func (ml *SQLiteStep) GetConfigFields() []config.Variable { - out := 
[]config.Variable{} +func (ml *SQLiteStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} for _, s := range evaluate.ExpressionIDs(ml.Input) { - out = append(out, config.Variable{Type: "File", Name: config.TrimPrefix(s)}) + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } return out } diff --git a/extractors/tabular_load.go b/extractors/tabular_load.go index 5f8b9f4..d24bbb3 100644 --- a/extractors/tabular_load.go +++ b/extractors/tabular_load.go @@ -16,7 +16,7 @@ import ( ) type TableLoadStep struct { - Input string `json:"input" jsonschema_description:"TSV to be transformed"` + Path string `json:"path" jsonschema_description:"TSV to be transformed"` RowSkip int `json:"rowSkip" jsonschema_description:"Number of header rows to skip"` Columns []string `json:"columns" jsonschema_description:"Manually set names of columns"` ExtraColumns string `json:"extraColumns" jsonschema_description:"Columns beyond originally declared columns will be placed in this array"` @@ -50,7 +50,7 @@ func buildUniqueArray(src []string) []string { func (ml *TableLoadStep) Start(task task.RuntimeTask) (chan map[string]interface{}, error) { logger.Info("Starting Table Load") - input, err := evaluate.ExpressionString(ml.Input, task.GetConfig(), nil) + input, err := evaluate.ExpressionString(ml.Path, task.GetConfig(), nil) if err != nil { return nil, err } @@ -75,8 +75,7 @@ func (ml *TableLoadStep) Start(task task.RuntimeTask) (chan map[string]interface } else { inputStream = gfile } - } - if err != nil { + } else { return nil, err } @@ -160,10 +159,10 @@ func (ml *TableLoadStep) Start(task task.RuntimeTask) (chan map[string]interface return procChan, nil } -func (ml *TableLoadStep) GetConfigFields() []config.Variable { - out := []config.Variable{} - for _, s := range evaluate.ExpressionIDs(ml.Input) { - out = append(out, config.Variable{Type: "File", Name: config.TrimPrefix(s)}) +func (ml *TableLoadStep) GetRequiredParams() 
[]config.ParamRequest { + out := []config.ParamRequest{} + for _, s := range evaluate.ExpressionIDs(ml.Path) { + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } return out } diff --git a/extractors/transpose_load.go b/extractors/transpose_load.go index 436a064..d282c62 100644 --- a/extractors/transpose_load.go +++ b/extractors/transpose_load.go @@ -20,7 +20,7 @@ import ( ) type TransposeLoadStep struct { - Input string `json:"input" jsonschema_description:"TSV to be transformed"` + Path string `json:"path" jsonschema_description:"TSV to be transformed"` RowSkip int `json:"rowSkip" jsonschema_description:"Number of header rows to skip"` Sep string `json:"sep" jsonschema_description:"Separator \\t for TSVs or , for CSVs"` UseDB bool `json:"useDB" jsonschema_description:"Do transpose without caching matrix in memory. Takes longer but works on large files"` @@ -28,7 +28,7 @@ type TransposeLoadStep struct { } func (ml *TransposeLoadStep) Start(task task.RuntimeTask) (chan map[string]interface{}, error) { - input, err := evaluate.ExpressionString(ml.Input, task.GetConfig(), nil) + input, err := evaluate.ExpressionString(ml.Path, task.GetConfig(), nil) if err != nil { return nil, err } @@ -48,10 +48,10 @@ func (ml *TransposeLoadStep) Start(task task.RuntimeTask) (chan map[string]inter return out, nil } -func (ml *TransposeLoadStep) GetConfigFields() []config.Variable { - out := []config.Variable{} - for _, s := range evaluate.ExpressionIDs(ml.Input) { - out = append(out, config.Variable{Type: "File", Name: config.TrimPrefix(s)}) +func (ml *TransposeLoadStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} + for _, s := range evaluate.ExpressionIDs(ml.Path) { + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } return out } @@ -271,7 +271,7 @@ func transposeInDB(workdir string, c csvReader, out chan map[string]any) error { } func transposeInTable(workdir string, fieldSize int, c 
csvReader, out chan map[string]any) error { - + defer close(out) table, err := os.Create(filepath.Join(workdir, "transpose")) if err != nil { return err @@ -316,12 +316,14 @@ func transposeInTable(workdir string, fieldSize int, c csvReader, out chan map[s columns := []string{} for row := int64(0); row < rowCount; row++ { buf := make([]byte, fieldSize) - table.ReadAt(buf, row*stepSize) - tmp := bytes.Split(buf, []byte{0}) - if err == nil { - columns = append(columns, string(tmp[0])) - } else { + n, err := table.ReadAt(buf, row*stepSize) + if err != nil && err != io.EOF { logger.Error("Column error", "message", err) + continue + } + if n > 0 { + tmp := bytes.Split(buf[:n], []byte{0}) + columns = append(columns, string(tmp[0])) } } @@ -341,7 +343,6 @@ func transposeInTable(workdir string, fieldSize int, c csvReader, out chan map[s out <- record } table.Close() - close(out) os.RemoveAll(workdir) return nil } diff --git a/extractors/xml_step.go b/extractors/xml_step.go index d7653d0..b3743b6 100644 --- a/extractors/xml_step.go +++ b/extractors/xml_step.go @@ -19,12 +19,12 @@ import ( ) type XMLLoadStep struct { - Input string `json:"input"` + Path string `json:"path"` Level int `json:"level"` } func (ml *XMLLoadStep) Start(task task.RuntimeTask) (chan map[string]any, error) { - input, err := evaluate.ExpressionString(ml.Input, task.GetConfig(), nil) + input, err := evaluate.ExpressionString(ml.Path, task.GetConfig(), nil) if err != nil { logger.Error("Error open xml", "error", err) return nil, err @@ -110,10 +110,10 @@ func (ml *XMLLoadStep) Start(task task.RuntimeTask) (chan map[string]any, error) return procChan, nil } -func (ml *XMLLoadStep) GetConfigFields() []config.Variable { - out := []config.Variable{} - for _, s := range evaluate.ExpressionIDs(ml.Input) { - out = append(out, config.Variable{Type: "File", Name: config.TrimPrefix(s)}) +func (ml *XMLLoadStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} + for _, s := range 
evaluate.ExpressionIDs(ml.Path) { + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } return out } diff --git a/go.sum b/go.sum index d45911b..60b9b20 100644 --- a/go.sum +++ b/go.sum @@ -22,8 +22,6 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkNEM+Y= github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= -github.com/bmeg/flame v0.0.0-20231228021014-450efb0021a6 h1:7cacs+zs6d9oP0ylXMrmb6PlH8773WEnj/yohQRZkiA= -github.com/bmeg/flame v0.0.0-20231228021014-450efb0021a6/go.mod h1:XA9wVP0ORyATjjwdbwkjq/yaimsnreixGUR62sGMWsY= github.com/bmeg/flame v0.0.0-20250730184845-2dc57651053c h1:ktjBmJUTaHjRoabJio6+P/GPGNjm640gQi3OfcSlnBI= github.com/bmeg/flame v0.0.0-20250730184845-2dc57651053c/go.mod h1:telO7T1SSr9Ejv9nm//xcSMCQWynGFll08bnGMwBO/o= github.com/bmeg/golib v0.0.0-20200725232156-e799a31439fc h1:/0v/ZcXYjGs44InmjECrls31onIbVKVu1Q/E2cmnCEU= diff --git a/graphplan/build_template.go b/graphplan/build_template.go deleted file mode 100644 index e564a3c..0000000 --- a/graphplan/build_template.go +++ /dev/null @@ -1,140 +0,0 @@ -package graphplan - -import ( - "fmt" - "os" - "path/filepath" - "text/template" - - "github.com/bmeg/sifter/evaluate" - "github.com/bmeg/sifter/logger" - "github.com/bmeg/sifter/playbook" - "github.com/bmeg/sifter/task" -) - -type ObjectConvertStep struct { - Name string - Input string - Class string - Schema string -} - -type GraphBuildStep struct { - Name string - Outdir string - Objects []ObjectConvertStep -} - -var graphScript string = ` - -name: {{.Name}} -class: sifter - -outdir: {{.Outdir}} - -config: -{{range .Objects}} - {{.Name}}: {{.Input}} - {{.Name}}Schema: {{.Schema}} -{{end}} - -inputs: -{{range .Objects}} - {{.Name}}: - jsonLoad: - input: "{{ "{{config." 
}}{{.Name}}{{"}}"}}" -{{end}} - -pipelines: -{{range .Objects}} - {{.Name}}-graph: - - from: {{.Name}} - - graphBuild: - schema: "{{ "{{config."}}{{.Name}}Schema{{ "}}" }}" - title: {{.Class}} -{{end}} -` - -func contains(n string, c []string) bool { - for _, c := range c { - if n == c { - return true - } - } - return false -} - -func uniqueName(name string, used []string) string { - if !contains(name, used) { - return name - } - for i := 1; ; i++ { - f := fmt.Sprintf("%s_%d", name, i) - if !contains(f, used) { - return f - } - } -} - -func NewGraphBuild(pb *playbook.Playbook, scriptOutDir, dataDir string, objectExclude []string) error { - userInputs := map[string]string{} - localInputs, _ := pb.PrepConfig(userInputs, filepath.Dir(pb.GetPath())) - - task := task.NewTask(pb.Name, filepath.Dir(pb.GetPath()), filepath.Dir(pb.GetPath()), pb.GetDefaultOutDir(), localInputs) - - convertName := fmt.Sprintf("%s-graph", pb.Name) - - gb := GraphBuildStep{Name: convertName, Objects: []ObjectConvertStep{}, Outdir: dataDir} - - for pname, p := range pb.Pipelines { - emitName := "" - for _, s := range p { - if s.Emit != nil { - emitName = s.Emit.Name - } - } - if emitName != "" { - for _, s := range p { - if s.ObjectValidate != nil { - if !contains(s.ObjectValidate.Title, objectExclude) { - schema, _ := evaluate.ExpressionString(s.ObjectValidate.Schema, task.GetConfig(), map[string]any{}) - outdir := pb.GetDefaultOutDir() - outname := fmt.Sprintf("%s.%s.%s.json.gz", pb.Name, pname, emitName) - - outpath := filepath.Join(outdir, outname) - outpath, _ = filepath.Rel(scriptOutDir, outpath) - - schemaPath, _ := filepath.Rel(scriptOutDir, schema) - - _ = schemaPath - - objCreate := ObjectConvertStep{Name: pname, Input: outpath, Class: s.ObjectValidate.Title, Schema: schemaPath} - gb.Objects = append(gb.Objects, objCreate) - } - } - } - } - } - - if len(gb.Objects) > 0 { - tmpl, err := template.New("graphscript").Parse(graphScript) - if err != nil { - panic(err) - } - - outPath := 
filepath.Join(scriptOutDir, fmt.Sprintf("%s.yaml", pb.Name)) - outfile, err := os.Create(outPath) - if err != nil { - logger.Error("File Error", "error", err) - } - - logger.Info("Summary", "ObjectFound", len(gb.Objects), "outPath", outPath) - - err = tmpl.Execute(outfile, gb) - outfile.Close() - if err != nil { - logger.Error("Template Error", "error", err) - } - } - return nil -} diff --git a/loader/counter.go b/loader/counter.go index 485b476..e71ae16 100644 --- a/loader/counter.go +++ b/loader/counter.go @@ -44,7 +44,7 @@ func (cd *CountDataEmitter) Close() { cd.d.Close() } -func (cd *CountDataEmitter) Emit(name string, e map[string]interface{}, useName bool) error { +func (cd *CountDataEmitter) Emit(name string, e map[string]interface{}) error { cd.cl.increment() - return cd.d.Emit(name, e, useName) + return cd.d.Emit(name, e) } diff --git a/loader/dir.go b/loader/dir.go index 3d62ecc..e334247 100644 --- a/loader/dir.go +++ b/loader/dir.go @@ -7,6 +7,7 @@ import ( "os" "path" "path/filepath" + "strings" "sync" "github.com/bmeg/sifter/logger" @@ -65,14 +66,14 @@ func (s *DirLoader) Close() { s.dout = map[string]io.WriteCloser{} } -func (s *DirDataLoader) Emit(name string, v map[string]interface{}, useName bool) error { +func (s *DirDataLoader) Emit(name string, v map[string]interface{}) error { s.dl.mux.Lock() defer s.dl.mux.Unlock() f, ok := s.dl.dout[name] if !ok { // log.Printf("output path %s", outputPath) - opath := path.Join(s.dl.dir, name+".json.gz") + opath := path.Join(s.dl.dir, name) logger.Info("Creating emit file", "name", name, "path", opath) @@ -80,7 +81,11 @@ func (s *DirDataLoader) Emit(name string, v map[string]interface{}, useName bool if err != nil { return err } - f = gzip.NewWriter(j) + if strings.HasSuffix(opath, ".gz") { + f = gzip.NewWriter(j) + } else { + f = j + } s.dl.dout[name] = f } if v != nil { diff --git a/loader/emitter.go b/loader/emitter.go index 49bd50d..c7d81ff 100644 --- a/loader/emitter.go +++ b/loader/emitter.go @@ -6,7 
+6,7 @@ import ( ) type DataEmitter interface { - Emit(name string, e map[string]interface{}, useName bool) error + Emit(name string, e map[string]interface{}) error Close() } diff --git a/loader/stdout.go b/loader/stdout.go index ec60903..6c78b70 100644 --- a/loader/stdout.go +++ b/loader/stdout.go @@ -21,7 +21,7 @@ type StdoutEmitter struct { func (s StdoutEmitter) Close() {} -func (s StdoutEmitter) Emit(name string, v map[string]interface{}, useName bool) error { +func (s StdoutEmitter) Emit(name string, v map[string]interface{}) error { if v != nil { o, _ := json.Marshal(v) fmt.Printf("%s\t%s\n", name, o) diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..0327120 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,22 @@ +site_name: Sifter + +docs_dir: docs + +plugins: + - search + - awesome-nav + +theme: + name: material + palette: + - scheme: default + primary: custom + features: + - navigation.indexes + # - navigation.footer + - content.code.copy + - navigation.tabs + # - navigation.sections + - navigation.top + - header.autohide + - navigation.tabs diff --git a/playbook/execute.go b/playbook/execute.go index c312205..ed126ab 100644 --- a/playbook/execute.go +++ b/playbook/execute.go @@ -21,35 +21,62 @@ func fileExists(filename string) bool { } */ -func (pb *Playbook) PrepConfig(inputs map[string]string, workdir string) (map[string]string, error) { - workdir, _ = filepath.Abs(workdir) - missing := map[string]bool{} - out := map[string]string{} - for _, v := range pb.GetConfigFields() { - if val, ok := inputs[v.Name]; ok { +func (pb *Playbook) PrepConfig(inputParams map[string]string, workdir string) (map[string]string, error) { + + playbookParams := map[string]string{} + for k, v := range pb.Params { + if _, ok := inputParams[k]; ok { if v.IsFile() || v.IsDir() { - var defaultPath = val - if !filepath.IsAbs(val) { - defaultPath = filepath.Join(workdir, val) + var defaultPath = inputParams[k] + if !filepath.IsAbs(inputParams[k]) { + defaultPath = 
filepath.Join(workdir, inputParams[k]) } - out[v.Name], _ = filepath.Abs(defaultPath) + playbookParams[k], _ = filepath.Abs(defaultPath) } else { - out[v.Name] = val + playbookParams[k] = inputParams[k] } + } else { + if v.Default != nil { + if v.IsFile() || v.IsDir() { + var defaultPath = fmt.Sprintf("%v", v.Default) + if !filepath.IsAbs(defaultPath) { + dirPath := filepath.Dir(pb.path) + defaultPath = filepath.Join(dirPath, defaultPath) + } + playbookParams[k], _ = filepath.Abs(defaultPath) + } else { + playbookParams[k] = fmt.Sprintf("%v", v.Default) + } + } else { + return nil, fmt.Errorf("parameter %s not defined", k) + } + } + } + + workdir, _ = filepath.Abs(workdir) + missing := map[string]bool{} + out := map[string]string{} + for _, v := range pb.GetRequiredParams() { + if val, ok := playbookParams[v.Name]; ok { + out[v.Name] = val logger.Debug("input: ", v.Name, out[v.Name]) - } else if val, ok := pb.Config[v.Name]; ok { - if val != nil { + } else if p, ok := pb.Params[v.Name]; ok { + if p.Default != nil { + val := fmt.Sprintf("%v", p.Default) if v.IsFile() || v.IsDir() { - defaultPath := filepath.Join(filepath.Dir(pb.path), *val) + var defaultPath = val + if !filepath.IsAbs(val) { + defaultPath = filepath.Join(filepath.Dir(pb.path), val) + } out[v.Name], _ = filepath.Abs(defaultPath) } else { - out[v.Name] = *val + out[v.Name] = val } } else { missing[v.Name] = true } } else { - return nil, fmt.Errorf("config %s not defined", v.Name) + return nil, fmt.Errorf("parameter %s not defined", v.Name) } } if len(missing) > 0 { @@ -59,6 +86,7 @@ func (pb *Playbook) PrepConfig(inputs map[string]string, workdir string) (map[st } return nil, fmt.Errorf("missing inputs: %s", strings.Join(o, ",")) } + logger.Debug("prep config inputs", "config", out) return out, nil } @@ -104,6 +132,7 @@ func (pb *Playbook) Execute(task task.RuntimeTask) error { outNodes := map[string]flame.Emitter[map[string]any]{} inNodes := map[string]flame.Receiver[map[string]any]{} + outputs := 
map[string]OutputProcessor{} for n, v := range pb.Inputs { logger.Debug("Setting Up", "name", n) @@ -121,11 +150,10 @@ func (pb *Playbook) Execute(task task.RuntimeTask) error { joins := []joinStruct{} for k, v := range pb.Pipelines { - sub := task.SubTask(k) var lastStep flame.Emitter[map[string]any] var firstStep flame.Receiver[map[string]any] for i, s := range v { - b, err := s.Init(sub) + b, err := s.Init(task) if err != nil { logger.Error("Pipeline error", "name", k, "error", err) return err @@ -302,6 +330,37 @@ func (pb *Playbook) Execute(task task.RuntimeTask) error { } } + for k, v := range pb.Outputs { + if v.JSON != nil { + proc, err := v.JSON.Init(task) + if err == nil { + if srcNode, ok := outNodes[v.JSON.From]; ok { + s := flame.AddSink(wf, proc.Process) + outputs[k] = proc + s.Connect(srcNode) + } + } + } else if v.Table != nil { + proc, err := v.Table.Init(task) + if err == nil { + if srcNode, ok := outNodes[v.Table.From]; ok { + s := flame.AddSink(wf, proc.Process) + outputs[k] = proc + s.Connect(srcNode) + } + } + } else if v.Graph != nil { + proc, err := v.Graph.Init(task) + if err == nil { + if srcNode, ok := outNodes[v.Graph.From]; ok { + s := flame.AddSink(wf, proc.Process) + outputs[k] = proc + s.Connect(srcNode) + } + } + } + } + //log.Printf("WF: %#v", wf) wf.Start() @@ -313,6 +372,10 @@ func (pb *Playbook) Execute(task task.RuntimeTask) error { procs[p].Close() } + for k := range outputs { + outputs[k].Close() + } + task.Close() return nil } diff --git a/playbook/inspect.go b/playbook/inspect.go index 5c2feb6..f82bd6f 100644 --- a/playbook/inspect.go +++ b/playbook/inspect.go @@ -1,58 +1,36 @@ package playbook import ( - "fmt" "path/filepath" "github.com/bmeg/sifter/config" "github.com/bmeg/sifter/task" ) -func (pb *Playbook) GetConfigFields() []config.Variable { - out := []config.Variable{} +func (pb *Playbook) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} for _, v := range pb.Inputs { - out = append(out, 
v.GetConfigFields()...) + out = append(out, v.GetRequiredParams()...) } for _, v := range pb.Pipelines { for _, s := range v { - out = append(out, s.GetConfigFields()...) + out = append(out, s.GetRequiredParams()...) } } return out } -func (pb *Playbook) GetOutputs(task task.RuntimeTask) (map[string][]string, error) { - out := map[string][]string{} - //inputs := task.GetInputs() - - for k, v := range pb.Pipelines { - for _, s := range v { - fArray := []string{} - for _, fileName := range s.GetOutputs() { - filePath := filepath.Join(pb.GetOutDir(task), fileName) - fArray = append(fArray, filePath) - } - if len(fArray) > 0 { - out[k] = fArray - } - } - } - return out, nil -} - -func (pb *Playbook) GetEmitters(task task.RuntimeTask) (map[string]string, error) { +func (pb *Playbook) GetOutputs(task task.RuntimeTask) (map[string]string, error) { out := map[string]string{} + //inputs := task.GetInputs() - for k, v := range pb.Pipelines { - for _, s := range v { - for _, e := range s.GetEmitters() { - fileName := fmt.Sprintf("%s.%s.%s.json.gz", pb.Name, k, e) - filePath := filepath.Join(pb.GetOutDir(task), fileName) - out[k+"."+e] = filePath - } + for k, v := range pb.Outputs { + for _, o := range v.GetOutputs(task) { + filePath := filepath.Join(pb.GetOutDir(task), o) + out[k] = filePath } } return out, nil diff --git a/playbook/load.go b/playbook/load.go index a3b6b9c..11ef5f2 100644 --- a/playbook/load.go +++ b/playbook/load.go @@ -17,14 +17,21 @@ type Loader interface { Load() chan gripql.GraphElement } +type Output struct { + JSON *OutputJSON `json:"json"` + Table *OutputTable `json:"table"` + Graph *OutputGraph `json:"graph"` +} + type Playbook struct { + Inputs map[string]extractors.Extractor `json:"inputs" jsonschema_description:"Steps of the transformation"` + Outputs map[string]Output `json:"outputs"` Class string `json:"class"` Name string `json:"name" jsonschema_description:"Unique name of the playbook"` MemMB int `json:"memMB"` //annotation of potential memory 
usage, for build Snakefile Docs string `json:"docs"` Outdir string `json:"outdir"` - Config config.Config `json:"config,omitempty" jsonschema_description:"Configuration for Playbook"` - Inputs map[string]extractors.Extractor `json:"inputs" jsonschema_description:"Steps of the transformation"` + Params config.Params `json:"params,omitempty" jsonschema_description:"Parameters for Playbook"` Pipelines map[string]transform.Pipe `json:"pipelines"` path string } diff --git a/transform/graph_build.go b/playbook/output_graph.go similarity index 64% rename from transform/graph_build.go rename to playbook/output_graph.go index d92bbfe..34d43ee 100644 --- a/transform/graph_build.go +++ b/playbook/output_graph.go @@ -1,20 +1,25 @@ -package transform +package playbook import ( + "path/filepath" + "github.com/bmeg/grip/gripql" "github.com/bmeg/jsonschemagraph/graph" "github.com/bmeg/sifter/config" "github.com/bmeg/sifter/evaluate" "github.com/bmeg/sifter/logger" + "github.com/bmeg/sifter/playbook/refs" "github.com/bmeg/sifter/task" ) type EdgeFix struct { - Method string `json:"method"` - GPython *CodeBlock `json:"gpython"` + Method string `json:"method"` + GPython *refs.CodeBlock `json:"gpython"` } -type GraphBuildStep struct { +type OutputGraph struct { + From string `json:"from"` + Path string `json:"path"` Schema string `json:"schema"` Title string `json:"title"` Clean bool `json:"clean"` @@ -22,19 +27,32 @@ type GraphBuildStep struct { EdgeFix *EdgeFix `json:"edgeFix"` } +func (oj *OutputGraph) GetOutputs(task task.RuntimeTask) []string { + output, err := evaluate.ExpressionString(oj.Path, task.GetConfig(), nil) + if err != nil { + return []string{} + } + outputPath := filepath.Join(task.OutDir(), output) + logger.Debug("table output %s %s", task.OutDir(), output) + return []string{outputPath + ".edge", outputPath + ".vertex"} +} + type graphBuildProcess struct { - config GraphBuildStep + config OutputGraph task task.RuntimeTask sch graph.GraphSchema class string + edgeName 
string + verrtexName string + edgeFix evaluate.Processor objectCount int vertexCount int edgeCount int } -func (ts GraphBuildStep) Init(task task.RuntimeTask) (Processor, error) { +func (ts OutputGraph) Init(task task.RuntimeTask) (OutputProcessor, error) { path, err := evaluate.ExpressionString(ts.Schema, task.GetConfig(), nil) if err != nil { @@ -45,10 +63,17 @@ func (ts GraphBuildStep) Init(task task.RuntimeTask) (Processor, error) { if err != nil { return nil, err } + + output, err := evaluate.ExpressionString(ts.Path, task.GetConfig(), nil) + + //TODO: make this more flexible + edgeName := output + ".edge.json.gz" + vertexName := output + ".vertex.json.gz" + //force the two emitters to be created. nil messages don't get emitted //but the output file will be created - task.Emit("vertex", nil, false) - task.Emit("edge", nil, false) + task.Emit(vertexName, nil) + task.Emit(edgeName, nil) var edgeFix evaluate.Processor if ts.EdgeFix != nil { @@ -63,14 +88,24 @@ func (ts GraphBuildStep) Init(task task.RuntimeTask) (Processor, error) { edgeFix = c } } - return &graphBuildProcess{ts, task, sc, ts.Title, edgeFix, 0, 0, 0}, nil + return &graphBuildProcess{ + config: ts, + task: task, + sch: sc, + edgeName: edgeName, + verrtexName: vertexName, + class: ts.Title, + edgeFix: edgeFix, + objectCount: 0, + vertexCount: 0, + edgeCount: 0}, nil } -func (ts GraphBuildStep) GetConfigFields() []config.Variable { - out := []config.Variable{} +func (ts OutputGraph) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} if ts.Schema != "" { for _, s := range evaluate.ExpressionIDs(ts.Schema) { - out = append(out, config.Variable{Type: config.Dir, Name: config.TrimPrefix(s)}) + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } } return out @@ -88,15 +123,14 @@ func (ts *graphBuildProcess) Close() { "class", ts.class) } -func (ts *graphBuildProcess) Process(i map[string]interface{}) []map[string]interface{} { +func (ts 
*graphBuildProcess) Process(i map[string]interface{}) { - out := []map[string]any{} if o, err := ts.sch.Generate(ts.class, i, ts.config.Clean, map[string]any{}); err == nil { ts.objectCount++ for i := range o { if o[i].Vertex != nil { ts.vertexCount++ - err := ts.task.Emit("vertex", ts.vertexToMap(o[i].Vertex), false) + err := ts.task.Emit(ts.verrtexName, ts.vertexToMap(o[i].Vertex)) if err != nil { logger.Error("Emit Error: %s", err) } @@ -111,7 +145,7 @@ func (ts *graphBuildProcess) Process(i map[string]interface{}) []map[string]inte } } ts.edgeCount++ - err := ts.task.Emit("edge", edgeData, false) + err := ts.task.Emit(ts.edgeName, edgeData) if err != nil { logger.Error("Emit Error: %s", err) } @@ -121,9 +155,6 @@ func (ts *graphBuildProcess) Process(i map[string]interface{}) []map[string]inte } else { logger.Error("Graphbuild %s error : %s", ts.config.Title, err) } - - return out - } func (ts *graphBuildProcess) edgeToMap(e *gripql.Edge) map[string]interface{} { diff --git a/playbook/output_json.go b/playbook/output_json.go new file mode 100644 index 0000000..f1182bb --- /dev/null +++ b/playbook/output_json.go @@ -0,0 +1,61 @@ +package playbook + +import ( + "path/filepath" + + "github.com/bmeg/sifter/evaluate" + "github.com/bmeg/sifter/logger" + "github.com/bmeg/sifter/task" +) + +func (pout *Output) GetOutputs(task task.RuntimeTask) []string { + if pout.JSON != nil { + return pout.JSON.GetOutputs(task) + } else if pout.Graph != nil { + return pout.Graph.GetOutputs(task) + } else if pout.Table != nil { + return pout.Table.GetOutputs(task) + } + + return []string{} +} + +type OutputProcessor interface { + Process(i map[string]any) + //GetOutputs(task task.RuntimeTask) []string + Close() +} + +type OutputJSON struct { + Path string `json:"path"` + From string `json:"from"` +} + +func (oj *OutputJSON) GetOutputs(task task.RuntimeTask) []string { + output, err := evaluate.ExpressionString(oj.Path, task.GetConfig(), nil) + if err != nil { + return []string{} + } + 
outputPath := filepath.Join(task.OutDir(), output) + logger.Debug("table output %s %s", task.OutDir(), output) + return []string{outputPath} +} + +func (ts OutputJSON) Init(task task.RuntimeTask) (OutputProcessor, error) { + return &jsonOutputProcess{config: ts, task: task}, nil +} + +type jsonOutputProcess struct { + config OutputJSON + task task.RuntimeTask + count uint64 +} + +func (op *jsonOutputProcess) Close() { + logger.Info("Emit Summary", "name", op.config.Path, "count", op.count) +} + +func (op *jsonOutputProcess) Process(i map[string]interface{}) { + op.count++ + op.task.Emit(op.config.Path, i) +} diff --git a/transform/table_write.go b/playbook/output_table.go similarity index 74% rename from transform/table_write.go rename to playbook/output_table.go index 2c36d62..2210fb9 100644 --- a/transform/table_write.go +++ b/playbook/output_table.go @@ -1,4 +1,4 @@ -package transform +package playbook import ( "compress/gzip" @@ -14,8 +14,9 @@ import ( "github.com/bmeg/sifter/task" ) -type TableWriteStep struct { - Output string `json:"output" jsonschema_description:"Name of file to create"` +type OutputTable struct { + From string `json:"from"` + Path string `json:"path" jsonschema_description:"Name of file to create"` Columns []string `json:"columns" jsonschema_description:"Columns to be written into table file"` Header string `json:"header"` SkipColumnHeader bool `json:"skipColumnHeader"` @@ -23,20 +24,20 @@ type TableWriteStep struct { } type tableWriteProcess struct { - config *TableWriteStep + config *OutputTable columns []string out io.WriteCloser handle io.WriteCloser writer *csv.Writer } -func (tw *TableWriteStep) Init(task task.RuntimeTask) (Processor, error) { +func (tw *OutputTable) Init(task task.RuntimeTask) (OutputProcessor, error) { sep := '\t' if tw.Sep != "" { sep = rune(tw.Sep[0]) } - output, err := evaluate.ExpressionString(tw.Output, task.GetConfig(), nil) + output, err := evaluate.ExpressionString(tw.Path, task.GetConfig(), nil) if err != 
nil { return nil, err } @@ -64,8 +65,8 @@ func (tw *TableWriteStep) Init(task task.RuntimeTask) (Processor, error) { return &te, nil } -func (tw *TableWriteStep) GetOutputs(task task.RuntimeTask) []string { - output, err := evaluate.ExpressionString(tw.Output, task.GetConfig(), nil) +func (tw *OutputTable) GetOutputs(task task.RuntimeTask) []string { + output, err := evaluate.ExpressionString(tw.Path, task.GetConfig(), nil) if err != nil { return []string{} } @@ -78,7 +79,7 @@ func (tp *tableWriteProcess) PoolReady() bool { return false } -func (tp *tableWriteProcess) Process(i map[string]any) map[string]any { +func (tp *tableWriteProcess) Process(i map[string]any) { o := make([]string, len(tp.columns)) for j, k := range tp.columns { if v, ok := i[k]; ok { @@ -91,11 +92,10 @@ func (tp *tableWriteProcess) Process(i map[string]any) map[string]any { } } tp.writer.Write(o) - return i } func (tp *tableWriteProcess) Close() { - logger.Debug("Closing tableWriter: %s", tp.config.Output) + logger.Debug("Closing tableWriter: %s", tp.config.Path) tp.writer.Flush() tp.out.Close() tp.handle.Close() diff --git a/transform/code_block.go b/playbook/refs/code_block.go similarity index 97% rename from transform/code_block.go rename to playbook/refs/code_block.go index 015171e..2cdf62b 100644 --- a/transform/code_block.go +++ b/playbook/refs/code_block.go @@ -1,4 +1,4 @@ -package transform +package refs import ( "encoding/json" diff --git a/task/task.go b/task/task.go index 721aeb4..362e51e 100644 --- a/task/task.go +++ b/task/task.go @@ -1,18 +1,16 @@ package task import ( - "io/ioutil" + "os" "path/filepath" - "strings" "github.com/bmeg/sifter/loader" ) type RuntimeTask interface { SetName(name string) - SubTask(ext string) RuntimeTask - Emit(name string, e map[string]interface{}, useName bool) error + Emit(name string, e map[string]interface{}) error GetConfig() map[string]string AbsPath(p string) (string, error) @@ -61,18 +59,6 @@ func (m *Task) GetName() string { return m.Prefix 
+ "." + m.Name } -func (m *Task) SubTask(ext string) RuntimeTask { - return &Task{ - Prefix: m.GetName(), - Name: ext, - Workdir: m.Workdir, - Basedir: m.Basedir, - Config: m.Config, - Emitter: m.Emitter, - Outdir: m.Outdir, - } -} - func (m *Task) GetConfig() map[string]string { return m.Config } @@ -93,7 +79,7 @@ func (m *Task) OutDir() string { } func (m *Task) TempDir() string { - name, _ := ioutil.TempDir(m.Workdir, "tmp") + name, _ := os.MkdirTemp(m.Workdir, "tmp") return name } @@ -105,12 +91,6 @@ func (m *Task) BaseDir() string { return m.Basedir } -func (m *Task) Emit(n string, e map[string]interface{}, useName bool) error { - - newName := m.GetName() + "." + n - if useName { - temp := strings.Split(n, ".") - newName = temp[len(temp)-1] - } - return m.Emitter.Emit(newName, e, useName) +func (m *Task) Emit(name string, e map[string]interface{}) error { + return m.Emitter.Emit(name, e) } diff --git a/test/command_line_test.go b/test/command_line_test.go index 69da65f..119244c 100644 --- a/test/command_line_test.go +++ b/test/command_line_test.go @@ -60,7 +60,7 @@ func TestCommandLines(t *testing.T) { // read in conf, ie config.yaml in this case for _, c := range conf { cmd := exec.Command("../sifter", "run", c.Playbook) - fmt.Printf("Running: %s\n", c.Playbook) + t.Logf("Running: %s\n", c.Playbook) err = cmd.Run() if err != nil { t.Errorf("Failed running %s: %s", c.Playbook, err) @@ -68,7 +68,7 @@ func TestCommandLines(t *testing.T) { for i, chk := range c.Outputs { // iterate through expected output paths path := filepath.Join(filepath.Dir(c.Playbook), chk) - fmt.Printf("Checking %s \n", path) + t.Logf("Checking %s \n", path) if stat, err := os.Stat(path); err == nil { if stat.Size() > 0 { file, err := os.Open(path) diff --git a/test/config.yaml b/test/config.yaml index 4405dba..9b45d65 100644 --- a/test/config.yaml +++ b/test/config.yaml @@ -4,8 +4,8 @@ - 200 - 192 outputs: - - output/pathway_commons.edges.edge.json.gz - - 
output/pathway_commons.nodes.node.json.gz + - output/pathway_commons.edges.json.gz + - output/pathway_commons.nodes.json.gz - playbook: examples/pathwaycommons/gene_collect.yaml LineCount: - 3 @@ -26,7 +26,7 @@ LineCount: - 10 outputs: - - output-tsv/gdc-projects.tranform.case-mondo.json.gz + - output-tsv/gdc-projects.transform.case-mondo.json.gz - playbook: examples/gdc/gdc-convert.yaml LineCount: - 0 #TODO: fix this test @@ -41,8 +41,8 @@ - 1138 - 873 outputs: - - output/sifter.edge.edge.json.gz - - output/sifter.vertex.vertex.json.gz + - output/pfb.edge.json + - output/pfb.vertex.json - playbook: examples/code-ref/Pipeline.yaml - playbook: examples/code-ref/flatMappipeline.yaml LineCount: diff --git a/test/examples/gdc/gdc-convert.yaml b/test/examples/gdc/gdc-convert.yaml index e3358f6..ddd9e68 100644 --- a/test/examples/gdc/gdc-convert.yaml +++ b/test/examples/gdc/gdc-convert.yaml @@ -2,14 +2,35 @@ name: gdc outdir: output/ -config: - cases: ../../resources/gdc-case.json.gz - schema: ../../resources/schemas +params: + cases: + type: file + default: ../../resources/gdc-case.json.gz + schema: + type: path + default: ../../resources/schemas inputs: caseData: - jsonLoad: - input: "{{config.cases}}" + json: + path: "{{params.cases}}" + +outputs: + caseFile: + json: + path: gdc.caseObject.case.json.gz + from: caseObject + caseGraph: + graph: + path: gdc.caseGraph + from: caseObject + schema: "{{params.schema}}" + title: Case + EdgeFix: + method: test + gpython: + $ref: test.py + pipelines: caseObject: @@ -21,18 +42,4 @@ pipelines: type: case - objectValidate: title: Case - schema: "{{config.schema}}" - - emit: - # Testing that this doesn't do anything - useName: False - name: case - - caseGraph: - - from: caseObject - - graphBuild: - schema: "{{config.schema}}" - title: Case - EdgeFix: - method: test - gpython: - $ref: test.py + schema: "{{params.schema}}" diff --git a/test/examples/gene-table/gene-table.yaml b/test/examples/gene-table/gene-table.yaml index 
0df1f11..852fc2e 100644 --- a/test/examples/gene-table/gene-table.yaml +++ b/test/examples/gene-table/gene-table.yaml @@ -6,13 +6,15 @@ docs: > This takes a Gene TSV, filters rows, selects columns and outputs a 2 column TSV into the working directory -config: - geneTSV: ../../resources/gene2ensembl.gz +params: + geneTSV: + type: File + default: ../../resources/gene2ensembl.gz inputs: genes: - tableLoad: - input: "{{config.geneTSV}}" + table: + path: "{{params.geneTSV}}" columns: - tax_id - GeneID @@ -22,15 +24,19 @@ inputs: - protein_accession.version - Ensembl_protein_identifier +outputs: + geneTable: + table: + from: translate + path: "gene.table" + columns: + - GeneID + - Ensembl_gene_identifier + pipelines: translate: - from: genes - filter: field: tax_id match: "9606" - - tableWrite: - output: "gene.table" - columns: - - GeneID - - Ensembl_gene_identifier diff --git a/test/examples/lookup/inline-table.yaml b/test/examples/lookup/inline-table.yaml index 2bcae4f..feed4e7 100644 --- a/test/examples/lookup/inline-table.yaml +++ b/test/examples/lookup/inline-table.yaml @@ -1,15 +1,22 @@ outdir: output/ -config: - json: ../../resources/projects.json +params: + json: + type: File + default: ../../resources/projects.json inputs: jsonData: - jsonLoad: - input: "{{config.json}}" + json: + path: "{{params.json}}" +outputs: + translated: + json: + path: sifter.transform.test.json.gz + from: transform pipelines: transform: @@ -19,5 +26,3 @@ pipelines: table: TCGA-KIRC: 1 TCGA-SARC: 2 - - emit: - name: test diff --git a/test/examples/lookup/tsv-table-replace.yaml b/test/examples/lookup/tsv-table-replace.yaml index 1d38d73..85a1030 100644 --- a/test/examples/lookup/tsv-table-replace.yaml +++ b/test/examples/lookup/tsv-table-replace.yaml @@ -3,28 +3,36 @@ name: gdc-projects outdir: output-tsv/ -config: - cases: ../../resources/case.json - diseaseTSV: ../../resources/disease_table.tsv +params: + cases: + type: File + default: ../../resources/case.json + diseaseTSV: + type: 
File + default: ../../resources/disease_table.tsv inputs: caseReader: - jsonLoad: - input: "{{config.cases}}" + json: + path: "{{params.cases}}" + +outputs: + case-mondo: + json: + path: gdc-projects.transform.case-mondo.json.gz + from: transform pipelines: - tranform: + transform: - from: caseReader - fieldProcess: field: project - lookup: replace: disease_type tsv: - input: "{{config.diseaseTSV}}" + input: "{{params.diseaseTSV}}" header: - disease - mondo key: disease value: mondo - - emit: - name: case-mondo \ No newline at end of file diff --git a/test/examples/pathwaycommons/gene_collect.yaml b/test/examples/pathwaycommons/gene_collect.yaml index 85fb3ab..dc5db4b 100644 --- a/test/examples/pathwaycommons/gene_collect.yaml +++ b/test/examples/pathwaycommons/gene_collect.yaml @@ -1,16 +1,24 @@ outdir: output_gene -config: - sifFile: ../../resources/pathways.sif +params: + sifFile: + type: File + default: ../../resources/pathways.sif inputs: sifFile: - tableLoad: - input: "{{config.sifFile}}" + table: + path: "{{params.sifFile}}" sep: "\t" columns: [_from, _label, _to] +outputs: + sifout: + json: + path: sifout.json.gz + from: geneReduce + pipelines: geneReduce: - from: sifFile @@ -23,9 +31,3 @@ pipelines: y["_from"] = x["_from"] y["_to"].append(x["_to"]) return y - newpip: - - from: geneReduce - - emit: - useName: True - name: sifout - diff --git a/test/examples/pathwaycommons/pathway_commons.yaml b/test/examples/pathwaycommons/pathway_commons.yaml index d8d2c52..efa016f 100644 --- a/test/examples/pathwaycommons/pathway_commons.yaml +++ b/test/examples/pathwaycommons/pathway_commons.yaml @@ -2,16 +2,28 @@ name: pathway_commons outdir: output -config: - sifFile: ../../resources/pathways.sif +params: + sifFile: + type: File + default: ../../resources/pathways.sif inputs: sifFile: - tableLoad: - input: "{{config.sifFile}}" + table: + path: "{{params.sifFile}}" sep: "\t" columns: [_from, _label, _to] +outputs: + edgeFile: + json: + from: sifFile + path: 
pathway_commons.edges.json.gz + nodeFile: + json: + from: nodes + path: pathway_commons.nodes.json.gz + pipelines: nodes: - from: sifFile @@ -27,10 +39,3 @@ pipelines: - project: mapping: _label: "Protein" - - emit: - name: node - - edges: - - from: sifFile - - emit: - name: edge diff --git a/test/examples/pfb/transform.yaml b/test/examples/pfb/transform.yaml index 5847205..8eb35bf 100644 --- a/test/examples/pfb/transform.yaml +++ b/test/examples/pfb/transform.yaml @@ -1,13 +1,25 @@ -config: - file: ../../resources/1000G.pfb.avro +params: + file: + type: file + default: ../../resources/1000G.pfb.avro outdir: output/ inputs: pfb: - avroLoad: - input: "{{config.file}}" + avro: + path: "{{params.file}}" + +outputs: + vertexFile: + json: + path: pfb.vertex.json + from: vertex + edgeFile: + json: + path: pfb.edge.json + from: edge pipelines: transform: @@ -54,9 +66,6 @@ pipelines: o = x["object"][x["name"]] return { "gid" : x["name"] + ":" + x["id"], "label" : x["name"], "data" : o } - - emit: - name: vertex - edge: - from: transform - fieldProcess: @@ -72,5 +81,3 @@ pipelines: - to - from - label - - emit: - name: edge \ No newline at end of file diff --git a/test/resources/project.yaml b/test/resources/project.yaml index 74d68f5..477d0a7 100644 --- a/test/resources/project.yaml +++ b/test/resources/project.yaml @@ -2,14 +2,21 @@ class: Playbook -config: - genes: ./gene_with_protein_product.json +params: + genes: + type: File + default: ./gene_with_protein_product.json inputs: geneData: - jsonLoad: - input: "{{config.genes}}" + json: + path: "{{params.genes}}" +outputs: + new_ids: + json: + from: step1 + path: new_ids.ndjson pipelines: step1: @@ -17,5 +24,3 @@ pipelines: - project: mapping: _id: "{{row.ensembl_gene_id}}" - - emit: - name: new_ids diff --git a/test/resources/transpose_test.tsv b/test/resources/transpose_test.tsv new file mode 100644 index 0000000..521d356 --- /dev/null +++ b/test/resources/transpose_test.tsv @@ -0,0 +1,4 @@ +gene sample1 sample2 sample3 
+EGFR 5.2 3.1 7.4 +KRAS 2.5 1.8 3.2 +TP53 6.1 4.5 5.8 diff --git a/test/unittests/playbook_examples_test.go b/test/unittests/playbook_examples_test.go deleted file mode 100644 index 605363d..0000000 --- a/test/unittests/playbook_examples_test.go +++ /dev/null @@ -1,82 +0,0 @@ -package test - -import ( - "fmt" - "os" - "testing" - - "github.com/bmeg/sifter/cmd/run" - "github.com/bmeg/sifter/loader" - - "path/filepath" - - "github.com/ghodss/yaml" -) - -type PlaybookExampleConfig struct { - Playbook string `json:"playbook"` - Inputs map[string]string `json:"inputs"` - Outputs []string `json:"outputs"` -} - -func runPlaybook(playbook string, inputs map[string]string, outdir string) error { - workDir := "./" - os.Mkdir(outdir, 0700) - driver := "dir://" + outdir - ld, err := loader.NewLoader(driver) - if err != nil { - return err - } - defer ld.Close() - - dir, _ := os.MkdirTemp(workDir, "sifterwork_") - defer os.RemoveAll(dir) - - err = run.ExecuteFile(playbook, dir, outdir, inputs) - if err != nil { - return err - } - os.RemoveAll(dir) - return nil -} - -func fileExists(filename string) bool { - info, err := os.Stat(filename) - if os.IsNotExist(err) { - return false - } - return !info.IsDir() -} - -func TestPlaybookExamples(t *testing.T) { - tests, err := filepath.Glob("test-playbook-*.yaml") - if err != nil { - t.Error(err) - } - for _, tPath := range tests { - raw, err := os.ReadFile(tPath) - if err != nil { - t.Error(fmt.Errorf("failed to read config %s", tPath)) - } - conf := PlaybookExampleConfig{} - if err := yaml.Unmarshal(raw, &conf); err != nil { - t.Error(fmt.Errorf("failed to read config %s", tPath)) - } - inputs := map[string]string{} - for k, v := range conf.Inputs { - inputs[k] = v - } - fmt.Printf("%s\n", conf) - outDir, _ := os.MkdirTemp("./", "testout_") - runPlaybook(conf.Playbook, inputs, outDir) - - for _, out := range conf.Outputs { - base := filepath.Base(out) - dst := filepath.Join(outDir, base) - if !fileExists(dst) { - t.Errorf("Output %s not 
produced", base) - } - } - os.RemoveAll(outDir) - } -} diff --git a/test/unittests/project_test.go b/test/unittests/project_test.go deleted file mode 100644 index 1c95be3..0000000 --- a/test/unittests/project_test.go +++ /dev/null @@ -1,34 +0,0 @@ -package test - -import ( - "io/ioutil" - "os" - "testing" - - "github.com/bmeg/sifter/cmd/run" - "github.com/bmeg/sifter/loader" -) - -func TestProject(t *testing.T) { - workDir := "./" - driver := "dir://." - ld, err := loader.NewLoader(driver) - if err != nil { - t.Error(err) - } - defer ld.Close() - - dir, err := ioutil.TempDir(workDir, "sifterwork_") - defer os.RemoveAll(dir) - - if err != nil { - t.Error(err) - } - inputs := map[string]string{} - - err = run.ExecuteFile("../resources/project.yaml", dir, "./", inputs) - if err != nil { - t.Error(err) - } - os.RemoveAll(dir) -} diff --git a/transform/emit.go b/transform/emit.go deleted file mode 100644 index 800f93f..0000000 --- a/transform/emit.go +++ /dev/null @@ -1,35 +0,0 @@ -package transform - -import ( - "github.com/bmeg/sifter/evaluate" - "github.com/bmeg/sifter/logger" - "github.com/bmeg/sifter/task" -) - -type EmitStep struct { - Name string `json:"name"` - UseName bool `json:"UseName"` -} - -type emitProcess struct { - config EmitStep - task task.RuntimeTask - count uint64 -} - -func (ts EmitStep) Init(t task.RuntimeTask) (Processor, error) { - return &emitProcess{ts, t, 0}, nil -} - -func (ts *emitProcess) Close() { - logger.Info("Emit Summary", "name", ts.config.Name, "count", ts.count) -} - -func (ts *emitProcess) Process(i map[string]interface{}) []map[string]interface{} { - name, err := evaluate.ExpressionString(ts.config.Name, ts.task.GetConfig(), i) - if err == nil { - ts.count++ - ts.task.Emit(name, i, ts.config.UseName) - } - return []map[string]any{i} -} diff --git a/transform/filter.go b/transform/filter.go index c2a21c5..5c89317 100644 --- a/transform/filter.go +++ b/transform/filter.go @@ -5,17 +5,18 @@ import ( 
"github.com/bmeg/sifter/evaluate" "github.com/bmeg/sifter/logger" + "github.com/bmeg/sifter/playbook/refs" "github.com/bmeg/sifter/task" ) type FilterStep struct { - Field string `json:"field"` - Value string `json:"value"` - Match string `json:"match"` - Check string `json:"check" jsonschema_description:"How to check value, 'exists' or 'hasValue'"` - Method string `json:"method"` - Python string `json:"python"` - GPython *CodeBlock `json:"gpython"` + Field string `json:"field"` + Value string `json:"value"` + Match string `json:"match"` + Check string `json:"check" jsonschema_description:"How to check value, 'exists' or 'hasValue'"` + Method string `json:"method"` + Python string `json:"python"` + GPython *refs.CodeBlock `json:"gpython"` } type filterProcessor struct { diff --git a/transform/flat_map.go b/transform/flat_map.go index 0419759..80440aa 100644 --- a/transform/flat_map.go +++ b/transform/flat_map.go @@ -7,13 +7,14 @@ import ( "github.com/bmeg/sifter/evaluate" "github.com/bmeg/sifter/logger" + "github.com/bmeg/sifter/playbook/refs" "github.com/bmeg/sifter/task" ) type FlatMapStep struct { - Method string `json:"method" jsonschema_description:"Name of function to call"` - Python string `json:"python" jsonschema_description:"Python code to be run"` - GPython *CodeBlock `json:"gpython" jsonschema_description:"Python code to be run using GPython"` + Method string `json:"method" jsonschema_description:"Name of function to call"` + Python string `json:"python" jsonschema_description:"Python code to be run"` + GPython *refs.CodeBlock `json:"gpython" jsonschema_description:"Python code to be run using GPython"` } type flatMapProcess struct { diff --git a/transform/interface.go b/transform/interface.go index 633da96..9f71779 100644 --- a/transform/interface.go +++ b/transform/interface.go @@ -61,7 +61,6 @@ type Step struct { FieldParse *FieldParseStep `json:"fieldParse" jsonschema_description:"fieldParse to run"` FieldType *FieldTypeStep `json:"fieldType" 
jsonschema_description:"Change type of a field (ie string -> integer)"` ObjectValidate *ObjectValidateStep `json:"objectValidate" jsonschema_description:"Validate a JSON schema based object"` - Emit *EmitStep `json:"emit" jsonschema_description:"Write to unstructured JSON file"` Filter *FilterStep `json:"filter"` Clean *CleanStep `json:"clean"` Debug *DebugStep `json:"debug" jsonschema_description:"Print message contents to stdout"` @@ -77,8 +76,6 @@ type Step struct { Lookup *LookupStep `json:"lookup"` IntervalIntersect *IntervalStep `json:"intervalIntersect"` Hash *HashStep `json:"hash"` - GraphBuild *GraphBuildStep `json:"graphBuild"` - TableWrite *TableWriteStep `json:"tableWrite"` Accumulate *AccumulateStep `json:"accumulate"` UUID *UUIDStep `json:"uuid"` } @@ -99,34 +96,17 @@ func (ts Step) Init(t task.RuntimeTask) (Processor, error) { return nil, fmt.Errorf(("Transform not defined")) } -func (ts Step) GetConfigFields() []config.Variable { - out := []config.Variable{} +func (ts Step) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} v := reflect.ValueOf(ts) for i := 0; i < v.NumField(); i++ { f := v.Field(i) x := f.Interface() if z, ok := x.(config.Configurable); ok { if !f.IsNil() { - out = append(out, z.GetConfigFields()...) + out = append(out, z.GetRequiredParams()...) 
} } } return out } - -func (ts Step) GetEmitters() []string { - if ts.Emit != nil { - return []string{ts.Emit.Name} - } - if ts.GraphBuild != nil { - return []string{"vertex", "edge"} - } - return []string{} -} - -func (ts Step) GetOutputs() []string { - if ts.TableWrite != nil { - return []string{ts.TableWrite.Output} - } - return []string{} -} diff --git a/transform/lookup.go b/transform/lookup.go index 0d1a506..135b242 100644 --- a/transform/lookup.go +++ b/transform/lookup.go @@ -101,15 +101,15 @@ func (tr *LookupStep) Init(task task.RuntimeTask) (Processor, error) { return nil, fmt.Errorf("table input not defined") } -func (tr *LookupStep) GetConfigFields() []config.Variable { - out := []config.Variable{} +func (tr *LookupStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} if tr.TSV != nil && tr.TSV.Input != "" { for _, s := range evaluate.ExpressionIDs(tr.TSV.Input) { - out = append(out, config.Variable{Type: config.File, Name: config.TrimPrefix(s)}) + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } } else if tr.JSON != nil && tr.JSON.Input != "" { for _, s := range evaluate.ExpressionIDs(tr.JSON.Input) { - out = append(out, config.Variable{Type: config.File, Name: config.TrimPrefix(s)}) + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } } return out diff --git a/transform/lookup_interval.go b/transform/lookup_interval.go index 8ebd661..5d4cb46 100644 --- a/transform/lookup_interval.go +++ b/transform/lookup_interval.go @@ -97,11 +97,11 @@ func toInt64(v any) int64 { return 0 } -func (tr *IntervalStep) GetConfigFields() []config.Variable { - out := []config.Variable{} +func (tr *IntervalStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} if tr.JSON != nil && tr.JSON.Input != "" { for _, s := range evaluate.ExpressionIDs(tr.JSON.Input) { - out = append(out, config.Variable{Type: config.File, Name: config.TrimPrefix(s)}) + out = 
append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } } return out diff --git a/transform/mapping.go b/transform/mapping.go index 36f30c3..e95ab35 100644 --- a/transform/mapping.go +++ b/transform/mapping.go @@ -7,13 +7,14 @@ import ( "github.com/bmeg/sifter/evaluate" "github.com/bmeg/sifter/logger" + "github.com/bmeg/sifter/playbook/refs" "github.com/bmeg/sifter/task" ) type MapStep struct { - Method string `json:"method" jsonschema_description:"Name of function to call"` - Python string `json:"python" jsonschema_description:"Python code to be run"` - GPython *CodeBlock `json:"gpython" jsonschema_description:"Python code to be run using GPython"` + Method string `json:"method" jsonschema_description:"Name of function to call"` + Python string `json:"python" jsonschema_description:"Python code to be run"` + GPython *refs.CodeBlock `json:"gpython" jsonschema_description:"Python code to be run using GPython"` } type mapProcess struct { diff --git a/transform/object_validate.go b/transform/object_validate.go index 413c803..69f32c2 100644 --- a/transform/object_validate.go +++ b/transform/object_validate.go @@ -54,11 +54,11 @@ func (ts ObjectValidateStep) Init(task task.RuntimeTask) (Processor, error) { return nil, fmt.Errorf("class not configured") } -func (ts ObjectValidateStep) GetConfigFields() []config.Variable { - out := []config.Variable{} +func (ts ObjectValidateStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} if ts.Schema != "" { for _, s := range evaluate.ExpressionIDs(ts.Schema) { - out = append(out, config.Variable{Type: config.Dir, Name: config.TrimPrefix(s)}) + out = append(out, config.ParamRequest{Type: "File", Name: config.TrimPrefix(s)}) } } return out diff --git a/transform/project.go b/transform/project.go index 02bf0c7..a836fb3 100644 --- a/transform/project.go +++ b/transform/project.go @@ -22,13 +22,13 @@ func (pr ProjectStep) Init(t task.RuntimeTask) (Processor, error) { return 
&projectStepProcess{pr, t}, nil } -func (pr ProjectStep) GetConfigFields() []config.Variable { - out := []config.Variable{} +func (pr ProjectStep) GetRequiredParams() []config.ParamRequest { + out := []config.ParamRequest{} for _, v := range pr.Mapping { t := scanIds(v) for i := range t { if strings.HasPrefix(t[i], "config.") { - out = append(out, config.Variable{Name: config.TrimPrefix(t[i])}) + out = append(out, config.ParamRequest{Name: config.TrimPrefix(t[i])}) } } } diff --git a/transform/reduce.go b/transform/reduce.go index 005804f..ed36c02 100644 --- a/transform/reduce.go +++ b/transform/reduce.go @@ -5,6 +5,7 @@ import ( "github.com/bmeg/sifter/evaluate" "github.com/bmeg/sifter/logger" + "github.com/bmeg/sifter/playbook/refs" "github.com/bmeg/sifter/task" ) @@ -12,7 +13,7 @@ type ReduceStep struct { Field string `json:"field"` Method string `json:"method"` Python string `json:"python"` - GPython *CodeBlock `json:"gpython"` + GPython *refs.CodeBlock `json:"gpython"` InitData *map[string]interface{} `json:"init"` } diff --git a/website/.hugo_build.lock b/website/.hugo_build.lock deleted file mode 100644 index e69de29..0000000 diff --git a/website/archetypes/default.md b/website/archetypes/default.md deleted file mode 100644 index 00e77bd..0000000 --- a/website/archetypes/default.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: "{{ replace .Name "-" " " | title }}" -date: {{ .Date }} -draft: true ---- - diff --git a/website/config.yaml b/website/config.yaml deleted file mode 100644 index 98a395b..0000000 --- a/website/config.yaml +++ /dev/null @@ -1,4 +0,0 @@ -baseURL: https://bmeg.github.io/sifter -languageCode: en-us -title: Sifter -publishDir: ../docs/ \ No newline at end of file diff --git a/website/content/docs/inputs.md b/website/content/docs/inputs.md deleted file mode 100644 index aa8a28e..0000000 --- a/website/content/docs/inputs.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Inputs -menu: - main: - identifier: inputs - weight: 4 ---- - -Every playbook 
consists of a series of inputs. \ No newline at end of file diff --git a/website/content/docs/transforms.md b/website/content/docs/transforms.md deleted file mode 100644 index 2365e78..0000000 --- a/website/content/docs/transforms.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Pipeline Steps -menu: - main: - identifier: transforms - weight: 5 ---- - -Transforms alter the data \ No newline at end of file diff --git a/website/content/docs/transforms/emit.md b/website/content/docs/transforms/emit.md deleted file mode 100644 index 6b7d9ed..0000000 --- a/website/content/docs/transforms/emit.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: emit -menu: - main: - parent: transforms - weight: 100 ---- - -# emit - -Send data to output file. The naming of the file is `outdir`/`script name`.`pipeline name`.`emit name`.json.gz - -## Parameters - -| name | Type | Description | -| --- | --- | --- | -| name | string | Name of emit value | - -## example - -```yaml - - emit: - name: protein_compound_association -``` \ No newline at end of file diff --git a/website/content/docs/transforms/fieldParse.md b/website/content/docs/transforms/fieldParse.md deleted file mode 100644 index 476e44c..0000000 --- a/website/content/docs/transforms/fieldParse.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: fieldParse -menu: - main: - parent: transforms - weight: 100 ---- - diff --git a/website/content/docs/transforms/flatmap.md b/website/content/docs/transforms/flatmap.md deleted file mode 100644 index c880db2..0000000 --- a/website/content/docs/transforms/flatmap.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: flatMap -menu: - main: - parent: transforms - weight: 100 ---- diff --git a/website/content/docs/transforms/from.md b/website/content/docs/transforms/from.md deleted file mode 100644 index c6f5a80..0000000 --- a/website/content/docs/transforms/from.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: from -menu: - main: - parent: transforms - weight: 100 ---- - -# from - -## Parmeters - -Name of data 
source - -## Example - -```yaml - - -inputs: - profileReader: - tableLoad: - input: "{{config.profiles}}" - -pipelines: - profileProcess: - - from: profileReader - -``` \ No newline at end of file diff --git a/website/content/docs/transforms/uuid.md b/website/content/docs/transforms/uuid.md deleted file mode 100644 index 0127b15..0000000 --- a/website/content/docs/transforms/uuid.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: uuid -menu: - main: - parent: transforms - weight: 100 ---- diff --git a/website/layouts/_default/single.html b/website/layouts/_default/single.html deleted file mode 100644 index ee52f69..0000000 --- a/website/layouts/_default/single.html +++ /dev/null @@ -1,40 +0,0 @@ -{{ partial "head.html" . }} - -
- - - -
- {{ .Content }} -
- -
- - diff --git a/website/layouts/index.html b/website/layouts/index.html deleted file mode 100644 index bc335ba..0000000 --- a/website/layouts/index.html +++ /dev/null @@ -1,18 +0,0 @@ - -{{ partial "head.html" . }} - - - -
-
-
- - -
-
- {{ .Content }} -
-
-
- -{{ partial "tail.html" . }} \ No newline at end of file diff --git a/website/layouts/partials/head.html b/website/layouts/partials/head.html deleted file mode 100644 index 5564aca..0000000 --- a/website/layouts/partials/head.html +++ /dev/null @@ -1,45 +0,0 @@ - - - - - - - - - - {{ if .IsHome }} - {{ .Site.Title }} - {{ else }} - {{ .Title }} · {{ .Site.Title }} - {{ end }} - - - - - - - - - - - - - - - - - - - - diff --git a/website/layouts/partials/tail.html b/website/layouts/partials/tail.html deleted file mode 100644 index 11a09ca..0000000 --- a/website/layouts/partials/tail.html +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/website/public/categories/index.xml b/website/public/categories/index.xml deleted file mode 100644 index 1de8250..0000000 --- a/website/public/categories/index.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - Categories on Sifter - http://example.org/categories/ - Recent content in Categories on Sifter - Hugo -- gohugo.io - en-us - - diff --git a/website/public/css/darcula.css b/website/public/css/darcula.css deleted file mode 100644 index be182d0..0000000 --- a/website/public/css/darcula.css +++ /dev/null @@ -1,77 +0,0 @@ -/* - -Darcula color scheme from the JetBrains family of IDEs - -*/ - - -.hljs { - display: block; - overflow-x: auto; - padding: 0.5em; - background: #2b2b2b; -} - -.hljs { - color: #bababa; -} - -.hljs-strong, -.hljs-emphasis { - color: #a8a8a2; -} - -.hljs-bullet, -.hljs-quote, -.hljs-link, -.hljs-number, -.hljs-regexp, -.hljs-literal { - color: #6896ba; -} - -.hljs-code, -.hljs-selector-class { - color: #a6e22e; -} - -.hljs-emphasis { - font-style: italic; -} - -.hljs-keyword, -.hljs-selector-tag, -.hljs-section, -.hljs-attribute, -.hljs-name, -.hljs-variable { - color: #cb7832; -} - -.hljs-params { - color: #b9b9b9; -} - -.hljs-string { - color: #6a8759; -} - -.hljs-subst, -.hljs-type, -.hljs-built_in, -.hljs-builtin-name, -.hljs-symbol, -.hljs-selector-id, -.hljs-selector-attr, -.hljs-selector-pseudo, 
-.hljs-template-tag, -.hljs-template-variable, -.hljs-addition { - color: #e0c46c; -} - -.hljs-comment, -.hljs-deletion, -.hljs-meta { - color: #7f7f7f; -} diff --git a/website/public/css/dark.css b/website/public/css/dark.css deleted file mode 100644 index b4724f5..0000000 --- a/website/public/css/dark.css +++ /dev/null @@ -1,63 +0,0 @@ -/* - -Dark style from softwaremaniacs.org (c) Ivan Sagalaev - -*/ - -.hljs { - display: block; - overflow-x: auto; - padding: 0.5em; - background: #444; -} - -.hljs-keyword, -.hljs-selector-tag, -.hljs-literal, -.hljs-section, -.hljs-link { - color: white; -} - -.hljs, -.hljs-subst { - color: #ddd; -} - -.hljs-string, -.hljs-title, -.hljs-name, -.hljs-type, -.hljs-attribute, -.hljs-symbol, -.hljs-bullet, -.hljs-built_in, -.hljs-addition, -.hljs-variable, -.hljs-template-tag, -.hljs-template-variable { - color: #d88; -} - -.hljs-comment, -.hljs-quote, -.hljs-deletion, -.hljs-meta { - color: #777; -} - -.hljs-keyword, -.hljs-selector-tag, -.hljs-literal, -.hljs-title, -.hljs-section, -.hljs-doctag, -.hljs-type, -.hljs-name, -.hljs-strong { - font-weight: bold; -} - -.hljs-emphasis { - font-style: italic; -} diff --git a/website/public/css/flexboxgrid.css b/website/public/css/flexboxgrid.css deleted file mode 100644 index 603506f..0000000 --- a/website/public/css/flexboxgrid.css +++ /dev/null @@ -1,960 +0,0 @@ -.container-fluid, -.container { - margin-right: auto; - margin-left: auto; -} - -.container-fluid { - padding-right: 2rem; - padding-left: 2rem; -} - -.row { - box-sizing: border-box; - display: -webkit-box; - display: -ms-flexbox; - display: flex; - -webkit-box-flex: 0; - -ms-flex: 0 1 auto; - flex: 0 1 auto; - -webkit-box-orient: horizontal; - -webkit-box-direction: normal; - -ms-flex-direction: row; - flex-direction: row; - -ms-flex-wrap: wrap; - flex-wrap: wrap; - margin-right: -0.5rem; - margin-left: -0.5rem; -} - -.row.reverse { - -webkit-box-orient: horizontal; - -webkit-box-direction: reverse; - -ms-flex-direction: 
row-reverse; - flex-direction: row-reverse; -} - -.col.reverse { - -webkit-box-orient: vertical; - -webkit-box-direction: reverse; - -ms-flex-direction: column-reverse; - flex-direction: column-reverse; -} - -.col-xs, -.col-xs-1, -.col-xs-2, -.col-xs-3, -.col-xs-4, -.col-xs-5, -.col-xs-6, -.col-xs-7, -.col-xs-8, -.col-xs-9, -.col-xs-10, -.col-xs-11, -.col-xs-12, -.col-xs-offset-0, -.col-xs-offset-1, -.col-xs-offset-2, -.col-xs-offset-3, -.col-xs-offset-4, -.col-xs-offset-5, -.col-xs-offset-6, -.col-xs-offset-7, -.col-xs-offset-8, -.col-xs-offset-9, -.col-xs-offset-10, -.col-xs-offset-11, -.col-xs-offset-12 { - box-sizing: border-box; - -webkit-box-flex: 0; - -ms-flex: 0 0 auto; - flex: 0 0 auto; - padding-right: 0.5rem; - padding-left: 0.5rem; -} - -.col-xs { - -webkit-box-flex: 1; - -ms-flex-positive: 1; - flex-grow: 1; - -ms-flex-preferred-size: 0; - flex-basis: 0; - max-width: 100%; -} - -.col-xs-1 { - -ms-flex-preferred-size: 8.33333333%; - flex-basis: 8.33333333%; - max-width: 8.33333333%; -} - -.col-xs-2 { - -ms-flex-preferred-size: 16.66666667%; - flex-basis: 16.66666667%; - max-width: 16.66666667%; -} - -.col-xs-3 { - -ms-flex-preferred-size: 25%; - flex-basis: 25%; - max-width: 25%; -} - -.col-xs-4 { - -ms-flex-preferred-size: 33.33333333%; - flex-basis: 33.33333333%; - max-width: 33.33333333%; -} - -.col-xs-5 { - -ms-flex-preferred-size: 41.66666667%; - flex-basis: 41.66666667%; - max-width: 41.66666667%; -} - -.col-xs-6 { - -ms-flex-preferred-size: 50%; - flex-basis: 50%; - max-width: 50%; -} - -.col-xs-7 { - -ms-flex-preferred-size: 58.33333333%; - flex-basis: 58.33333333%; - max-width: 58.33333333%; -} - -.col-xs-8 { - -ms-flex-preferred-size: 66.66666667%; - flex-basis: 66.66666667%; - max-width: 66.66666667%; -} - -.col-xs-9 { - -ms-flex-preferred-size: 75%; - flex-basis: 75%; - max-width: 75%; -} - -.col-xs-10 { - -ms-flex-preferred-size: 83.33333333%; - flex-basis: 83.33333333%; - max-width: 83.33333333%; -} - -.col-xs-11 { - 
-ms-flex-preferred-size: 91.66666667%; - flex-basis: 91.66666667%; - max-width: 91.66666667%; -} - -.col-xs-12 { - -ms-flex-preferred-size: 100%; - flex-basis: 100%; - max-width: 100%; -} - -.col-xs-offset-0 { - margin-left: 0; -} - -.col-xs-offset-1 { - margin-left: 8.33333333%; -} - -.col-xs-offset-2 { - margin-left: 16.66666667%; -} - -.col-xs-offset-3 { - margin-left: 25%; -} - -.col-xs-offset-4 { - margin-left: 33.33333333%; -} - -.col-xs-offset-5 { - margin-left: 41.66666667%; -} - -.col-xs-offset-6 { - margin-left: 50%; -} - -.col-xs-offset-7 { - margin-left: 58.33333333%; -} - -.col-xs-offset-8 { - margin-left: 66.66666667%; -} - -.col-xs-offset-9 { - margin-left: 75%; -} - -.col-xs-offset-10 { - margin-left: 83.33333333%; -} - -.col-xs-offset-11 { - margin-left: 91.66666667%; -} - -.start-xs { - -webkit-box-pack: start; - -ms-flex-pack: start; - justify-content: flex-start; - text-align: start; -} - -.center-xs { - -webkit-box-pack: center; - -ms-flex-pack: center; - justify-content: center; - text-align: center; -} - -.end-xs { - -webkit-box-pack: end; - -ms-flex-pack: end; - justify-content: flex-end; - text-align: end; -} - -.top-xs { - -webkit-box-align: start; - -ms-flex-align: start; - align-items: flex-start; -} - -.middle-xs { - -webkit-box-align: center; - -ms-flex-align: center; - align-items: center; -} - -.bottom-xs { - -webkit-box-align: end; - -ms-flex-align: end; - align-items: flex-end; -} - -.around-xs { - -ms-flex-pack: distribute; - justify-content: space-around; -} - -.between-xs { - -webkit-box-pack: justify; - -ms-flex-pack: justify; - justify-content: space-between; -} - -.first-xs { - -webkit-box-ordinal-group: 0; - -ms-flex-order: -1; - order: -1; -} - -.last-xs { - -webkit-box-ordinal-group: 2; - -ms-flex-order: 1; - order: 1; -} - -@media only screen and (min-width: 48em) { - .container { - width: 49rem; - } - - .col-sm, - .col-sm-1, - .col-sm-2, - .col-sm-3, - .col-sm-4, - .col-sm-5, - .col-sm-6, - .col-sm-7, - .col-sm-8, - 
.col-sm-9, - .col-sm-10, - .col-sm-11, - .col-sm-12, - .col-sm-offset-0, - .col-sm-offset-1, - .col-sm-offset-2, - .col-sm-offset-3, - .col-sm-offset-4, - .col-sm-offset-5, - .col-sm-offset-6, - .col-sm-offset-7, - .col-sm-offset-8, - .col-sm-offset-9, - .col-sm-offset-10, - .col-sm-offset-11, - .col-sm-offset-12 { - box-sizing: border-box; - -webkit-box-flex: 0; - -ms-flex: 0 0 auto; - flex: 0 0 auto; - padding-right: 0.5rem; - padding-left: 0.5rem; - } - - .col-sm { - -webkit-box-flex: 1; - -ms-flex-positive: 1; - flex-grow: 1; - -ms-flex-preferred-size: 0; - flex-basis: 0; - max-width: 100%; - } - - .col-sm-1 { - -ms-flex-preferred-size: 8.33333333%; - flex-basis: 8.33333333%; - max-width: 8.33333333%; - } - - .col-sm-2 { - -ms-flex-preferred-size: 16.66666667%; - flex-basis: 16.66666667%; - max-width: 16.66666667%; - } - - .col-sm-3 { - -ms-flex-preferred-size: 25%; - flex-basis: 25%; - max-width: 25%; - } - - .col-sm-4 { - -ms-flex-preferred-size: 33.33333333%; - flex-basis: 33.33333333%; - max-width: 33.33333333%; - } - - .col-sm-5 { - -ms-flex-preferred-size: 41.66666667%; - flex-basis: 41.66666667%; - max-width: 41.66666667%; - } - - .col-sm-6 { - -ms-flex-preferred-size: 50%; - flex-basis: 50%; - max-width: 50%; - } - - .col-sm-7 { - -ms-flex-preferred-size: 58.33333333%; - flex-basis: 58.33333333%; - max-width: 58.33333333%; - } - - .col-sm-8 { - -ms-flex-preferred-size: 66.66666667%; - flex-basis: 66.66666667%; - max-width: 66.66666667%; - } - - .col-sm-9 { - -ms-flex-preferred-size: 75%; - flex-basis: 75%; - max-width: 75%; - } - - .col-sm-10 { - -ms-flex-preferred-size: 83.33333333%; - flex-basis: 83.33333333%; - max-width: 83.33333333%; - } - - .col-sm-11 { - -ms-flex-preferred-size: 91.66666667%; - flex-basis: 91.66666667%; - max-width: 91.66666667%; - } - - .col-sm-12 { - -ms-flex-preferred-size: 100%; - flex-basis: 100%; - max-width: 100%; - } - - .col-sm-offset-0 { - margin-left: 0; - } - - .col-sm-offset-1 { - margin-left: 8.33333333%; - } - - 
.col-sm-offset-2 { - margin-left: 16.66666667%; - } - - .col-sm-offset-3 { - margin-left: 25%; - } - - .col-sm-offset-4 { - margin-left: 33.33333333%; - } - - .col-sm-offset-5 { - margin-left: 41.66666667%; - } - - .col-sm-offset-6 { - margin-left: 50%; - } - - .col-sm-offset-7 { - margin-left: 58.33333333%; - } - - .col-sm-offset-8 { - margin-left: 66.66666667%; - } - - .col-sm-offset-9 { - margin-left: 75%; - } - - .col-sm-offset-10 { - margin-left: 83.33333333%; - } - - .col-sm-offset-11 { - margin-left: 91.66666667%; - } - - .start-sm { - -webkit-box-pack: start; - -ms-flex-pack: start; - justify-content: flex-start; - text-align: start; - } - - .center-sm { - -webkit-box-pack: center; - -ms-flex-pack: center; - justify-content: center; - text-align: center; - } - - .end-sm { - -webkit-box-pack: end; - -ms-flex-pack: end; - justify-content: flex-end; - text-align: end; - } - - .top-sm { - -webkit-box-align: start; - -ms-flex-align: start; - align-items: flex-start; - } - - .middle-sm { - -webkit-box-align: center; - -ms-flex-align: center; - align-items: center; - } - - .bottom-sm { - -webkit-box-align: end; - -ms-flex-align: end; - align-items: flex-end; - } - - .around-sm { - -ms-flex-pack: distribute; - justify-content: space-around; - } - - .between-sm { - -webkit-box-pack: justify; - -ms-flex-pack: justify; - justify-content: space-between; - } - - .first-sm { - -webkit-box-ordinal-group: 0; - -ms-flex-order: -1; - order: -1; - } - - .last-sm { - -webkit-box-ordinal-group: 2; - -ms-flex-order: 1; - order: 1; - } -} - -@media only screen and (min-width: 64em) { - .container { - width: 65rem; - } - - .col-md, - .col-md-1, - .col-md-2, - .col-md-3, - .col-md-4, - .col-md-5, - .col-md-6, - .col-md-7, - .col-md-8, - .col-md-9, - .col-md-10, - .col-md-11, - .col-md-12, - .col-md-offset-0, - .col-md-offset-1, - .col-md-offset-2, - .col-md-offset-3, - .col-md-offset-4, - .col-md-offset-5, - .col-md-offset-6, - .col-md-offset-7, - .col-md-offset-8, - 
.col-md-offset-9, - .col-md-offset-10, - .col-md-offset-11, - .col-md-offset-12 { - box-sizing: border-box; - -webkit-box-flex: 0; - -ms-flex: 0 0 auto; - flex: 0 0 auto; - padding-right: 0.5rem; - padding-left: 0.5rem; - } - - .col-md { - -webkit-box-flex: 1; - -ms-flex-positive: 1; - flex-grow: 1; - -ms-flex-preferred-size: 0; - flex-basis: 0; - max-width: 100%; - } - - .col-md-1 { - -ms-flex-preferred-size: 8.33333333%; - flex-basis: 8.33333333%; - max-width: 8.33333333%; - } - - .col-md-2 { - -ms-flex-preferred-size: 16.66666667%; - flex-basis: 16.66666667%; - max-width: 16.66666667%; - } - - .col-md-3 { - -ms-flex-preferred-size: 25%; - flex-basis: 25%; - max-width: 25%; - } - - .col-md-4 { - -ms-flex-preferred-size: 33.33333333%; - flex-basis: 33.33333333%; - max-width: 33.33333333%; - } - - .col-md-5 { - -ms-flex-preferred-size: 41.66666667%; - flex-basis: 41.66666667%; - max-width: 41.66666667%; - } - - .col-md-6 { - -ms-flex-preferred-size: 50%; - flex-basis: 50%; - max-width: 50%; - } - - .col-md-7 { - -ms-flex-preferred-size: 58.33333333%; - flex-basis: 58.33333333%; - max-width: 58.33333333%; - } - - .col-md-8 { - -ms-flex-preferred-size: 66.66666667%; - flex-basis: 66.66666667%; - max-width: 66.66666667%; - } - - .col-md-9 { - -ms-flex-preferred-size: 75%; - flex-basis: 75%; - max-width: 75%; - } - - .col-md-10 { - -ms-flex-preferred-size: 83.33333333%; - flex-basis: 83.33333333%; - max-width: 83.33333333%; - } - - .col-md-11 { - -ms-flex-preferred-size: 91.66666667%; - flex-basis: 91.66666667%; - max-width: 91.66666667%; - } - - .col-md-12 { - -ms-flex-preferred-size: 100%; - flex-basis: 100%; - max-width: 100%; - } - - .col-md-offset-0 { - margin-left: 0; - } - - .col-md-offset-1 { - margin-left: 8.33333333%; - } - - .col-md-offset-2 { - margin-left: 16.66666667%; - } - - .col-md-offset-3 { - margin-left: 25%; - } - - .col-md-offset-4 { - margin-left: 33.33333333%; - } - - .col-md-offset-5 { - margin-left: 41.66666667%; - } - - .col-md-offset-6 { - 
margin-left: 50%; - } - - .col-md-offset-7 { - margin-left: 58.33333333%; - } - - .col-md-offset-8 { - margin-left: 66.66666667%; - } - - .col-md-offset-9 { - margin-left: 75%; - } - - .col-md-offset-10 { - margin-left: 83.33333333%; - } - - .col-md-offset-11 { - margin-left: 91.66666667%; - } - - .start-md { - -webkit-box-pack: start; - -ms-flex-pack: start; - justify-content: flex-start; - text-align: start; - } - - .center-md { - -webkit-box-pack: center; - -ms-flex-pack: center; - justify-content: center; - text-align: center; - } - - .end-md { - -webkit-box-pack: end; - -ms-flex-pack: end; - justify-content: flex-end; - text-align: end; - } - - .top-md { - -webkit-box-align: start; - -ms-flex-align: start; - align-items: flex-start; - } - - .middle-md { - -webkit-box-align: center; - -ms-flex-align: center; - align-items: center; - } - - .bottom-md { - -webkit-box-align: end; - -ms-flex-align: end; - align-items: flex-end; - } - - .around-md { - -ms-flex-pack: distribute; - justify-content: space-around; - } - - .between-md { - -webkit-box-pack: justify; - -ms-flex-pack: justify; - justify-content: space-between; - } - - .first-md { - -webkit-box-ordinal-group: 0; - -ms-flex-order: -1; - order: -1; - } - - .last-md { - -webkit-box-ordinal-group: 2; - -ms-flex-order: 1; - order: 1; - } -} - -@media only screen and (min-width: 75em) { - .container { - width: 76rem; - } - - .col-lg, - .col-lg-1, - .col-lg-2, - .col-lg-3, - .col-lg-4, - .col-lg-5, - .col-lg-6, - .col-lg-7, - .col-lg-8, - .col-lg-9, - .col-lg-10, - .col-lg-11, - .col-lg-12, - .col-lg-offset-0, - .col-lg-offset-1, - .col-lg-offset-2, - .col-lg-offset-3, - .col-lg-offset-4, - .col-lg-offset-5, - .col-lg-offset-6, - .col-lg-offset-7, - .col-lg-offset-8, - .col-lg-offset-9, - .col-lg-offset-10, - .col-lg-offset-11, - .col-lg-offset-12 { - box-sizing: border-box; - -webkit-box-flex: 0; - -ms-flex: 0 0 auto; - flex: 0 0 auto; - padding-right: 0.5rem; - padding-left: 0.5rem; - } - - .col-lg { - 
-webkit-box-flex: 1; - -ms-flex-positive: 1; - flex-grow: 1; - -ms-flex-preferred-size: 0; - flex-basis: 0; - max-width: 100%; - } - - .col-lg-1 { - -ms-flex-preferred-size: 8.33333333%; - flex-basis: 8.33333333%; - max-width: 8.33333333%; - } - - .col-lg-2 { - -ms-flex-preferred-size: 16.66666667%; - flex-basis: 16.66666667%; - max-width: 16.66666667%; - } - - .col-lg-3 { - -ms-flex-preferred-size: 25%; - flex-basis: 25%; - max-width: 25%; - } - - .col-lg-4 { - -ms-flex-preferred-size: 33.33333333%; - flex-basis: 33.33333333%; - max-width: 33.33333333%; - } - - .col-lg-5 { - -ms-flex-preferred-size: 41.66666667%; - flex-basis: 41.66666667%; - max-width: 41.66666667%; - } - - .col-lg-6 { - -ms-flex-preferred-size: 50%; - flex-basis: 50%; - max-width: 50%; - } - - .col-lg-7 { - -ms-flex-preferred-size: 58.33333333%; - flex-basis: 58.33333333%; - max-width: 58.33333333%; - } - - .col-lg-8 { - -ms-flex-preferred-size: 66.66666667%; - flex-basis: 66.66666667%; - max-width: 66.66666667%; - } - - .col-lg-9 { - -ms-flex-preferred-size: 75%; - flex-basis: 75%; - max-width: 75%; - } - - .col-lg-10 { - -ms-flex-preferred-size: 83.33333333%; - flex-basis: 83.33333333%; - max-width: 83.33333333%; - } - - .col-lg-11 { - -ms-flex-preferred-size: 91.66666667%; - flex-basis: 91.66666667%; - max-width: 91.66666667%; - } - - .col-lg-12 { - -ms-flex-preferred-size: 100%; - flex-basis: 100%; - max-width: 100%; - } - - .col-lg-offset-0 { - margin-left: 0; - } - - .col-lg-offset-1 { - margin-left: 8.33333333%; - } - - .col-lg-offset-2 { - margin-left: 16.66666667%; - } - - .col-lg-offset-3 { - margin-left: 25%; - } - - .col-lg-offset-4 { - margin-left: 33.33333333%; - } - - .col-lg-offset-5 { - margin-left: 41.66666667%; - } - - .col-lg-offset-6 { - margin-left: 50%; - } - - .col-lg-offset-7 { - margin-left: 58.33333333%; - } - - .col-lg-offset-8 { - margin-left: 66.66666667%; - } - - .col-lg-offset-9 { - margin-left: 75%; - } - - .col-lg-offset-10 { - margin-left: 83.33333333%; - } - - 
.col-lg-offset-11 { - margin-left: 91.66666667%; - } - - .start-lg { - -webkit-box-pack: start; - -ms-flex-pack: start; - justify-content: flex-start; - text-align: start; - } - - .center-lg { - -webkit-box-pack: center; - -ms-flex-pack: center; - justify-content: center; - text-align: center; - } - - .end-lg { - -webkit-box-pack: end; - -ms-flex-pack: end; - justify-content: flex-end; - text-align: end; - } - - .top-lg { - -webkit-box-align: start; - -ms-flex-align: start; - align-items: flex-start; - } - - .middle-lg { - -webkit-box-align: center; - -ms-flex-align: center; - align-items: center; - } - - .bottom-lg { - -webkit-box-align: end; - -ms-flex-align: end; - align-items: flex-end; - } - - .around-lg { - -ms-flex-pack: distribute; - justify-content: space-around; - } - - .between-lg { - -webkit-box-pack: justify; - -ms-flex-pack: justify; - justify-content: space-between; - } - - .first-lg { - -webkit-box-ordinal-group: 0; - -ms-flex-order: -1; - order: -1; - } - - .last-lg { - -webkit-box-ordinal-group: 2; - -ms-flex-order: 1; - order: 1; - } -} diff --git a/website/public/css/funnel.css b/website/public/css/funnel.css deleted file mode 100644 index 825d6af..0000000 --- a/website/public/css/funnel.css +++ /dev/null @@ -1,245 +0,0 @@ -.global-header { - background-color: #23241f; - padding: .3rem .5rem; -} - -.global-header-container { - display: flex; - align-items: center; -} - -.global-header-container h1, -.global-header-container h2 { - margin: 0; - padding: 0; - color: white; -} -.global-header-container h1 { - font-size: 1.2rem; -} -.global-header-container h2 { - font-size: .9rem; -} - -.global-header-container, -.homepage { - max-width: 50rem; - margin: 0 auto; -} - -.homepage-intro .col { - padding: 0 1rem; -} -.homepage-intro li { - font-size: 0.8rem; -} - -.global-header-nav { - list-style-type: none; - padding: 0; - margin: 0; - margin-left: 3rem; - flex-grow: 1; - display: flex; - align-items: center; - justify-content: center; -} - -@media 
only screen and (max-width: 600px) { - .global-header-container, - .global-header-nav { - flex-direction: column; - } - .global-header-home, - .global-header-ohsucb { - margin: .5rem 0; - } - .global-header-nav { - margin: 0; - } - .global-header-nav li { - margin: .5rem 0; - } - - .homepage-demo .col h1, - .homepage-demo .col p { - margin-left: .3rem; - } - - .content { - padding: 0 .7rem; - } - - .sidebar { - padding: 1rem; - padding-bottom: 0; - } - - .sidebar-nav li { - margin: .3rem 0; - } -} - -.sidebar-nav { - font-size: .9rem; -} -.sidebar-nav span.intermediate { - color: #23241f; -} - -.global-header-nav li { - display: inline-block; - padding: 0 0.5rem; - font-size: .9rem; -} - -.global-header-nav li a { - color: white; -} - -.global-header a:hover, -.global-header a:hover h1, -.global-header a:hover h2, -.global-header-nav li a:hover { - color: #9ed9ff; - text-decoration: none; -} - -.lead { - font-size: .8rem; -} - -.lead a { - color: #b8d4e0; -} - -.homepage h2 { - text-align: center; - font-size: 1.5rem; - margin-bottom: 1rem; -} - -.homepage-lead { - background-color: #f1f1f1; - padding: 2rem 2rem 1rem 2rem; - border-radius: 10px; - margin-bottom: 1rem; - text-align: center; -} - -.homepage-lead-container { - max-width: 42rem; - margin: 0 auto; -} - -.homepage-lead h1 { - margin: 0; -} - -.homepage-footer { - height: 100px; -} - -.homepage-notice { - background-color: #fffcbf; - padding: 1rem 3rem; - border-radius: 10px; - margin-top: 0; - margin-bottom: 1rem; - text-align: center; -} - -.homepage-notice h4 { - font-size: 1rem; -} - -.homepage-notice h3, -.homepage-notice p { - margin: 0; -} - -.homepage-lead .download-button, -.homepage-lead .docs-button { - padding: 10px 30px; - border-radius: 5px; - border: 0; - color: white; - font-size: .7rem; - display: inline-block; - margin: 0.1rem 0.2rem; -} -.docs-button { - border-radius: 5px; - border: 0; - color: white; - font-size: .7rem; - background-color: #4ca0ea; - padding: 10px 30px; -} - 
-.homepage-lead .download-button { - background-color: #29b429; -} - -.homepage .row { - width: 100%; - margin-bottom: 20px; -} - -.homepage-demo { - margin-top: 3rem; - margin-bottom: 3rem; -} - -.homepage-demo h1.demo-header { - font-size: 1.5rem; - text-align: center; - margin-bottom: 2rem; -} - -.homepage-demo h1 { - font-size: 1rem; - margin: 0; -} - -.homepage-demo p { - margin: .7rem 0; - padding-right: .7rem; - font-size: .8rem; -} - -.homepage h3 { - font-size: 1rem; -} - -.homepage-more { - text-align: center; -} - -.homepage p { - font-size: .8rem; -} - -pre { - padding: 0; -} - -.homepage-demo .section { - margin-bottom: 2rem; -} - -.homepage-demo pre { - margin: 0; -} -.homepage-demo code { - width: 100%; - display: block; - font-size: .8rem; - border-radius: 0; -} - -.optional { - font-size: 1rem; - color: #aaa; - font-style: normal; -} diff --git a/website/public/css/highlight.min.css b/website/public/css/highlight.min.css deleted file mode 100644 index 7d8be18..0000000 --- a/website/public/css/highlight.min.css +++ /dev/null @@ -1 +0,0 @@ -.hljs{display:block;overflow-x:auto;padding:0.5em;background:#F0F0F0}.hljs,.hljs-subst{color:#444}.hljs-comment{color:#888888}.hljs-keyword,.hljs-attribute,.hljs-selector-tag,.hljs-meta-keyword,.hljs-doctag,.hljs-name{font-weight:bold}.hljs-type,.hljs-string,.hljs-number,.hljs-selector-id,.hljs-selector-class,.hljs-quote,.hljs-template-tag,.hljs-deletion{color:#880000}.hljs-title,.hljs-section{color:#880000;font-weight:bold}.hljs-regexp,.hljs-symbol,.hljs-variable,.hljs-template-variable,.hljs-link,.hljs-selector-attr,.hljs-selector-pseudo{color:#BC6060}.hljs-literal{color:#78A960}.hljs-built_in,.hljs-bullet,.hljs-code,.hljs-addition{color:#397300}.hljs-meta{color:#1f7199}.hljs-meta-string{color:#4d99bf}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:bold} \ No newline at end of file diff --git a/website/public/css/html5reset.css b/website/public/css/html5reset.css deleted file mode 100755 index 
3bfbb3d..0000000 --- a/website/public/css/html5reset.css +++ /dev/null @@ -1,96 +0,0 @@ -/* html5reset.css - 01/11/2011 */ - -html, body, div, span, object, iframe, -h1, h2, h3, h4, h5, h6, p, blockquote, pre, -abbr, address, cite, code, -del, dfn, em, img, ins, kbd, q, samp, -small, strong, sub, sup, var, -b, i, -dl, dt, dd, ol, ul, li, -fieldset, form, label, legend, -table, caption, tbody, tfoot, thead, tr, th, td, -article, aside, canvas, details, figcaption, figure, -footer, header, hgroup, menu, nav, section, summary, -time, mark, audio, video { - margin: 0; - padding: 0; - border: 0; - outline: 0; - font-size: 100%; - vertical-align: baseline; - background: transparent; -} - -body { - line-height: 1; -} - -article,aside,details,figcaption,figure, -footer,header,hgroup,menu,nav,section { - display: block; -} - -nav ul { - list-style: none; -} - -blockquote, q { - quotes: none; -} - -blockquote:before, blockquote:after, -q:before, q:after { - content: ''; - content: none; -} - -a { - margin: 0; - padding: 0; - font-size: 100%; - vertical-align: baseline; - background: transparent; -} - -/* change colours to suit your needs */ -ins { - background-color: #ff9; - color: #000; - text-decoration: none; -} - -/* change colours to suit your needs */ -mark { - background-color: #ff9; - color: #000; - font-style: italic; - font-weight: bold; -} - -del { - text-decoration: line-through; -} - -abbr[title], dfn[title] { - border-bottom: 1px dotted; - cursor: help; -} - -table { - border-collapse: collapse; - border-spacing: 0; -} - -/* change border colour to suit your needs */ -hr { - display: block; - height: 1px; - border: 0; - border-top: 1px solid #cccccc; - margin: 1em 0; - padding: 0; -} - -input, select { - vertical-align: middle; -} \ No newline at end of file diff --git a/website/public/css/hybrid.css b/website/public/css/hybrid.css deleted file mode 100644 index 29735a1..0000000 --- a/website/public/css/hybrid.css +++ /dev/null @@ -1,102 +0,0 @@ -/* - 
-vim-hybrid theme by w0ng (https://github.com/w0ng/vim-hybrid) - -*/ - -/*background color*/ -.hljs { - display: block; - overflow-x: auto; - padding: 0.5em; - background: #1d1f21; -} - -/*selection color*/ -.hljs::selection, -.hljs span::selection { - background: #373b41; -} - -.hljs::-moz-selection, -.hljs span::-moz-selection { - background: #373b41; -} - -/*foreground color*/ -.hljs { - color: #c5c8c6; -} - -/*color: fg_yellow*/ -.hljs-title, -.hljs-name { - color: #f0c674; -} - -/*color: fg_comment*/ -.hljs-comment, -.hljs-meta, -.hljs-meta .hljs-keyword { - color: #707880; -} - -/*color: fg_red*/ -.hljs-number, -.hljs-symbol, -.hljs-literal, -.hljs-deletion, -.hljs-link { - color: #cc6666 -} - -/*color: fg_green*/ -.hljs-string, -.hljs-doctag, -.hljs-addition, -.hljs-regexp, -.hljs-selector-attr, -.hljs-selector-pseudo { - color: #b5bd68; -} - -/*color: fg_purple*/ -.hljs-attribute, -.hljs-code, -.hljs-selector-id { - color: #b294bb; -} - -/*color: fg_blue*/ -.hljs-keyword, -.hljs-selector-tag, -.hljs-bullet, -.hljs-tag { - color: #81a2be; -} - -/*color: fg_aqua*/ -.hljs-subst, -.hljs-variable, -.hljs-template-tag, -.hljs-template-variable { - color: #8abeb7; -} - -/*color: fg_orange*/ -.hljs-type, -.hljs-built_in, -.hljs-builtin-name, -.hljs-quote, -.hljs-section, -.hljs-selector-class { - color: #de935f; -} - -.hljs-emphasis { - font-style: italic; -} - -.hljs-strong { - font-weight: bold; -} diff --git a/website/public/css/monokai-sublime.css b/website/public/css/monokai-sublime.css deleted file mode 100644 index 2864170..0000000 --- a/website/public/css/monokai-sublime.css +++ /dev/null @@ -1,83 +0,0 @@ -/* - -Monokai Sublime style. 
Derived from Monokai by noformnocontent http://nn.mit-license.org/ - -*/ - -.hljs { - display: block; - overflow-x: auto; - padding: 0.5em; - background: #23241f; -} - -.hljs, -.hljs-tag, -.hljs-subst { - color: #f8f8f2; -} - -.hljs-strong, -.hljs-emphasis { - color: #a8a8a2; -} - -.hljs-bullet, -.hljs-quote, -.hljs-number, -.hljs-regexp, -.hljs-literal, -.hljs-link { - color: #ae81ff; -} - -.hljs-code, -.hljs-title, -.hljs-section, -.hljs-selector-class { - color: #a6e22e; -} - -.hljs-strong { - font-weight: bold; -} - -.hljs-emphasis { - font-style: italic; -} - -.hljs-keyword, -.hljs-selector-tag, -.hljs-name, -.hljs-attr { - color: #f92672; -} - -.hljs-symbol, -.hljs-attribute { - color: #66d9ef; -} - -.hljs-params, -.hljs-class .hljs-title { - color: #f8f8f2; -} - -.hljs-string, -.hljs-type, -.hljs-built_in, -.hljs-builtin-name, -.hljs-selector-id, -.hljs-selector-attr, -.hljs-selector-pseudo, -.hljs-addition, -.hljs-variable, -.hljs-template-variable { - color: #e6db74; -} - -.hljs-comment, -.hljs-deletion, -.hljs-meta { - color: #75715e; -} diff --git a/website/public/css/poole.css b/website/public/css/poole.css deleted file mode 100644 index 03f9338..0000000 --- a/website/public/css/poole.css +++ /dev/null @@ -1,283 +0,0 @@ -/* - * ___ - * /\_ \ - * _____ ___ ___\//\ \ __ - * /\ '__`\ / __`\ / __`\\ \ \ /'__`\ - * \ \ \_\ \/\ \_\ \/\ \_\ \\_\ \_/\ __/ - * \ \ ,__/\ \____/\ \____//\____\ \____\ - * \ \ \/ \/___/ \/___/ \/____/\/____/ - * \ \_\ - * \/_/ - * - * Designed, built, and released under MIT license by @mdo. Learn more at - * https://github.com/poole/poole. - */ - - -/* - * Contents - * - * Body resets - * Custom type - * Messages - * Container - * Masthead - * Posts and pages - * Pagination - * Reverse layout - * Themes - */ - - -/* - * Body resets - * - * Update the foundational and global aspects of the page. 
- */ - -* { - -webkit-box-sizing: border-box; - -moz-box-sizing: border-box; - box-sizing: border-box; -} - -html, -body { - margin: 0; - padding: 0; -} - -html { - font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; - font-size: 16px; - line-height: 1.5; -} - -body { - color: #515151; - background-color: #fff; - -webkit-text-size-adjust: 100%; - -ms-text-size-adjust: 100%; -} - -/* No `:visited` state is required by default (browsers will use `a`) */ -a { - color: #268bd2; - text-decoration: none; -} -/* `:focus` is linked to `:hover` for basic accessibility */ -a:hover, -a:focus { - text-decoration: underline; -} - -/* Headings */ -h1, h2, h3, h4, h5, h6 { - margin-bottom: .5rem; - font-weight: bold; - line-height: 1.25; - color: #313131; - text-rendering: optimizeLegibility; -} -h1 { - font-size: 2rem; -} -h2 { - margin-top: 1rem; - font-size: 1.5rem; -} -h3 { - margin-top: 1.5rem; - font-size: 1.25rem; -} -h4, h5, h6 { - margin-top: 1rem; - font-size: 1rem; -} - -/* Body text */ -p { - margin-top: 0; - margin-bottom: 1rem; -} - -strong { - color: #303030; -} - - -/* Lists */ -ul, ol, dl { - margin-top: 0; - margin-bottom: 1rem; -} - -dt { - font-weight: bold; -} -dd { - margin-bottom: .5rem; -} - -/* Misc */ -hr { - position: relative; - margin: 1.5rem 0; - border: 0; - border-top: 1px solid #eee; - border-bottom: 1px solid #fff; -} - -abbr { - font-size: 85%; - font-weight: bold; - color: #555; - text-transform: uppercase; -} -abbr[title] { - cursor: help; - border-bottom: 1px dotted #e5e5e5; -} - -/* Code */ -code, -pre { - font-family: Menlo, Monaco, "Courier New", monospace; -} -code { - padding: .25em .5em; - font-size: 85%; - color: #bf616a; - background-color: #f9f9f9; - border-radius: 3px; -} -pre { - display: block; - margin-top: 0; - margin-bottom: 1rem; - padding: 1rem; - font-size: .8rem; - line-height: 1.4; - white-space: pre; - white-space: pre-wrap; - word-break: break-all; - word-wrap: break-word; - background-color: #f9f9f9; -} -pre 
code { - padding: 0; - font-size: 100%; - color: inherit; - background-color: transparent; -} -.highlight { - margin-bottom: 1rem; - border-radius: 4px; -} -.highlight pre { - margin-bottom: 0; -} - -/* Quotes */ -blockquote { - padding: .5rem 1rem; - margin: .8rem 0; - color: #7a7a7a; - border-left: .25rem solid #e5e5e5; -} -blockquote p:last-child { - margin-bottom: 0; -} -@media (min-width: 30em) { - blockquote { - padding-right: 5rem; - padding-left: 1.25rem; - } -} - -img { - display: block; - margin: 0 0 1rem; - border-radius: 5px; - max-width: 100%; -} - -/* Tables */ -table { - margin-bottom: 1rem; - width: 100%; - border: 1px solid #e5e5e5; - border-collapse: collapse; -} -td, -th { - padding: .25rem .5rem; - border: 1px solid #e5e5e5; -} -tbody tr:nth-child(odd) td, -tbody tr:nth-child(odd) th { - background-color: #f9f9f9; -} - - -/* - * Custom type - * - * Extend paragraphs with `.lead` for larger introductory text. - */ - -.lead { - font-size: 1.25rem; - font-weight: 300; -} - - -/* - * Messages - * - * Show alert messages to users. You may add it to single elements like a `

`, - * or to a parent if there are multiple elements to show. - */ - -.message { - margin-bottom: 1rem; - padding: 1rem; - color: #717171; - background-color: #f9f9f9; -} - - -/* - * Masthead - * - * Super small header above the content for site name and short description. - */ - -.masthead { - padding-top: 1rem; - padding-bottom: 1rem; - margin-bottom: 3rem; -} -.masthead-title { - margin-top: 0; - margin-bottom: 0; - color: #505050; -} -.masthead-title a { - color: #505050; -} -.masthead-title small { - font-size: 75%; - font-weight: 400; - color: #c0c0c0; - letter-spacing: 0; -} - - -/* Meta data line below post title */ -.post-date { - display: block; - margin-top: -.5rem; - margin-bottom: 1rem; - color: #9a9a9a; -} diff --git a/website/public/css/syntax.css b/website/public/css/syntax.css deleted file mode 100644 index 1264b87..0000000 --- a/website/public/css/syntax.css +++ /dev/null @@ -1,66 +0,0 @@ -.hll { background-color: #ffffcc } - /*{ background: #f0f3f3; }*/ -.c { color: #999; } /* Comment */ -.err { color: #AA0000; background-color: #FFAAAA } /* Error */ -.k { color: #006699; } /* Keyword */ -.o { color: #555555 } /* Operator */ -.cm { color: #0099FF; font-style: italic } /* Comment.Multiline */ -.cp { color: #009999 } /* Comment.Preproc */ -.c1 { color: #999; } /* Comment.Single */ -.cs { color: #999; } /* Comment.Special */ -.gd { background-color: #FFCCCC; border: 1px solid #CC0000 } /* Generic.Deleted */ -.ge { font-style: italic } /* Generic.Emph */ -.gr { color: #FF0000 } /* Generic.Error */ -.gh { color: #003300; } /* Generic.Heading */ -.gi { background-color: #CCFFCC; border: 1px solid #00CC00 } /* Generic.Inserted */ -.go { color: #AAAAAA } /* Generic.Output */ -.gp { color: #000099; } /* Generic.Prompt */ -.gs { } /* Generic.Strong */ -.gu { color: #003300; } /* Generic.Subheading */ -.gt { color: #99CC66 } /* Generic.Traceback */ -.kc { color: #006699; } /* Keyword.Constant */ -.kd { color: #006699; } /* Keyword.Declaration */ -.kn { 
color: #006699; } /* Keyword.Namespace */ -.kp { color: #006699 } /* Keyword.Pseudo */ -.kr { color: #006699; } /* Keyword.Reserved */ -.kt { color: #007788; } /* Keyword.Type */ -.m { color: #FF6600 } /* Literal.Number */ -.s { color: #d44950 } /* Literal.String */ -.na { color: #4f9fcf } /* Name.Attribute */ -.nb { color: #336666 } /* Name.Builtin */ -.nc { color: #00AA88; } /* Name.Class */ -.no { color: #336600 } /* Name.Constant */ -.nd { color: #9999FF } /* Name.Decorator */ -.ni { color: #999999; } /* Name.Entity */ -.ne { color: #CC0000; } /* Name.Exception */ -.nf { color: #CC00FF } /* Name.Function */ -.nl { color: #9999FF } /* Name.Label */ -.nn { color: #00CCFF; } /* Name.Namespace */ -.nt { color: #2f6f9f; } /* Name.Tag */ -.nv { color: #003333 } /* Name.Variable */ -.ow { color: #000000; } /* Operator.Word */ -.w { color: #bbbbbb } /* Text.Whitespace */ -.mf { color: #FF6600 } /* Literal.Number.Float */ -.mh { color: #FF6600 } /* Literal.Number.Hex */ -.mi { color: #FF6600 } /* Literal.Number.Integer */ -.mo { color: #FF6600 } /* Literal.Number.Oct */ -.sb { color: #CC3300 } /* Literal.String.Backtick */ -.sc { color: #CC3300 } /* Literal.String.Char */ -.sd { color: #CC3300; font-style: italic } /* Literal.String.Doc */ -.s2 { color: #CC3300 } /* Literal.String.Double */ -.se { color: #CC3300; } /* Literal.String.Escape */ -.sh { color: #CC3300 } /* Literal.String.Heredoc */ -.si { color: #AA0000 } /* Literal.String.Interpol */ -.sx { color: #CC3300 } /* Literal.String.Other */ -.sr { color: #33AAAA } /* Literal.String.Regex */ -.s1 { color: #CC3300 } /* Literal.String.Single */ -.ss { color: #FFCC33 } /* Literal.String.Symbol */ -.bp { color: #336666 } /* Name.Builtin.Pseudo */ -.vc { color: #003333 } /* Name.Variable.Class */ -.vg { color: #003333 } /* Name.Variable.Global */ -.vi { color: #003333 } /* Name.Variable.Instance */ -.il { color: #FF6600 } /* Literal.Number.Integer.Long */ - -.css .o, -.css .o + .nt, -.css .nt + .nt { color: #999; } 
diff --git a/website/public/css/theme.css b/website/public/css/theme.css deleted file mode 100644 index af44672..0000000 --- a/website/public/css/theme.css +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Originally based on the Hyde theme, but heavily modified - # and who knows what original code remains. - * - * Designed, built, and released under MIT license by @mdo. Learn more at - * https://github.com/poole/hyde. - */ - - -/* - * Global resets - * - * Update the foundational and global aspects of the page. - */ - -html { - font-family: "PT Sans", Helvetica, Arial, sans-serif; - font-size: 20px; -} - -@media (max-width: 48em) { - .main { - font-size: 16px; - } -} - -/* SECTIONS ============================================================================= */ - -.section { - clear: both; - padding: 0px; - margin: 0px; -} - -/* GROUPING ============================================================================= */ - - -.group:before, -.group:after { - content:""; - display:table; -} -.group:after { - clear:both; -} -.group { - zoom:1; /* For IE 6/7 (trigger hasLayout) */ -} - -/* GRID COLUMN SETUP ==================================================================== */ - -.col { - display: block; - float:left; - margin: 1% 0 1% 1.6%; -} - -.col:first-child { margin-left: 0; } /* all browsers except IE6 and lower */ - - -/* REMOVE MARGINS AS ALL GO FULL WIDTH AT 600 PIXELS */ - -@media only screen and (max-width: 600px) { - .col { - margin: 1% 0 1% 0%; - } -} - -/* GRID OF THREE ============================================================================= */ - - -.span_3_of_3 { - width: 100%; -} - -.span_2_of_3 { - width: 66.13%; -} - -.span_1_of_3 { - width: 32.26%; -} - - -/* GO FULL WIDTH AT LESS THAN 600 PIXELS */ - -@media only screen and (max-width: 600px) { - .span_3_of_3 { - width: 100%; - } - .span_2_of_3 { - width: 100%; - } - .span_1_of_3 { - width: 100%; - } -} - -/* GRID OF TWELVE ============================================================================= */ 
- -.span_12_of_12 { - width: 100%; -} - -.span_11_of_12 { - width: 91.53%; -} - -.span_10_of_12 { - width: 83.06%; -} - -.span_9_of_12 { - width: 74.6%; -} - -.span_8_of_12 { - width: 66.13%; -} - -.span_7_of_12 { - width: 57.66%; -} - -.span_6_of_12 { - width: 49.2%; -} - -.span_5_of_12 { - width: 40.73%; -} - -.span_4_of_12 { - width: 32.26%; -} - -.span_3_of_12 { - width: 23.8%; -} - -.span_2_of_12 { - width: 15.33%; -} - -.span_1_of_12 { - width: 6.86%; -} - - -/* GO FULL WIDTH AT LESS THAN 600 PIXELS */ - -@media only screen and (max-width: 600px) { - .span_12_of_12 { - width: 100%; - } - .span_11_of_12 { - width: 100%; - } - .span_10_of_12 { - width: 100%; - } - .span_9_of_12 { - width: 100%; - } - .span_8_of_12 { - width: 100%; - } - .span_7_of_12 { - width: 100%; - } - .span_6_of_12 { - width: 100%; - } - .span_5_of_12 { - width: 100%; - } - .span_4_of_12 { - width: 100%; - } - .span_3_of_12 { - width: 100%; - } - .span_2_of_12 { - width: 100%; - } - .span_1_of_12 { - width: 100%; - } -} - - -/* - * Sidebar - * - * Flexible banner for housing site name, intro, and "footer" content. Starts - * out above content in mobile and later moves to the side with wider viewports. - */ - -.sidebar { - padding: 2rem; - padding-right: 0; - color: rgba(255,255,255,.5); - font-size: 1rem; -} - -.sidebar-nav { - padding-left: 0; - list-style: none; -} -.sidebar-nav-item { - display: block; -} -a.sidebar-nav-item:hover, -a.sidebar-nav-item:focus { - text-decoration: underline; -} -.sidebar-nav-item.active { - font-weight: bold; -} -.sidebar-nav-nested { - padding-left: 1rem; - margin-bottom: 0; -} diff --git a/website/public/docs/example/index.html b/website/public/docs/example/index.html deleted file mode 100644 index f05c281..0000000 --- a/website/public/docs/example/index.html +++ /dev/null @@ -1,469 +0,0 @@ - - - - - - - - - - - Example · Sifter - - - - - - - - - - - - - - - - - - - - -

- - -
- - - -
-

Example Pipeline

-

Our first task will be to convert a ZIP code TSV into a set of county level -entries.

-

The input file looks like:

-
ZIP,COUNTYNAME,STATE,STCOUNTYFP,CLASSFP
-36003,Autauga County,AL,01001,H1
-36006,Autauga County,AL,01001,H1
-36067,Autauga County,AL,01001,H1
-36066,Autauga County,AL,01001,H1
-36703,Autauga County,AL,01001,H1
-36701,Autauga County,AL,01001,H1
-36091,Autauga County,AL,01001,H1
-

First is the header of the pipeline. This declares the -unique name of the pipeline and it’s output directory.

-
name: zipcode_map
-outdir: ./
-docs: Converts zipcode TSV into graph elements
-

Next the configuration is declared. In this case the only input is the zipcode TSV. -There is a default value, so the pipeline can be invoked without passing in -any parameters. However, to apply this pipeline to a new input file, the -input parameter zipcode could be used to define the source file.

-
config:
-  schema: ../covid19_datadictionary/gdcdictionary/schemas/
-  zipcode: ../data/ZIP-COUNTY-FIPS_2017-06.csv
-

The inputs section declares data input sources. In this pipeline, there is -only one input, which is to run the table loader.

-
inputs:
-  tableLoad:
-    input: "{{config.zipcode}}"
-    sep: ","
-

Tableload operaters of the input file that was originally passed in using the -inputs stanza. SIFTER string parsing is based on mustache template system. -To access the string passed in the template is {{config.zipcode}}. -The seperator in the file input file is a , so that is also passed in as a -parameter to the extractor.

-

The tableLoad extractor opens up the TSV and generates a one message for -every row in the file. It uses the header of the file to map the column values -into a dictionary. The first row would produce the message:

-
{
-    "ZIP" : "36003",
-    "COUNTYNAME" : "Autauga County",
-    "STATE" : "AL",
-    "STCOUNTYFP" : "01001",
-    "CLASSFP" : "H1"
-}
-

The stream of messages are then passed into the steps listed in the transform -section of the tableLoad extractor.

-

For the current tranform, we want to produce a single entry per STCOUNTYFP, -however, the file has a line per ZIP. We need to run a reduce transform, -that collects rows togeather using a field key, which in this case is "{{row.STCOUNTYFP}}", -and then runs a function merge that takes two messages, merges them togeather -and produces a single output message.

-

The two messages:

-
{ "ZIP" : "36003", "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"}
-{ "ZIP" : "36006", "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"}
-

Would be merged into the message:

-
{ "ZIP" : ["36003", "36006"], "COUNTYNAME" : "Autauga County", "STATE" : "AL", "STCOUNTYFP" : "01001", "CLASSFP" : "H1"}
-

The reduce transform step uses a block of python code to describe the function. -The method field names the function, in this case merge that will be used -as the reduce function.

-
  zipReduce:
-    - from: zipcode
-    - reduce:
-        field: STCOUNTYFP
-        method: merge
-        python: >
-          def merge(x,y):
-            a = x.get('zipcodes', []) + [x['ZIP']]
-            b = y.get('zipcodes', []) + [y['ZIP']]
-            x['zipcodes'] = a + b
-            return x
-

The original messages produced by the loader have all of the information required -by the summary_location object type as described by the JSON schema that was linked -to in the header stanza. However, the data is all under the wrong field names. -To remap the data, we use a project tranformation that uses the template engine -to project data into new files in the message. The template engine has the current -message data in the value row. So the value -FIPS:{{row.STCOUNTYFP}} is mapped into the field id.

-
  - project:
-      mapping:
-        id: "FIPS:{{row.STCOUNTYFP}}"
-        province_state: "{{row.STATE}}"
-        summary_locations: "{{row.STCOUNTYFP}}"
-        county: "{{row.COUNTYNAME}}"
-        submitter_id: "{{row.STCOUNTYFP}}"
-        type: summary_location
-        projects: []
-

Using this projection, the message:

-
{
-  "ZIP" : ["36003", "36006"],
-  "COUNTYNAME" : "Autauga County",
-  "STATE" : "AL",
-  "STCOUNTYFP" : "01001",
-  "CLASSFP" : "H1"
-}
-

would become

-
{
-  "id" : "FIPS:01001",
-  "province_state" : "AL",
-  "summary_locations" : "01001",
-  "county" : "Autauga County",
-  "submitter_id" : "01001",
-  "type" : "summary_location"
-  "projects" : [],
-  "ZIP" : ["36003", "36006"],
-  "COUNTYNAME" : "Autauga County",
-  "STATE" : "AL",
-  "STCOUNTYFP" : "01001",
-  "CLASSFP" : "H1"
-}
-

Now that the data has been remapped, we pass the data into the ‘objectCreate’ -transformation, which will read in the schema for summary_location, check the -message to make sure it matches and then output it.

-
  - objectCreate:
-        class: summary_location
-

Outputs

-

To create an output table, with two columns connecting -ZIP values to STCOUNTYFP values. The STCOUNTYFP is a county level FIPS -code, used by the census office. A single FIPS code my contain many ZIP codes, -and we can use this table later for mapping ids when loading the data into a database.

-
outputs:
-  zip2fips:
-    tableWrite:
-      from: 
-      output: zip2fips
-      columns:
-        - ZIP
-        - STCOUNTYFP
-
-
- -
- - diff --git a/website/public/docs/index.html b/website/public/docs/index.html deleted file mode 100644 index 47922a2..0000000 --- a/website/public/docs/index.html +++ /dev/null @@ -1,365 +0,0 @@ - - - - - - - - - - - Overview · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

Sifter pipelines

-

Sifter pipelines process steams of nested JSON messages. Sifter comes with a number of -file extractors that operate as inputs to these pipelines. The pipeline engine -connects togeather arrays of transform steps into direct acylic graph that is processed -in parallel.

-

Example Message:

-
{
-  "firstName" : "bob",
-  "age" : "25"
-  "friends" : [ "Max", "Alex"]
-}
-

Once a stream of messages are produced, that can be run through a transform -pipeline. A transform pipeline is an array of transform steps, each transform -step can represent a different way to alter the data. The array of transforms link -togeather into a pipe that makes multiple alterations to messages as they are -passed along. There are a number of different transform steps types that can -be done in a transform pipeline these include:

-
    -
  • Projection: creating new fields using a templating engine driven by existing values
  • -
  • Filtering: removing messages
  • -
  • Programmatic transformation: alter messages using an embedded python interpreter
  • -
  • Table based field translation
  • -
  • Outputing the message as a JSON Schema checked object
  • -
- -
- -
- - diff --git a/website/public/docs/index.xml b/website/public/docs/index.xml deleted file mode 100644 index 464694e..0000000 --- a/website/public/docs/index.xml +++ /dev/null @@ -1,304 +0,0 @@ - - - - Docs on Sifter - http://example.org/docs/ - Recent content in Docs on Sifter - Hugo -- gohugo.io - en-us - - accumulate - http://example.org/docs/transforms/accumulate/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/accumulate/ - - - - - avroLoad - http://example.org/docs/inputs/avroload/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/avroload/ - avroLoad Load an AvroFile -Parameters name Description input Path to input file - - - - clean - http://example.org/docs/transforms/clean/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/clean/ - - - - - debug - http://example.org/docs/transforms/debug/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/debug/ - - - - - distinct - http://example.org/docs/transforms/distinct/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/distinct/ - - - - - embedded - http://example.org/docs/inputs/embedded/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/embedded/ - embedded Load data from embedded structure -Example inputs: data: embedded: - { &#34;name&#34; : &#34;Alice&#34;, &#34;age&#34;: 28 } - { &#34;name&#34; : &#34;Bob&#34;, &#34;age&#34;: 27 } - - - - emit - http://example.org/docs/transforms/emit/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/emit/ - - - - - Example - http://example.org/docs/example/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/example/ - Example Pipeline Our first task will be to convert a ZIP code TSV into a set of county level entries. 
-The input file looks like: -ZIP,COUNTYNAME,STATE,STCOUNTYFP,CLASSFP 36003,Autauga County,AL,01001,H1 36006,Autauga County,AL,01001,H1 36067,Autauga County,AL,01001,H1 36066,Autauga County,AL,01001,H1 36703,Autauga County,AL,01001,H1 36701,Autauga County,AL,01001,H1 36091,Autauga County,AL,01001,H1 First is the header of the pipeline. This declares the unique name of the pipeline and it&rsquo;s output directory. -name: zipcode_map outdir: ./ docs: Converts zipcode TSV into graph elements Next the configuration is declared. - - - - fieldParse - http://example.org/docs/transforms/fieldparse/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/fieldparse/ - - - - - fieldProcess - http://example.org/docs/transforms/fieldprocess/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/fieldprocess/ - - - - - fieldType - http://example.org/docs/transforms/fieldtype/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/fieldtype/ - - - - - filter - http://example.org/docs/transforms/filter/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/filter/ - - - - - from - http://example.org/docs/transforms/from/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/from/ - from Parmeters Name of data source -Example inputs: profileReader: tableLoad: input: &#34;{{config.profiles}}&#34; pipelines: profileProcess: - from: profileReader - - - - glob - http://example.org/docs/inputs/glob/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/glob/ - glob Scan files using * based glob statement and open all files as input. 
-Parameters Name Description storeFilename Store value of filename in parameter each row input Path of avro object file to transform xmlLoad xmlLoad configutation tableLoad Run transform pipeline on a TSV or CSV jsonLoad Run a transform pipeline on a multi line json file avroLoad Load data from avro file Example inputs: pubmedRead: glob: input: &#34;{{config.baseline}}/*.xml.gz&#34; xmlLoad: {} - - - - graphBuild - http://example.org/docs/transforms/graphbuild/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/graphbuild/ - - - - - gripperLoad - http://example.org/docs/inputs/gripperload/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/gripperload/ - - - - - hash - http://example.org/docs/transforms/hash/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/hash/ - - - - - Inputs - http://example.org/docs/inputs/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/ - Every playbook consists of a series of inputs. - - - - jsonLoad - http://example.org/docs/inputs/jsonload/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/jsonload/ - jsonLoad Load data from a JSON file. Default behavior expects a single dictionary per line. Each line is a seperate entry. The multiline parameter reads all of the lines of the files and returns a single object. 
-Parameters name Description input Path of JSON file to transform multiline Load file as a single multiline JSON object Example inputs: caseData: jsonLoad: input: &#34;{{config.casesJSON}}&#34; - - - - lookup - http://example.org/docs/transforms/lookup/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/lookup/ - lookup Using key from current row, get values from a reference source -Parameters name Type Description replace string (field path) Field to replace lookup string (template string) Key to use for looking up data copy map[string]string Given lookup of structure, copy values (key) to row (value) tsv TSVTable TSV translation table file json JSONTable JSON data file table LookupTable Inline lookup table Example - lookup: json: input: &#34;{{config.doseResponseFile}}&#34; key: experiment_id lookup: &#34;{{row. - - - - map - http://example.org/docs/transforms/map/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/map/ - map Run function on every row -Parameters name Description method Name of function to call python Python code to be run gpython Python code to be run using GPython Example - map: method: response gpython: | def response(x): s = sorted(x[&#34;curve&#34;].items(), key=lambda x:float(x[0])) x[&#39;dose_um&#39;] = [] x[&#39;response&#39;] = [] for d, r in s: try: dn = float(d) rn = float(r) x[&#39;dose_um&#39;].append(dn) x[&#39;response&#39;].append(rn) except ValueError: pass return x - - - - objectCreate - http://example.org/docs/transforms/objectcreate/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/objectcreate/ - - - - - Pipeline Steps - http://example.org/docs/transforms/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/ - Transforms alter the data - - - - project - http://example.org/docs/transforms/project/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/project/ - - - - - reduce - http://example.org/docs/transforms/reduce/ 
- Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/reduce/ - reduce Using key from rows, reduce matched records into a single entry -Parameters name Type Description field string (field path) Field used to match rows method string Method name python string Python code string gpython string Python code string run using (https://github.com/go-python/gpython) init map[string]any Data to use for first reduce Example - reduce: field: dataset_name method: merge init: { &#34;compounds&#34; : [] } gpython: | def merge(x,y): x[&#34;compounds&#34;] = list(set(y[&#34;compounds&#34;]+x[&#34;compounds&#34;])) return x - - - - regexReplace - http://example.org/docs/transforms/regexreplace/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/regexreplace/ - - - - - Sifter Pipeline File - http://example.org/docs/playbook/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/playbook/ - Pipeline File An sifter pipeline file is in YAML format and describes an entire processing pipelines. If is composed of the following sections: config, inputs, pipelines, outputs. In addition, for tracking, the file will also include name and class entries. 
-class: sifter name: &lt;script name&gt; outdir: &lt;where output files should go, relative to this file&gt; config: &lt;config key&gt;: &lt;config value&gt; &lt;config key&gt;: &lt;config value&gt; # values that are referenced in pipeline parameters for # files will be treated like file paths and be # translated to full paths inputs: &lt;input name&gt;: &lt;input driver&gt;: &lt;driver config&gt; pipelines: &lt;pipeline name&gt;: # all pipelines must start with a from step - from: &lt;name of input or pipeline&gt; - &lt;transform name&gt;: &lt;transform parameters&gt; outputs: &lt;output name&gt;: &lt;output driver&gt;: &lt;driver config&gt; - - - - sqldump - http://example.org/docs/inputs/sqldump/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/sqldump/ - sqlDump Scan file produced produced from sqldump. -Parameters Name Type Description input string Path to the SQL dump file tables []string Names of tables to read out Example inputs: database: sqldumpLoad: input: &#34;{{config.sql}}&#34; tables: - cells - cell_tissues - dose_responses - drugs - drug_annots - experiments - profiles - - - - sqliteLoad - http://example.org/docs/inputs/sqliteload/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/sqliteload/ - sqliteLoad Extract data from an sqlite file -Parameters Name Type Description input string Path to the SQLite file query string SQL select statement based input Example inputs: sqlQuery: sqliteLoad: input: &#34;{{config.sqlite}}&#34; query: &#34;select * from drug_mechanism as a LEFT JOIN MECHANISM_REFS as b on a.MEC_ID=b.MEC_ID LEFT JOIN TARGET_COMPONENTS as c on a.TID=c.TID LEFT JOIN COMPONENT_SEQUENCES as d on c.COMPONENT_ID=d.COMPONENT_ID LEFT JOIN MOLECULE_DICTIONARY as e on a.MOLREGNO=e.MOLREGNO&#34; - - - - tableLoad - http://example.org/docs/inputs/tableload/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/tableload/ - - - - - xmlLoad - http://example.org/docs/inputs/xmlload/ - Mon, 01 Jan 
0001 00:00:00 +0000 - - http://example.org/docs/inputs/xmlload/ - xmlLoad Load an XML file -Parameters name Description input Path to input file Example inputs: loader: xmlLoad: input: &#34;{{config.xmlPath}}&#34; - - - - diff --git a/website/public/docs/inputs/avroload/index.html b/website/public/docs/inputs/avroload/index.html deleted file mode 100644 index f7faca2..0000000 --- a/website/public/docs/inputs/avroload/index.html +++ /dev/null @@ -1,358 +0,0 @@ - - - - - - - - - - - avroLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

avroLoad

-

Load an AvroFile

-

Parameters

- - - - - - - - - - - - - -
nameDescription
inputPath to input file
- -
- -
- - diff --git a/website/public/docs/inputs/embedded/index.html b/website/public/docs/inputs/embedded/index.html deleted file mode 100644 index 8d2c1ad..0000000 --- a/website/public/docs/inputs/embedded/index.html +++ /dev/null @@ -1,349 +0,0 @@ - - - - - - - - - - - embedded · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

embedded

-

Load data from embedded structure

-

Example

-
inputs:
-  data:
-    embedded:
-      - { "name" : "Alice", "age": 28 }
-      - { "name" : "Bob", "age": 27 }
-
-
- -
- - diff --git a/website/public/docs/inputs/glob/index.html b/website/public/docs/inputs/glob/index.html deleted file mode 100644 index fe2e3e3..0000000 --- a/website/public/docs/inputs/glob/index.html +++ /dev/null @@ -1,385 +0,0 @@ - - - - - - - - - - - glob · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

glob

-

Scan files using * based glob statement and open all files -as input.

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameDescription
storeFilenameStore value of filename in parameter each row
inputPath of avro object file to transform
xmlLoadxmlLoad configutation
tableLoadRun transform pipeline on a TSV or CSV
jsonLoadRun a transform pipeline on a multi line json file
avroLoadLoad data from avro file
-

Example

-
inputs:
-  pubmedRead:
-    glob:
-      input: "{{config.baseline}}/*.xml.gz"
-      xmlLoad: {}
-
-
- -
- - diff --git a/website/public/docs/inputs/gripperload/index.html b/website/public/docs/inputs/gripperload/index.html deleted file mode 100644 index b6bc4ae..0000000 --- a/website/public/docs/inputs/gripperload/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - gripperLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/inputs/index.html b/website/public/docs/inputs/index.html deleted file mode 100644 index 2a4a0e6..0000000 --- a/website/public/docs/inputs/index.html +++ /dev/null @@ -1,342 +0,0 @@ - - - - - - - - - - - Inputs · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

Every playbook consists of a series of inputs.

- -
- -
- - diff --git a/website/public/docs/inputs/jsonload/index.html b/website/public/docs/inputs/jsonload/index.html deleted file mode 100644 index faaea61..0000000 --- a/website/public/docs/inputs/jsonload/index.html +++ /dev/null @@ -1,367 +0,0 @@ - - - - - - - - - - - jsonLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

jsonLoad

-

Load data from a JSON file. Default behavior expects a single dictionary per line. Each line is a seperate entry. The multiline parameter reads all of the lines of the files and returns a single object.

-

Parameters

- - - - - - - - - - - - - - - - - -
nameDescription
inputPath of JSON file to transform
multilineLoad file as a single multiline JSON object
-

Example

-
inputs:
-  caseData:
-    jsonLoad:
-      input: "{{config.casesJSON}}"
-
-
- -
- - diff --git a/website/public/docs/inputs/sqldump/index.html b/website/public/docs/inputs/sqldump/index.html deleted file mode 100644 index ddeaf7f..0000000 --- a/website/public/docs/inputs/sqldump/index.html +++ /dev/null @@ -1,378 +0,0 @@ - - - - - - - - - - - sqldump · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

sqlDump

-

Scan file produced produced from sqldump.

-

Parameters

- - - - - - - - - - - - - - - - - - - - -
NameTypeDescription
inputstringPath to the SQL dump file
tables[]stringNames of tables to read out
-

Example

-
inputs:
-  database:
-    sqldumpLoad:
-      input: "{{config.sql}}"
-      tables:
-        - cells
-        - cell_tissues
-        - dose_responses
-        - drugs
-        - drug_annots
-        - experiments
-        - profiles
-
-
- -
- - diff --git a/website/public/docs/inputs/sqliteload/index.html b/website/public/docs/inputs/sqliteload/index.html deleted file mode 100644 index c30f2c4..0000000 --- a/website/public/docs/inputs/sqliteload/index.html +++ /dev/null @@ -1,372 +0,0 @@ - - - - - - - - - - - sqliteLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

sqliteLoad

-

Extract data from an sqlite file

-

Parameters

- - - - - - - - - - - - - - - - - - - - -
NameTypeDescription
inputstringPath to the SQLite file
querystringSQL select statement based input
-

Example

-

-inputs:
-  sqlQuery:
-    sqliteLoad:
-      input: "{{config.sqlite}}"
-      query: "select * from drug_mechanism as a LEFT JOIN MECHANISM_REFS as b on a.MEC_ID=b.MEC_ID LEFT JOIN TARGET_COMPONENTS as c on a.TID=c.TID LEFT JOIN COMPONENT_SEQUENCES as d on c.COMPONENT_ID=d.COMPONENT_ID LEFT JOIN MOLECULE_DICTIONARY as e on a.MOLREGNO=e.MOLREGNO"
-
-
- -
- - diff --git a/website/public/docs/inputs/tableload/index.html b/website/public/docs/inputs/tableload/index.html deleted file mode 100644 index 9f10d36..0000000 --- a/website/public/docs/inputs/tableload/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - tableLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/inputs/xmlload/index.html b/website/public/docs/inputs/xmlload/index.html deleted file mode 100644 index 833fd28..0000000 --- a/website/public/docs/inputs/xmlload/index.html +++ /dev/null @@ -1,363 +0,0 @@ - - - - - - - - - - - xmlLoad · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

xmlLoad

-

Load an XML file

-

Parameters

- - - - - - - - - - - - - -
nameDescription
inputPath to input file
-

Example

-
inputs:
-  loader:
-    xmlLoad:
-      input: "{{config.xmlPath}}"
-
-
- -
- - diff --git a/website/public/docs/playbook/index.html b/website/public/docs/playbook/index.html deleted file mode 100644 index ec46a3d..0000000 --- a/website/public/docs/playbook/index.html +++ /dev/null @@ -1,373 +0,0 @@ - - - - - - - - - - - Sifter Pipeline File · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

Pipeline File

-

An sifter pipeline file is in YAML format and describes an entire processing pipelines. -If is composed of the following sections: config, inputs, pipelines, outputs. In addition, -for tracking, the file will also include name and class entries.

-

-class: sifter
-name: <script name>
-outdir: <where output files should go, relative to this file>
-
-config:
-  <config key>: <config value>
-  <config key>: <config value> 
-  # values that are referenced in pipeline parameters for 
-  # files will be treated like file paths and be 
-  # translated to full paths
-
-inputs:
-  <input name>:
-    <input driver>:
-      <driver config>
-
-pipelines:
-  <pipeline name>:
-    # all pipelines must start with a from step
-    - from: <name of input or pipeline> 
-    - <transform name>:
-       <transform parameters>
-
-outputs:
-  <output name>:
-    <output driver>:
-      <driver config>
-
-
- -
- - diff --git a/website/public/docs/transforms/accumulate/index.html b/website/public/docs/transforms/accumulate/index.html deleted file mode 100644 index 38102d4..0000000 --- a/website/public/docs/transforms/accumulate/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - accumulate · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/clean/index.html b/website/public/docs/transforms/clean/index.html deleted file mode 100644 index 8ccfff1..0000000 --- a/website/public/docs/transforms/clean/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - clean · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/debug/index.html b/website/public/docs/transforms/debug/index.html deleted file mode 100644 index 1fd5f0f..0000000 --- a/website/public/docs/transforms/debug/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - debug · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/distinct/index.html b/website/public/docs/transforms/distinct/index.html deleted file mode 100644 index b486fe6..0000000 --- a/website/public/docs/transforms/distinct/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - distinct · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/emit/index.html b/website/public/docs/transforms/emit/index.html deleted file mode 100644 index 4d41462..0000000 --- a/website/public/docs/transforms/emit/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - emit · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/fieldparse/index.html b/website/public/docs/transforms/fieldparse/index.html deleted file mode 100644 index d474476..0000000 --- a/website/public/docs/transforms/fieldparse/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - fieldParse · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/fieldprocess/index.html b/website/public/docs/transforms/fieldprocess/index.html deleted file mode 100644 index d4bc610..0000000 --- a/website/public/docs/transforms/fieldprocess/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - fieldProcess · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/fieldtype/index.html b/website/public/docs/transforms/fieldtype/index.html deleted file mode 100644 index 0ddd76f..0000000 --- a/website/public/docs/transforms/fieldtype/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - fieldType · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/filter/index.html b/website/public/docs/transforms/filter/index.html deleted file mode 100644 index 5c1f9e0..0000000 --- a/website/public/docs/transforms/filter/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - filter · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/from/index.html b/website/public/docs/transforms/from/index.html deleted file mode 100644 index 24f3715..0000000 --- a/website/public/docs/transforms/from/index.html +++ /dev/null @@ -1,355 +0,0 @@ - - - - - - - - - - - from · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

from

-

Parmeters

-

Name of data source

-

Example

-

-
-inputs:
-  profileReader:
-    tableLoad:
-      input: "{{config.profiles}}"
-
-pipelines:
-  profileProcess:
-    - from: profileReader
-
-
- -
- - diff --git a/website/public/docs/transforms/graphbuild/index.html b/website/public/docs/transforms/graphbuild/index.html deleted file mode 100644 index 211e52f..0000000 --- a/website/public/docs/transforms/graphbuild/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - graphBuild · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/hash/index.html b/website/public/docs/transforms/hash/index.html deleted file mode 100644 index d4cd0fb..0000000 --- a/website/public/docs/transforms/hash/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - hash · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/index.html b/website/public/docs/transforms/index.html deleted file mode 100644 index 3c468c2..0000000 --- a/website/public/docs/transforms/index.html +++ /dev/null @@ -1,342 +0,0 @@ - - - - - - - - - - - Pipeline Steps · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

Transforms alter the data

- -
- -
- - diff --git a/website/public/docs/transforms/lookup/index.html b/website/public/docs/transforms/lookup/index.html deleted file mode 100644 index 9bdf56c..0000000 --- a/website/public/docs/transforms/lookup/index.html +++ /dev/null @@ -1,393 +0,0 @@ - - - - - - - - - - - lookup · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

lookup

-

Using key from current row, get values from a reference source

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
replacestring (field path)Field to replace
lookupstring (template string)Key to use for looking up data
copymap[string]stringGiven lookup of structure, copy values (key) to row (value)
tsvTSVTableTSV translation table file
jsonJSONTableJSON data file
tableLookupTableInline lookup table
-

Example

-
    - lookup:
-        json:
-          input: "{{config.doseResponseFile}}"
-          key: experiment_id
-        lookup: "{{row.experiment_id}}"
-        copy:
-          curve: curve
-
-
- -
- - diff --git a/website/public/docs/transforms/map/index.html b/website/public/docs/transforms/map/index.html deleted file mode 100644 index 77d82e7..0000000 --- a/website/public/docs/transforms/map/index.html +++ /dev/null @@ -1,383 +0,0 @@ - - - - - - - - - - - map · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

map

-

Run function on every row

-

Parameters

- - - - - - - - - - - - - - - - - - - - - -
nameDescription
methodName of function to call
pythonPython code to be run
gpythonPython code to be run using GPython
-

Example

-
    - map:
-        method: response
-        gpython: |
-          def response(x):
-            s = sorted(x["curve"].items(), key=lambda x:float(x[0]))
-            x['dose_um'] = []
-            x['response'] = []
-            for d, r in s:
-              try:
-                dn = float(d)
-                rn = float(r)
-                x['dose_um'].append(dn)
-                x['response'].append(rn)
-              except ValueError:
-                pass
-            return x          
-
-
- -
- - diff --git a/website/public/docs/transforms/objectcreate/index.html b/website/public/docs/transforms/objectcreate/index.html deleted file mode 100644 index bb0bf68..0000000 --- a/website/public/docs/transforms/objectcreate/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - objectCreate · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/project/index.html b/website/public/docs/transforms/project/index.html deleted file mode 100644 index cab2a30..0000000 --- a/website/public/docs/transforms/project/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - project · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/docs/transforms/reduce/index.html b/website/public/docs/transforms/reduce/index.html deleted file mode 100644 index b42cfd7..0000000 --- a/website/public/docs/transforms/reduce/index.html +++ /dev/null @@ -1,390 +0,0 @@ - - - - - - - - - - - reduce · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-

reduce

-

Using key from rows, reduce matched records into a single entry

-

Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameTypeDescription
fieldstring (field path)Field used to match rows
methodstringMethod name
pythonstringPython code string
gpythonstringPython code string run using (https://github.com/go-python/gpython)
initmap[string]anyData to use for first reduce
-

Example

-
    - reduce:
-        field: dataset_name
-        method: merge
-        init: { "compounds" : [] }
-        gpython: |
-
-          def merge(x,y):
-            x["compounds"] = list(set(y["compounds"]+x["compounds"]))
-            return x
-
-
- -
- - diff --git a/website/public/docs/transforms/regexreplace/index.html b/website/public/docs/transforms/regexreplace/index.html deleted file mode 100644 index def1c1f..0000000 --- a/website/public/docs/transforms/regexreplace/index.html +++ /dev/null @@ -1,341 +0,0 @@ - - - - - - - - - - - regexReplace · Sifter - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- -
- -
- - diff --git a/website/public/index.html b/website/public/index.html deleted file mode 100644 index 063cb9e..0000000 --- a/website/public/index.html +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - Sifter - - - - - - - - - - - - - - - - - - - - - - - -
-
- - -
-
-

SIFTER

-

Sifter is a Extract Tranform Load (ETL) engine. It can be used to -Extract from a number of different data resources, including TSV files, SQLDump -files and external databases. It includes a pipeline description language to -define a set of Transform steps to create object messages that can be -validated using a JSON schema data.

- -
-
- - - - - diff --git a/website/public/index.xml b/website/public/index.xml deleted file mode 100644 index 3acd47d..0000000 --- a/website/public/index.xml +++ /dev/null @@ -1,315 +0,0 @@ - - - - Sifter - http://example.org/ - Recent content on Sifter - Hugo -- gohugo.io - en-us - - accumulate - http://example.org/docs/transforms/accumulate/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/accumulate/ - - - - - avroLoad - http://example.org/docs/inputs/avroload/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/avroload/ - avroLoad Load an AvroFile -Parameters name Description input Path to input file - - - - clean - http://example.org/docs/transforms/clean/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/clean/ - - - - - debug - http://example.org/docs/transforms/debug/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/debug/ - - - - - distinct - http://example.org/docs/transforms/distinct/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/distinct/ - - - - - embedded - http://example.org/docs/inputs/embedded/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/embedded/ - embedded Load data from embedded structure -Example inputs: data: embedded: - { &#34;name&#34; : &#34;Alice&#34;, &#34;age&#34;: 28 } - { &#34;name&#34; : &#34;Bob&#34;, &#34;age&#34;: 27 } - - - - emit - http://example.org/docs/transforms/emit/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/emit/ - - - - - Example - http://example.org/docs/example/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/example/ - Example Pipeline Our first task will be to convert a ZIP code TSV into a set of county level entries. 
-The input file looks like: -ZIP,COUNTYNAME,STATE,STCOUNTYFP,CLASSFP 36003,Autauga County,AL,01001,H1 36006,Autauga County,AL,01001,H1 36067,Autauga County,AL,01001,H1 36066,Autauga County,AL,01001,H1 36703,Autauga County,AL,01001,H1 36701,Autauga County,AL,01001,H1 36091,Autauga County,AL,01001,H1 First is the header of the pipeline. This declares the unique name of the pipeline and it&rsquo;s output directory. -name: zipcode_map outdir: ./ docs: Converts zipcode TSV into graph elements Next the configuration is declared. - - - - fieldParse - http://example.org/docs/transforms/fieldparse/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/fieldparse/ - - - - - fieldProcess - http://example.org/docs/transforms/fieldprocess/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/fieldprocess/ - - - - - fieldType - http://example.org/docs/transforms/fieldtype/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/fieldtype/ - - - - - filter - http://example.org/docs/transforms/filter/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/filter/ - - - - - from - http://example.org/docs/transforms/from/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/from/ - from Parmeters Name of data source -Example inputs: profileReader: tableLoad: input: &#34;{{config.profiles}}&#34; pipelines: profileProcess: - from: profileReader - - - - glob - http://example.org/docs/inputs/glob/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/glob/ - glob Scan files using * based glob statement and open all files as input. 
-Parameters Name Description storeFilename Store value of filename in parameter each row input Path of avro object file to transform xmlLoad xmlLoad configutation tableLoad Run transform pipeline on a TSV or CSV jsonLoad Run a transform pipeline on a multi line json file avroLoad Load data from avro file Example inputs: pubmedRead: glob: input: &#34;{{config.baseline}}/*.xml.gz&#34; xmlLoad: {} - - - - graphBuild - http://example.org/docs/transforms/graphbuild/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/graphbuild/ - - - - - gripperLoad - http://example.org/docs/inputs/gripperload/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/gripperload/ - - - - - hash - http://example.org/docs/transforms/hash/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/hash/ - - - - - Inputs - http://example.org/docs/inputs/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/ - Every playbook consists of a series of inputs. - - - - jsonLoad - http://example.org/docs/inputs/jsonload/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/jsonload/ - jsonLoad Load data from a JSON file. Default behavior expects a single dictionary per line. Each line is a seperate entry. The multiline parameter reads all of the lines of the files and returns a single object. 
-Parameters name Description input Path of JSON file to transform multiline Load file as a single multiline JSON object Example inputs: caseData: jsonLoad: input: &#34;{{config.casesJSON}}&#34; - - - - lookup - http://example.org/docs/transforms/lookup/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/lookup/ - lookup Using key from current row, get values from a reference source -Parameters name Type Description replace string (field path) Field to replace lookup string (template string) Key to use for looking up data copy map[string]string Given lookup of structure, copy values (key) to row (value) tsv TSVTable TSV translation table file json JSONTable JSON data file table LookupTable Inline lookup table Example - lookup: json: input: &#34;{{config.doseResponseFile}}&#34; key: experiment_id lookup: &#34;{{row. - - - - map - http://example.org/docs/transforms/map/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/map/ - map Run function on every row -Parameters name Description method Name of function to call python Python code to be run gpython Python code to be run using GPython Example - map: method: response gpython: | def response(x): s = sorted(x[&#34;curve&#34;].items(), key=lambda x:float(x[0])) x[&#39;dose_um&#39;] = [] x[&#39;response&#39;] = [] for d, r in s: try: dn = float(d) rn = float(r) x[&#39;dose_um&#39;].append(dn) x[&#39;response&#39;].append(rn) except ValueError: pass return x - - - - objectCreate - http://example.org/docs/transforms/objectcreate/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/objectcreate/ - - - - - Overview - http://example.org/docs/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/ - Sifter pipelines Sifter pipelines process steams of nested JSON messages. Sifter comes with a number of file extractors that operate as inputs to these pipelines. 
The pipeline engine connects togeather arrays of transform steps into direct acylic graph that is processed in parallel. -Example Message: -{ &#34;firstName&#34; : &#34;bob&#34;, &#34;age&#34; : &#34;25&#34; &#34;friends&#34; : [ &#34;Max&#34;, &#34;Alex&#34;] } Once a stream of messages are produced, that can be run through a transform pipeline. - - - - Pipeline Steps - http://example.org/docs/transforms/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/ - Transforms alter the data - - - - project - http://example.org/docs/transforms/project/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/project/ - - - - - reduce - http://example.org/docs/transforms/reduce/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/reduce/ - reduce Using key from rows, reduce matched records into a single entry -Parameters name Type Description field string (field path) Field used to match rows method string Method name python string Python code string gpython string Python code string run using (https://github.com/go-python/gpython) init map[string]any Data to use for first reduce Example - reduce: field: dataset_name method: merge init: { &#34;compounds&#34; : [] } gpython: | def merge(x,y): x[&#34;compounds&#34;] = list(set(y[&#34;compounds&#34;]+x[&#34;compounds&#34;])) return x - - - - regexReplace - http://example.org/docs/transforms/regexreplace/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/transforms/regexreplace/ - - - - - Sifter Pipeline File - http://example.org/docs/playbook/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/playbook/ - Pipeline File An sifter pipeline file is in YAML format and describes an entire processing pipelines. If is composed of the following sections: config, inputs, pipelines, outputs. In addition, for tracking, the file will also include name and class entries. 
-class: sifter name: &lt;script name&gt; outdir: &lt;where output files should go, relative to this file&gt; config: &lt;config key&gt;: &lt;config value&gt; &lt;config key&gt;: &lt;config value&gt; # values that are referenced in pipeline parameters for # files will be treated like file paths and be # translated to full paths inputs: &lt;input name&gt;: &lt;input driver&gt;: &lt;driver config&gt; pipelines: &lt;pipeline name&gt;: # all pipelines must start with a from step - from: &lt;name of input or pipeline&gt; - &lt;transform name&gt;: &lt;transform parameters&gt; outputs: &lt;output name&gt;: &lt;output driver&gt;: &lt;driver config&gt; - - - - sqldump - http://example.org/docs/inputs/sqldump/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/sqldump/ - sqlDump Scan file produced produced from sqldump. -Parameters Name Type Description input string Path to the SQL dump file tables []string Names of tables to read out Example inputs: database: sqldumpLoad: input: &#34;{{config.sql}}&#34; tables: - cells - cell_tissues - dose_responses - drugs - drug_annots - experiments - profiles - - - - sqliteLoad - http://example.org/docs/inputs/sqliteload/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/sqliteload/ - sqliteLoad Extract data from an sqlite file -Parameters Name Type Description input string Path to the SQLite file query string SQL select statement based input Example inputs: sqlQuery: sqliteLoad: input: &#34;{{config.sqlite}}&#34; query: &#34;select * from drug_mechanism as a LEFT JOIN MECHANISM_REFS as b on a.MEC_ID=b.MEC_ID LEFT JOIN TARGET_COMPONENTS as c on a.TID=c.TID LEFT JOIN COMPONENT_SEQUENCES as d on c.COMPONENT_ID=d.COMPONENT_ID LEFT JOIN MOLECULE_DICTIONARY as e on a.MOLREGNO=e.MOLREGNO&#34; - - - - tableLoad - http://example.org/docs/inputs/tableload/ - Mon, 01 Jan 0001 00:00:00 +0000 - - http://example.org/docs/inputs/tableload/ - - - - - xmlLoad - http://example.org/docs/inputs/xmlload/ - Mon, 01 Jan 
0001 00:00:00 +0000 - - http://example.org/docs/inputs/xmlload/ - xmlLoad Load an XML file -Parameters name Description input Path to input file Example inputs: loader: xmlLoad: input: &#34;{{config.xmlPath}}&#34; - - - - diff --git a/website/public/sitemap.xml b/website/public/sitemap.xml deleted file mode 100644 index 3c9fad6..0000000 --- a/website/public/sitemap.xml +++ /dev/null @@ -1,78 +0,0 @@ - - - - http://example.org/ - 0 - - http://example.org/docs/transforms/accumulate/ - - http://example.org/docs/inputs/avroload/ - - http://example.org/categories/ - - http://example.org/docs/transforms/clean/ - - http://example.org/docs/transforms/debug/ - - http://example.org/docs/transforms/distinct/ - - http://example.org/docs/ - - http://example.org/docs/inputs/embedded/ - - http://example.org/docs/transforms/emit/ - - http://example.org/docs/example/ - - http://example.org/docs/transforms/fieldparse/ - - http://example.org/docs/transforms/fieldprocess/ - - http://example.org/docs/transforms/fieldtype/ - - http://example.org/docs/transforms/filter/ - - http://example.org/docs/transforms/from/ - - http://example.org/docs/inputs/glob/ - - http://example.org/docs/transforms/graphbuild/ - - http://example.org/docs/inputs/gripperload/ - - http://example.org/docs/transforms/hash/ - - http://example.org/docs/inputs/ - - http://example.org/docs/inputs/jsonload/ - - http://example.org/docs/transforms/lookup/ - - http://example.org/docs/transforms/map/ - - http://example.org/docs/transforms/objectcreate/ - - http://example.org/docs/ - - http://example.org/docs/transforms/ - - http://example.org/docs/transforms/project/ - - http://example.org/docs/transforms/reduce/ - - http://example.org/docs/transforms/regexreplace/ - - http://example.org/docs/playbook/ - - http://example.org/docs/inputs/sqldump/ - - http://example.org/docs/inputs/sqliteload/ - - http://example.org/docs/inputs/tableload/ - - http://example.org/tags/ - - http://example.org/docs/inputs/xmlload/ - - diff --git 
a/website/public/tags/index.xml b/website/public/tags/index.xml deleted file mode 100644 index 78658a6..0000000 --- a/website/public/tags/index.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - Tags on Sifter - http://example.org/tags/ - Recent content in Tags on Sifter - Hugo -- gohugo.io - en-us - - diff --git a/website/static/css/darcula.css b/website/static/css/darcula.css deleted file mode 100644 index be182d0..0000000 --- a/website/static/css/darcula.css +++ /dev/null @@ -1,77 +0,0 @@ -/* - -Darcula color scheme from the JetBrains family of IDEs - -*/ - - -.hljs { - display: block; - overflow-x: auto; - padding: 0.5em; - background: #2b2b2b; -} - -.hljs { - color: #bababa; -} - -.hljs-strong, -.hljs-emphasis { - color: #a8a8a2; -} - -.hljs-bullet, -.hljs-quote, -.hljs-link, -.hljs-number, -.hljs-regexp, -.hljs-literal { - color: #6896ba; -} - -.hljs-code, -.hljs-selector-class { - color: #a6e22e; -} - -.hljs-emphasis { - font-style: italic; -} - -.hljs-keyword, -.hljs-selector-tag, -.hljs-section, -.hljs-attribute, -.hljs-name, -.hljs-variable { - color: #cb7832; -} - -.hljs-params { - color: #b9b9b9; -} - -.hljs-string { - color: #6a8759; -} - -.hljs-subst, -.hljs-type, -.hljs-built_in, -.hljs-builtin-name, -.hljs-symbol, -.hljs-selector-id, -.hljs-selector-attr, -.hljs-selector-pseudo, -.hljs-template-tag, -.hljs-template-variable, -.hljs-addition { - color: #e0c46c; -} - -.hljs-comment, -.hljs-deletion, -.hljs-meta { - color: #7f7f7f; -} diff --git a/website/static/css/dark.css b/website/static/css/dark.css deleted file mode 100644 index b4724f5..0000000 --- a/website/static/css/dark.css +++ /dev/null @@ -1,63 +0,0 @@ -/* - -Dark style from softwaremaniacs.org (c) Ivan Sagalaev - -*/ - -.hljs { - display: block; - overflow-x: auto; - padding: 0.5em; - background: #444; -} - -.hljs-keyword, -.hljs-selector-tag, -.hljs-literal, -.hljs-section, -.hljs-link { - color: white; -} - -.hljs, -.hljs-subst { - color: #ddd; -} - -.hljs-string, -.hljs-title, -.hljs-name, 
-.hljs-type, -.hljs-attribute, -.hljs-symbol, -.hljs-bullet, -.hljs-built_in, -.hljs-addition, -.hljs-variable, -.hljs-template-tag, -.hljs-template-variable { - color: #d88; -} - -.hljs-comment, -.hljs-quote, -.hljs-deletion, -.hljs-meta { - color: #777; -} - -.hljs-keyword, -.hljs-selector-tag, -.hljs-literal, -.hljs-title, -.hljs-section, -.hljs-doctag, -.hljs-type, -.hljs-name, -.hljs-strong { - font-weight: bold; -} - -.hljs-emphasis { - font-style: italic; -} diff --git a/website/static/css/flexboxgrid.css b/website/static/css/flexboxgrid.css deleted file mode 100644 index 603506f..0000000 --- a/website/static/css/flexboxgrid.css +++ /dev/null @@ -1,960 +0,0 @@ -.container-fluid, -.container { - margin-right: auto; - margin-left: auto; -} - -.container-fluid { - padding-right: 2rem; - padding-left: 2rem; -} - -.row { - box-sizing: border-box; - display: -webkit-box; - display: -ms-flexbox; - display: flex; - -webkit-box-flex: 0; - -ms-flex: 0 1 auto; - flex: 0 1 auto; - -webkit-box-orient: horizontal; - -webkit-box-direction: normal; - -ms-flex-direction: row; - flex-direction: row; - -ms-flex-wrap: wrap; - flex-wrap: wrap; - margin-right: -0.5rem; - margin-left: -0.5rem; -} - -.row.reverse { - -webkit-box-orient: horizontal; - -webkit-box-direction: reverse; - -ms-flex-direction: row-reverse; - flex-direction: row-reverse; -} - -.col.reverse { - -webkit-box-orient: vertical; - -webkit-box-direction: reverse; - -ms-flex-direction: column-reverse; - flex-direction: column-reverse; -} - -.col-xs, -.col-xs-1, -.col-xs-2, -.col-xs-3, -.col-xs-4, -.col-xs-5, -.col-xs-6, -.col-xs-7, -.col-xs-8, -.col-xs-9, -.col-xs-10, -.col-xs-11, -.col-xs-12, -.col-xs-offset-0, -.col-xs-offset-1, -.col-xs-offset-2, -.col-xs-offset-3, -.col-xs-offset-4, -.col-xs-offset-5, -.col-xs-offset-6, -.col-xs-offset-7, -.col-xs-offset-8, -.col-xs-offset-9, -.col-xs-offset-10, -.col-xs-offset-11, -.col-xs-offset-12 { - box-sizing: border-box; - -webkit-box-flex: 0; - -ms-flex: 0 0 auto; - 
flex: 0 0 auto; - padding-right: 0.5rem; - padding-left: 0.5rem; -} - -.col-xs { - -webkit-box-flex: 1; - -ms-flex-positive: 1; - flex-grow: 1; - -ms-flex-preferred-size: 0; - flex-basis: 0; - max-width: 100%; -} - -.col-xs-1 { - -ms-flex-preferred-size: 8.33333333%; - flex-basis: 8.33333333%; - max-width: 8.33333333%; -} - -.col-xs-2 { - -ms-flex-preferred-size: 16.66666667%; - flex-basis: 16.66666667%; - max-width: 16.66666667%; -} - -.col-xs-3 { - -ms-flex-preferred-size: 25%; - flex-basis: 25%; - max-width: 25%; -} - -.col-xs-4 { - -ms-flex-preferred-size: 33.33333333%; - flex-basis: 33.33333333%; - max-width: 33.33333333%; -} - -.col-xs-5 { - -ms-flex-preferred-size: 41.66666667%; - flex-basis: 41.66666667%; - max-width: 41.66666667%; -} - -.col-xs-6 { - -ms-flex-preferred-size: 50%; - flex-basis: 50%; - max-width: 50%; -} - -.col-xs-7 { - -ms-flex-preferred-size: 58.33333333%; - flex-basis: 58.33333333%; - max-width: 58.33333333%; -} - -.col-xs-8 { - -ms-flex-preferred-size: 66.66666667%; - flex-basis: 66.66666667%; - max-width: 66.66666667%; -} - -.col-xs-9 { - -ms-flex-preferred-size: 75%; - flex-basis: 75%; - max-width: 75%; -} - -.col-xs-10 { - -ms-flex-preferred-size: 83.33333333%; - flex-basis: 83.33333333%; - max-width: 83.33333333%; -} - -.col-xs-11 { - -ms-flex-preferred-size: 91.66666667%; - flex-basis: 91.66666667%; - max-width: 91.66666667%; -} - -.col-xs-12 { - -ms-flex-preferred-size: 100%; - flex-basis: 100%; - max-width: 100%; -} - -.col-xs-offset-0 { - margin-left: 0; -} - -.col-xs-offset-1 { - margin-left: 8.33333333%; -} - -.col-xs-offset-2 { - margin-left: 16.66666667%; -} - -.col-xs-offset-3 { - margin-left: 25%; -} - -.col-xs-offset-4 { - margin-left: 33.33333333%; -} - -.col-xs-offset-5 { - margin-left: 41.66666667%; -} - -.col-xs-offset-6 { - margin-left: 50%; -} - -.col-xs-offset-7 { - margin-left: 58.33333333%; -} - -.col-xs-offset-8 { - margin-left: 66.66666667%; -} - -.col-xs-offset-9 { - margin-left: 75%; -} - -.col-xs-offset-10 { 
- margin-left: 83.33333333%; -} - -.col-xs-offset-11 { - margin-left: 91.66666667%; -} - -.start-xs { - -webkit-box-pack: start; - -ms-flex-pack: start; - justify-content: flex-start; - text-align: start; -} - -.center-xs { - -webkit-box-pack: center; - -ms-flex-pack: center; - justify-content: center; - text-align: center; -} - -.end-xs { - -webkit-box-pack: end; - -ms-flex-pack: end; - justify-content: flex-end; - text-align: end; -} - -.top-xs { - -webkit-box-align: start; - -ms-flex-align: start; - align-items: flex-start; -} - -.middle-xs { - -webkit-box-align: center; - -ms-flex-align: center; - align-items: center; -} - -.bottom-xs { - -webkit-box-align: end; - -ms-flex-align: end; - align-items: flex-end; -} - -.around-xs { - -ms-flex-pack: distribute; - justify-content: space-around; -} - -.between-xs { - -webkit-box-pack: justify; - -ms-flex-pack: justify; - justify-content: space-between; -} - -.first-xs { - -webkit-box-ordinal-group: 0; - -ms-flex-order: -1; - order: -1; -} - -.last-xs { - -webkit-box-ordinal-group: 2; - -ms-flex-order: 1; - order: 1; -} - -@media only screen and (min-width: 48em) { - .container { - width: 49rem; - } - - .col-sm, - .col-sm-1, - .col-sm-2, - .col-sm-3, - .col-sm-4, - .col-sm-5, - .col-sm-6, - .col-sm-7, - .col-sm-8, - .col-sm-9, - .col-sm-10, - .col-sm-11, - .col-sm-12, - .col-sm-offset-0, - .col-sm-offset-1, - .col-sm-offset-2, - .col-sm-offset-3, - .col-sm-offset-4, - .col-sm-offset-5, - .col-sm-offset-6, - .col-sm-offset-7, - .col-sm-offset-8, - .col-sm-offset-9, - .col-sm-offset-10, - .col-sm-offset-11, - .col-sm-offset-12 { - box-sizing: border-box; - -webkit-box-flex: 0; - -ms-flex: 0 0 auto; - flex: 0 0 auto; - padding-right: 0.5rem; - padding-left: 0.5rem; - } - - .col-sm { - -webkit-box-flex: 1; - -ms-flex-positive: 1; - flex-grow: 1; - -ms-flex-preferred-size: 0; - flex-basis: 0; - max-width: 100%; - } - - .col-sm-1 { - -ms-flex-preferred-size: 8.33333333%; - flex-basis: 8.33333333%; - max-width: 8.33333333%; - 
} - - .col-sm-2 { - -ms-flex-preferred-size: 16.66666667%; - flex-basis: 16.66666667%; - max-width: 16.66666667%; - } - - .col-sm-3 { - -ms-flex-preferred-size: 25%; - flex-basis: 25%; - max-width: 25%; - } - - .col-sm-4 { - -ms-flex-preferred-size: 33.33333333%; - flex-basis: 33.33333333%; - max-width: 33.33333333%; - } - - .col-sm-5 { - -ms-flex-preferred-size: 41.66666667%; - flex-basis: 41.66666667%; - max-width: 41.66666667%; - } - - .col-sm-6 { - -ms-flex-preferred-size: 50%; - flex-basis: 50%; - max-width: 50%; - } - - .col-sm-7 { - -ms-flex-preferred-size: 58.33333333%; - flex-basis: 58.33333333%; - max-width: 58.33333333%; - } - - .col-sm-8 { - -ms-flex-preferred-size: 66.66666667%; - flex-basis: 66.66666667%; - max-width: 66.66666667%; - } - - .col-sm-9 { - -ms-flex-preferred-size: 75%; - flex-basis: 75%; - max-width: 75%; - } - - .col-sm-10 { - -ms-flex-preferred-size: 83.33333333%; - flex-basis: 83.33333333%; - max-width: 83.33333333%; - } - - .col-sm-11 { - -ms-flex-preferred-size: 91.66666667%; - flex-basis: 91.66666667%; - max-width: 91.66666667%; - } - - .col-sm-12 { - -ms-flex-preferred-size: 100%; - flex-basis: 100%; - max-width: 100%; - } - - .col-sm-offset-0 { - margin-left: 0; - } - - .col-sm-offset-1 { - margin-left: 8.33333333%; - } - - .col-sm-offset-2 { - margin-left: 16.66666667%; - } - - .col-sm-offset-3 { - margin-left: 25%; - } - - .col-sm-offset-4 { - margin-left: 33.33333333%; - } - - .col-sm-offset-5 { - margin-left: 41.66666667%; - } - - .col-sm-offset-6 { - margin-left: 50%; - } - - .col-sm-offset-7 { - margin-left: 58.33333333%; - } - - .col-sm-offset-8 { - margin-left: 66.66666667%; - } - - .col-sm-offset-9 { - margin-left: 75%; - } - - .col-sm-offset-10 { - margin-left: 83.33333333%; - } - - .col-sm-offset-11 { - margin-left: 91.66666667%; - } - - .start-sm { - -webkit-box-pack: start; - -ms-flex-pack: start; - justify-content: flex-start; - text-align: start; - } - - .center-sm { - -webkit-box-pack: center; - -ms-flex-pack: 
center; - justify-content: center; - text-align: center; - } - - .end-sm { - -webkit-box-pack: end; - -ms-flex-pack: end; - justify-content: flex-end; - text-align: end; - } - - .top-sm { - -webkit-box-align: start; - -ms-flex-align: start; - align-items: flex-start; - } - - .middle-sm { - -webkit-box-align: center; - -ms-flex-align: center; - align-items: center; - } - - .bottom-sm { - -webkit-box-align: end; - -ms-flex-align: end; - align-items: flex-end; - } - - .around-sm { - -ms-flex-pack: distribute; - justify-content: space-around; - } - - .between-sm { - -webkit-box-pack: justify; - -ms-flex-pack: justify; - justify-content: space-between; - } - - .first-sm { - -webkit-box-ordinal-group: 0; - -ms-flex-order: -1; - order: -1; - } - - .last-sm { - -webkit-box-ordinal-group: 2; - -ms-flex-order: 1; - order: 1; - } -} - -@media only screen and (min-width: 64em) { - .container { - width: 65rem; - } - - .col-md, - .col-md-1, - .col-md-2, - .col-md-3, - .col-md-4, - .col-md-5, - .col-md-6, - .col-md-7, - .col-md-8, - .col-md-9, - .col-md-10, - .col-md-11, - .col-md-12, - .col-md-offset-0, - .col-md-offset-1, - .col-md-offset-2, - .col-md-offset-3, - .col-md-offset-4, - .col-md-offset-5, - .col-md-offset-6, - .col-md-offset-7, - .col-md-offset-8, - .col-md-offset-9, - .col-md-offset-10, - .col-md-offset-11, - .col-md-offset-12 { - box-sizing: border-box; - -webkit-box-flex: 0; - -ms-flex: 0 0 auto; - flex: 0 0 auto; - padding-right: 0.5rem; - padding-left: 0.5rem; - } - - .col-md { - -webkit-box-flex: 1; - -ms-flex-positive: 1; - flex-grow: 1; - -ms-flex-preferred-size: 0; - flex-basis: 0; - max-width: 100%; - } - - .col-md-1 { - -ms-flex-preferred-size: 8.33333333%; - flex-basis: 8.33333333%; - max-width: 8.33333333%; - } - - .col-md-2 { - -ms-flex-preferred-size: 16.66666667%; - flex-basis: 16.66666667%; - max-width: 16.66666667%; - } - - .col-md-3 { - -ms-flex-preferred-size: 25%; - flex-basis: 25%; - max-width: 25%; - } - - .col-md-4 { - 
-ms-flex-preferred-size: 33.33333333%; - flex-basis: 33.33333333%; - max-width: 33.33333333%; - } - - .col-md-5 { - -ms-flex-preferred-size: 41.66666667%; - flex-basis: 41.66666667%; - max-width: 41.66666667%; - } - - .col-md-6 { - -ms-flex-preferred-size: 50%; - flex-basis: 50%; - max-width: 50%; - } - - .col-md-7 { - -ms-flex-preferred-size: 58.33333333%; - flex-basis: 58.33333333%; - max-width: 58.33333333%; - } - - .col-md-8 { - -ms-flex-preferred-size: 66.66666667%; - flex-basis: 66.66666667%; - max-width: 66.66666667%; - } - - .col-md-9 { - -ms-flex-preferred-size: 75%; - flex-basis: 75%; - max-width: 75%; - } - - .col-md-10 { - -ms-flex-preferred-size: 83.33333333%; - flex-basis: 83.33333333%; - max-width: 83.33333333%; - } - - .col-md-11 { - -ms-flex-preferred-size: 91.66666667%; - flex-basis: 91.66666667%; - max-width: 91.66666667%; - } - - .col-md-12 { - -ms-flex-preferred-size: 100%; - flex-basis: 100%; - max-width: 100%; - } - - .col-md-offset-0 { - margin-left: 0; - } - - .col-md-offset-1 { - margin-left: 8.33333333%; - } - - .col-md-offset-2 { - margin-left: 16.66666667%; - } - - .col-md-offset-3 { - margin-left: 25%; - } - - .col-md-offset-4 { - margin-left: 33.33333333%; - } - - .col-md-offset-5 { - margin-left: 41.66666667%; - } - - .col-md-offset-6 { - margin-left: 50%; - } - - .col-md-offset-7 { - margin-left: 58.33333333%; - } - - .col-md-offset-8 { - margin-left: 66.66666667%; - } - - .col-md-offset-9 { - margin-left: 75%; - } - - .col-md-offset-10 { - margin-left: 83.33333333%; - } - - .col-md-offset-11 { - margin-left: 91.66666667%; - } - - .start-md { - -webkit-box-pack: start; - -ms-flex-pack: start; - justify-content: flex-start; - text-align: start; - } - - .center-md { - -webkit-box-pack: center; - -ms-flex-pack: center; - justify-content: center; - text-align: center; - } - - .end-md { - -webkit-box-pack: end; - -ms-flex-pack: end; - justify-content: flex-end; - text-align: end; - } - - .top-md { - -webkit-box-align: start; - 
-ms-flex-align: start; - align-items: flex-start; - } - - .middle-md { - -webkit-box-align: center; - -ms-flex-align: center; - align-items: center; - } - - .bottom-md { - -webkit-box-align: end; - -ms-flex-align: end; - align-items: flex-end; - } - - .around-md { - -ms-flex-pack: distribute; - justify-content: space-around; - } - - .between-md { - -webkit-box-pack: justify; - -ms-flex-pack: justify; - justify-content: space-between; - } - - .first-md { - -webkit-box-ordinal-group: 0; - -ms-flex-order: -1; - order: -1; - } - - .last-md { - -webkit-box-ordinal-group: 2; - -ms-flex-order: 1; - order: 1; - } -} - -@media only screen and (min-width: 75em) { - .container { - width: 76rem; - } - - .col-lg, - .col-lg-1, - .col-lg-2, - .col-lg-3, - .col-lg-4, - .col-lg-5, - .col-lg-6, - .col-lg-7, - .col-lg-8, - .col-lg-9, - .col-lg-10, - .col-lg-11, - .col-lg-12, - .col-lg-offset-0, - .col-lg-offset-1, - .col-lg-offset-2, - .col-lg-offset-3, - .col-lg-offset-4, - .col-lg-offset-5, - .col-lg-offset-6, - .col-lg-offset-7, - .col-lg-offset-8, - .col-lg-offset-9, - .col-lg-offset-10, - .col-lg-offset-11, - .col-lg-offset-12 { - box-sizing: border-box; - -webkit-box-flex: 0; - -ms-flex: 0 0 auto; - flex: 0 0 auto; - padding-right: 0.5rem; - padding-left: 0.5rem; - } - - .col-lg { - -webkit-box-flex: 1; - -ms-flex-positive: 1; - flex-grow: 1; - -ms-flex-preferred-size: 0; - flex-basis: 0; - max-width: 100%; - } - - .col-lg-1 { - -ms-flex-preferred-size: 8.33333333%; - flex-basis: 8.33333333%; - max-width: 8.33333333%; - } - - .col-lg-2 { - -ms-flex-preferred-size: 16.66666667%; - flex-basis: 16.66666667%; - max-width: 16.66666667%; - } - - .col-lg-3 { - -ms-flex-preferred-size: 25%; - flex-basis: 25%; - max-width: 25%; - } - - .col-lg-4 { - -ms-flex-preferred-size: 33.33333333%; - flex-basis: 33.33333333%; - max-width: 33.33333333%; - } - - .col-lg-5 { - -ms-flex-preferred-size: 41.66666667%; - flex-basis: 41.66666667%; - max-width: 41.66666667%; - } - - .col-lg-6 { - 
-ms-flex-preferred-size: 50%; - flex-basis: 50%; - max-width: 50%; - } - - .col-lg-7 { - -ms-flex-preferred-size: 58.33333333%; - flex-basis: 58.33333333%; - max-width: 58.33333333%; - } - - .col-lg-8 { - -ms-flex-preferred-size: 66.66666667%; - flex-basis: 66.66666667%; - max-width: 66.66666667%; - } - - .col-lg-9 { - -ms-flex-preferred-size: 75%; - flex-basis: 75%; - max-width: 75%; - } - - .col-lg-10 { - -ms-flex-preferred-size: 83.33333333%; - flex-basis: 83.33333333%; - max-width: 83.33333333%; - } - - .col-lg-11 { - -ms-flex-preferred-size: 91.66666667%; - flex-basis: 91.66666667%; - max-width: 91.66666667%; - } - - .col-lg-12 { - -ms-flex-preferred-size: 100%; - flex-basis: 100%; - max-width: 100%; - } - - .col-lg-offset-0 { - margin-left: 0; - } - - .col-lg-offset-1 { - margin-left: 8.33333333%; - } - - .col-lg-offset-2 { - margin-left: 16.66666667%; - } - - .col-lg-offset-3 { - margin-left: 25%; - } - - .col-lg-offset-4 { - margin-left: 33.33333333%; - } - - .col-lg-offset-5 { - margin-left: 41.66666667%; - } - - .col-lg-offset-6 { - margin-left: 50%; - } - - .col-lg-offset-7 { - margin-left: 58.33333333%; - } - - .col-lg-offset-8 { - margin-left: 66.66666667%; - } - - .col-lg-offset-9 { - margin-left: 75%; - } - - .col-lg-offset-10 { - margin-left: 83.33333333%; - } - - .col-lg-offset-11 { - margin-left: 91.66666667%; - } - - .start-lg { - -webkit-box-pack: start; - -ms-flex-pack: start; - justify-content: flex-start; - text-align: start; - } - - .center-lg { - -webkit-box-pack: center; - -ms-flex-pack: center; - justify-content: center; - text-align: center; - } - - .end-lg { - -webkit-box-pack: end; - -ms-flex-pack: end; - justify-content: flex-end; - text-align: end; - } - - .top-lg { - -webkit-box-align: start; - -ms-flex-align: start; - align-items: flex-start; - } - - .middle-lg { - -webkit-box-align: center; - -ms-flex-align: center; - align-items: center; - } - - .bottom-lg { - -webkit-box-align: end; - -ms-flex-align: end; - align-items: 
flex-end; - } - - .around-lg { - -ms-flex-pack: distribute; - justify-content: space-around; - } - - .between-lg { - -webkit-box-pack: justify; - -ms-flex-pack: justify; - justify-content: space-between; - } - - .first-lg { - -webkit-box-ordinal-group: 0; - -ms-flex-order: -1; - order: -1; - } - - .last-lg { - -webkit-box-ordinal-group: 2; - -ms-flex-order: 1; - order: 1; - } -} diff --git a/website/static/css/funnel.css b/website/static/css/funnel.css deleted file mode 100644 index 825d6af..0000000 --- a/website/static/css/funnel.css +++ /dev/null @@ -1,245 +0,0 @@ -.global-header { - background-color: #23241f; - padding: .3rem .5rem; -} - -.global-header-container { - display: flex; - align-items: center; -} - -.global-header-container h1, -.global-header-container h2 { - margin: 0; - padding: 0; - color: white; -} -.global-header-container h1 { - font-size: 1.2rem; -} -.global-header-container h2 { - font-size: .9rem; -} - -.global-header-container, -.homepage { - max-width: 50rem; - margin: 0 auto; -} - -.homepage-intro .col { - padding: 0 1rem; -} -.homepage-intro li { - font-size: 0.8rem; -} - -.global-header-nav { - list-style-type: none; - padding: 0; - margin: 0; - margin-left: 3rem; - flex-grow: 1; - display: flex; - align-items: center; - justify-content: center; -} - -@media only screen and (max-width: 600px) { - .global-header-container, - .global-header-nav { - flex-direction: column; - } - .global-header-home, - .global-header-ohsucb { - margin: .5rem 0; - } - .global-header-nav { - margin: 0; - } - .global-header-nav li { - margin: .5rem 0; - } - - .homepage-demo .col h1, - .homepage-demo .col p { - margin-left: .3rem; - } - - .content { - padding: 0 .7rem; - } - - .sidebar { - padding: 1rem; - padding-bottom: 0; - } - - .sidebar-nav li { - margin: .3rem 0; - } -} - -.sidebar-nav { - font-size: .9rem; -} -.sidebar-nav span.intermediate { - color: #23241f; -} - -.global-header-nav li { - display: inline-block; - padding: 0 0.5rem; - font-size: .9rem; 
-} - -.global-header-nav li a { - color: white; -} - -.global-header a:hover, -.global-header a:hover h1, -.global-header a:hover h2, -.global-header-nav li a:hover { - color: #9ed9ff; - text-decoration: none; -} - -.lead { - font-size: .8rem; -} - -.lead a { - color: #b8d4e0; -} - -.homepage h2 { - text-align: center; - font-size: 1.5rem; - margin-bottom: 1rem; -} - -.homepage-lead { - background-color: #f1f1f1; - padding: 2rem 2rem 1rem 2rem; - border-radius: 10px; - margin-bottom: 1rem; - text-align: center; -} - -.homepage-lead-container { - max-width: 42rem; - margin: 0 auto; -} - -.homepage-lead h1 { - margin: 0; -} - -.homepage-footer { - height: 100px; -} - -.homepage-notice { - background-color: #fffcbf; - padding: 1rem 3rem; - border-radius: 10px; - margin-top: 0; - margin-bottom: 1rem; - text-align: center; -} - -.homepage-notice h4 { - font-size: 1rem; -} - -.homepage-notice h3, -.homepage-notice p { - margin: 0; -} - -.homepage-lead .download-button, -.homepage-lead .docs-button { - padding: 10px 30px; - border-radius: 5px; - border: 0; - color: white; - font-size: .7rem; - display: inline-block; - margin: 0.1rem 0.2rem; -} -.docs-button { - border-radius: 5px; - border: 0; - color: white; - font-size: .7rem; - background-color: #4ca0ea; - padding: 10px 30px; -} - -.homepage-lead .download-button { - background-color: #29b429; -} - -.homepage .row { - width: 100%; - margin-bottom: 20px; -} - -.homepage-demo { - margin-top: 3rem; - margin-bottom: 3rem; -} - -.homepage-demo h1.demo-header { - font-size: 1.5rem; - text-align: center; - margin-bottom: 2rem; -} - -.homepage-demo h1 { - font-size: 1rem; - margin: 0; -} - -.homepage-demo p { - margin: .7rem 0; - padding-right: .7rem; - font-size: .8rem; -} - -.homepage h3 { - font-size: 1rem; -} - -.homepage-more { - text-align: center; -} - -.homepage p { - font-size: .8rem; -} - -pre { - padding: 0; -} - -.homepage-demo .section { - margin-bottom: 2rem; -} - -.homepage-demo pre { - margin: 0; -} 
-.homepage-demo code { - width: 100%; - display: block; - font-size: .8rem; - border-radius: 0; -} - -.optional { - font-size: 1rem; - color: #aaa; - font-style: normal; -} diff --git a/website/static/css/highlight.min.css b/website/static/css/highlight.min.css deleted file mode 100644 index 7d8be18..0000000 --- a/website/static/css/highlight.min.css +++ /dev/null @@ -1 +0,0 @@ -.hljs{display:block;overflow-x:auto;padding:0.5em;background:#F0F0F0}.hljs,.hljs-subst{color:#444}.hljs-comment{color:#888888}.hljs-keyword,.hljs-attribute,.hljs-selector-tag,.hljs-meta-keyword,.hljs-doctag,.hljs-name{font-weight:bold}.hljs-type,.hljs-string,.hljs-number,.hljs-selector-id,.hljs-selector-class,.hljs-quote,.hljs-template-tag,.hljs-deletion{color:#880000}.hljs-title,.hljs-section{color:#880000;font-weight:bold}.hljs-regexp,.hljs-symbol,.hljs-variable,.hljs-template-variable,.hljs-link,.hljs-selector-attr,.hljs-selector-pseudo{color:#BC6060}.hljs-literal{color:#78A960}.hljs-built_in,.hljs-bullet,.hljs-code,.hljs-addition{color:#397300}.hljs-meta{color:#1f7199}.hljs-meta-string{color:#4d99bf}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:bold} \ No newline at end of file diff --git a/website/static/css/html5reset.css b/website/static/css/html5reset.css deleted file mode 100755 index 3bfbb3d..0000000 --- a/website/static/css/html5reset.css +++ /dev/null @@ -1,96 +0,0 @@ -/* html5reset.css - 01/11/2011 */ - -html, body, div, span, object, iframe, -h1, h2, h3, h4, h5, h6, p, blockquote, pre, -abbr, address, cite, code, -del, dfn, em, img, ins, kbd, q, samp, -small, strong, sub, sup, var, -b, i, -dl, dt, dd, ol, ul, li, -fieldset, form, label, legend, -table, caption, tbody, tfoot, thead, tr, th, td, -article, aside, canvas, details, figcaption, figure, -footer, header, hgroup, menu, nav, section, summary, -time, mark, audio, video { - margin: 0; - padding: 0; - border: 0; - outline: 0; - font-size: 100%; - vertical-align: baseline; - background: transparent; -} - -body { 
- line-height: 1; -} - -article,aside,details,figcaption,figure, -footer,header,hgroup,menu,nav,section { - display: block; -} - -nav ul { - list-style: none; -} - -blockquote, q { - quotes: none; -} - -blockquote:before, blockquote:after, -q:before, q:after { - content: ''; - content: none; -} - -a { - margin: 0; - padding: 0; - font-size: 100%; - vertical-align: baseline; - background: transparent; -} - -/* change colours to suit your needs */ -ins { - background-color: #ff9; - color: #000; - text-decoration: none; -} - -/* change colours to suit your needs */ -mark { - background-color: #ff9; - color: #000; - font-style: italic; - font-weight: bold; -} - -del { - text-decoration: line-through; -} - -abbr[title], dfn[title] { - border-bottom: 1px dotted; - cursor: help; -} - -table { - border-collapse: collapse; - border-spacing: 0; -} - -/* change border colour to suit your needs */ -hr { - display: block; - height: 1px; - border: 0; - border-top: 1px solid #cccccc; - margin: 1em 0; - padding: 0; -} - -input, select { - vertical-align: middle; -} \ No newline at end of file diff --git a/website/static/css/hybrid.css b/website/static/css/hybrid.css deleted file mode 100644 index 29735a1..0000000 --- a/website/static/css/hybrid.css +++ /dev/null @@ -1,102 +0,0 @@ -/* - -vim-hybrid theme by w0ng (https://github.com/w0ng/vim-hybrid) - -*/ - -/*background color*/ -.hljs { - display: block; - overflow-x: auto; - padding: 0.5em; - background: #1d1f21; -} - -/*selection color*/ -.hljs::selection, -.hljs span::selection { - background: #373b41; -} - -.hljs::-moz-selection, -.hljs span::-moz-selection { - background: #373b41; -} - -/*foreground color*/ -.hljs { - color: #c5c8c6; -} - -/*color: fg_yellow*/ -.hljs-title, -.hljs-name { - color: #f0c674; -} - -/*color: fg_comment*/ -.hljs-comment, -.hljs-meta, -.hljs-meta .hljs-keyword { - color: #707880; -} - -/*color: fg_red*/ -.hljs-number, -.hljs-symbol, -.hljs-literal, -.hljs-deletion, -.hljs-link { - color: #cc6666 -} - 
-/*color: fg_green*/ -.hljs-string, -.hljs-doctag, -.hljs-addition, -.hljs-regexp, -.hljs-selector-attr, -.hljs-selector-pseudo { - color: #b5bd68; -} - -/*color: fg_purple*/ -.hljs-attribute, -.hljs-code, -.hljs-selector-id { - color: #b294bb; -} - -/*color: fg_blue*/ -.hljs-keyword, -.hljs-selector-tag, -.hljs-bullet, -.hljs-tag { - color: #81a2be; -} - -/*color: fg_aqua*/ -.hljs-subst, -.hljs-variable, -.hljs-template-tag, -.hljs-template-variable { - color: #8abeb7; -} - -/*color: fg_orange*/ -.hljs-type, -.hljs-built_in, -.hljs-builtin-name, -.hljs-quote, -.hljs-section, -.hljs-selector-class { - color: #de935f; -} - -.hljs-emphasis { - font-style: italic; -} - -.hljs-strong { - font-weight: bold; -} diff --git a/website/static/css/monokai-sublime.css b/website/static/css/monokai-sublime.css deleted file mode 100644 index 2864170..0000000 --- a/website/static/css/monokai-sublime.css +++ /dev/null @@ -1,83 +0,0 @@ -/* - -Monokai Sublime style. Derived from Monokai by noformnocontent http://nn.mit-license.org/ - -*/ - -.hljs { - display: block; - overflow-x: auto; - padding: 0.5em; - background: #23241f; -} - -.hljs, -.hljs-tag, -.hljs-subst { - color: #f8f8f2; -} - -.hljs-strong, -.hljs-emphasis { - color: #a8a8a2; -} - -.hljs-bullet, -.hljs-quote, -.hljs-number, -.hljs-regexp, -.hljs-literal, -.hljs-link { - color: #ae81ff; -} - -.hljs-code, -.hljs-title, -.hljs-section, -.hljs-selector-class { - color: #a6e22e; -} - -.hljs-strong { - font-weight: bold; -} - -.hljs-emphasis { - font-style: italic; -} - -.hljs-keyword, -.hljs-selector-tag, -.hljs-name, -.hljs-attr { - color: #f92672; -} - -.hljs-symbol, -.hljs-attribute { - color: #66d9ef; -} - -.hljs-params, -.hljs-class .hljs-title { - color: #f8f8f2; -} - -.hljs-string, -.hljs-type, -.hljs-built_in, -.hljs-builtin-name, -.hljs-selector-id, -.hljs-selector-attr, -.hljs-selector-pseudo, -.hljs-addition, -.hljs-variable, -.hljs-template-variable { - color: #e6db74; -} - -.hljs-comment, -.hljs-deletion, 
-.hljs-meta { - color: #75715e; -} diff --git a/website/static/css/poole.css b/website/static/css/poole.css deleted file mode 100644 index 03f9338..0000000 --- a/website/static/css/poole.css +++ /dev/null @@ -1,283 +0,0 @@ -/* - * ___ - * /\_ \ - * _____ ___ ___\//\ \ __ - * /\ '__`\ / __`\ / __`\\ \ \ /'__`\ - * \ \ \_\ \/\ \_\ \/\ \_\ \\_\ \_/\ __/ - * \ \ ,__/\ \____/\ \____//\____\ \____\ - * \ \ \/ \/___/ \/___/ \/____/\/____/ - * \ \_\ - * \/_/ - * - * Designed, built, and released under MIT license by @mdo. Learn more at - * https://github.com/poole/poole. - */ - - -/* - * Contents - * - * Body resets - * Custom type - * Messages - * Container - * Masthead - * Posts and pages - * Pagination - * Reverse layout - * Themes - */ - - -/* - * Body resets - * - * Update the foundational and global aspects of the page. - */ - -* { - -webkit-box-sizing: border-box; - -moz-box-sizing: border-box; - box-sizing: border-box; -} - -html, -body { - margin: 0; - padding: 0; -} - -html { - font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; - font-size: 16px; - line-height: 1.5; -} - -body { - color: #515151; - background-color: #fff; - -webkit-text-size-adjust: 100%; - -ms-text-size-adjust: 100%; -} - -/* No `:visited` state is required by default (browsers will use `a`) */ -a { - color: #268bd2; - text-decoration: none; -} -/* `:focus` is linked to `:hover` for basic accessibility */ -a:hover, -a:focus { - text-decoration: underline; -} - -/* Headings */ -h1, h2, h3, h4, h5, h6 { - margin-bottom: .5rem; - font-weight: bold; - line-height: 1.25; - color: #313131; - text-rendering: optimizeLegibility; -} -h1 { - font-size: 2rem; -} -h2 { - margin-top: 1rem; - font-size: 1.5rem; -} -h3 { - margin-top: 1.5rem; - font-size: 1.25rem; -} -h4, h5, h6 { - margin-top: 1rem; - font-size: 1rem; -} - -/* Body text */ -p { - margin-top: 0; - margin-bottom: 1rem; -} - -strong { - color: #303030; -} - - -/* Lists */ -ul, ol, dl { - margin-top: 0; - margin-bottom: 1rem; -} - -dt { 
- font-weight: bold; -} -dd { - margin-bottom: .5rem; -} - -/* Misc */ -hr { - position: relative; - margin: 1.5rem 0; - border: 0; - border-top: 1px solid #eee; - border-bottom: 1px solid #fff; -} - -abbr { - font-size: 85%; - font-weight: bold; - color: #555; - text-transform: uppercase; -} -abbr[title] { - cursor: help; - border-bottom: 1px dotted #e5e5e5; -} - -/* Code */ -code, -pre { - font-family: Menlo, Monaco, "Courier New", monospace; -} -code { - padding: .25em .5em; - font-size: 85%; - color: #bf616a; - background-color: #f9f9f9; - border-radius: 3px; -} -pre { - display: block; - margin-top: 0; - margin-bottom: 1rem; - padding: 1rem; - font-size: .8rem; - line-height: 1.4; - white-space: pre; - white-space: pre-wrap; - word-break: break-all; - word-wrap: break-word; - background-color: #f9f9f9; -} -pre code { - padding: 0; - font-size: 100%; - color: inherit; - background-color: transparent; -} -.highlight { - margin-bottom: 1rem; - border-radius: 4px; -} -.highlight pre { - margin-bottom: 0; -} - -/* Quotes */ -blockquote { - padding: .5rem 1rem; - margin: .8rem 0; - color: #7a7a7a; - border-left: .25rem solid #e5e5e5; -} -blockquote p:last-child { - margin-bottom: 0; -} -@media (min-width: 30em) { - blockquote { - padding-right: 5rem; - padding-left: 1.25rem; - } -} - -img { - display: block; - margin: 0 0 1rem; - border-radius: 5px; - max-width: 100%; -} - -/* Tables */ -table { - margin-bottom: 1rem; - width: 100%; - border: 1px solid #e5e5e5; - border-collapse: collapse; -} -td, -th { - padding: .25rem .5rem; - border: 1px solid #e5e5e5; -} -tbody tr:nth-child(odd) td, -tbody tr:nth-child(odd) th { - background-color: #f9f9f9; -} - - -/* - * Custom type - * - * Extend paragraphs with `.lead` for larger introductory text. - */ - -.lead { - font-size: 1.25rem; - font-weight: 300; -} - - -/* - * Messages - * - * Show alert messages to users. You may add it to single elements like a `

`, - * or to a parent if there are multiple elements to show. - */ - -.message { - margin-bottom: 1rem; - padding: 1rem; - color: #717171; - background-color: #f9f9f9; -} - - -/* - * Masthead - * - * Super small header above the content for site name and short description. - */ - -.masthead { - padding-top: 1rem; - padding-bottom: 1rem; - margin-bottom: 3rem; -} -.masthead-title { - margin-top: 0; - margin-bottom: 0; - color: #505050; -} -.masthead-title a { - color: #505050; -} -.masthead-title small { - font-size: 75%; - font-weight: 400; - color: #c0c0c0; - letter-spacing: 0; -} - - -/* Meta data line below post title */ -.post-date { - display: block; - margin-top: -.5rem; - margin-bottom: 1rem; - color: #9a9a9a; -} diff --git a/website/static/css/syntax.css b/website/static/css/syntax.css deleted file mode 100644 index 1264b87..0000000 --- a/website/static/css/syntax.css +++ /dev/null @@ -1,66 +0,0 @@ -.hll { background-color: #ffffcc } - /*{ background: #f0f3f3; }*/ -.c { color: #999; } /* Comment */ -.err { color: #AA0000; background-color: #FFAAAA } /* Error */ -.k { color: #006699; } /* Keyword */ -.o { color: #555555 } /* Operator */ -.cm { color: #0099FF; font-style: italic } /* Comment.Multiline */ -.cp { color: #009999 } /* Comment.Preproc */ -.c1 { color: #999; } /* Comment.Single */ -.cs { color: #999; } /* Comment.Special */ -.gd { background-color: #FFCCCC; border: 1px solid #CC0000 } /* Generic.Deleted */ -.ge { font-style: italic } /* Generic.Emph */ -.gr { color: #FF0000 } /* Generic.Error */ -.gh { color: #003300; } /* Generic.Heading */ -.gi { background-color: #CCFFCC; border: 1px solid #00CC00 } /* Generic.Inserted */ -.go { color: #AAAAAA } /* Generic.Output */ -.gp { color: #000099; } /* Generic.Prompt */ -.gs { } /* Generic.Strong */ -.gu { color: #003300; } /* Generic.Subheading */ -.gt { color: #99CC66 } /* Generic.Traceback */ -.kc { color: #006699; } /* Keyword.Constant */ -.kd { color: #006699; } /* Keyword.Declaration */ -.kn { 
color: #006699; } /* Keyword.Namespace */ -.kp { color: #006699 } /* Keyword.Pseudo */ -.kr { color: #006699; } /* Keyword.Reserved */ -.kt { color: #007788; } /* Keyword.Type */ -.m { color: #FF6600 } /* Literal.Number */ -.s { color: #d44950 } /* Literal.String */ -.na { color: #4f9fcf } /* Name.Attribute */ -.nb { color: #336666 } /* Name.Builtin */ -.nc { color: #00AA88; } /* Name.Class */ -.no { color: #336600 } /* Name.Constant */ -.nd { color: #9999FF } /* Name.Decorator */ -.ni { color: #999999; } /* Name.Entity */ -.ne { color: #CC0000; } /* Name.Exception */ -.nf { color: #CC00FF } /* Name.Function */ -.nl { color: #9999FF } /* Name.Label */ -.nn { color: #00CCFF; } /* Name.Namespace */ -.nt { color: #2f6f9f; } /* Name.Tag */ -.nv { color: #003333 } /* Name.Variable */ -.ow { color: #000000; } /* Operator.Word */ -.w { color: #bbbbbb } /* Text.Whitespace */ -.mf { color: #FF6600 } /* Literal.Number.Float */ -.mh { color: #FF6600 } /* Literal.Number.Hex */ -.mi { color: #FF6600 } /* Literal.Number.Integer */ -.mo { color: #FF6600 } /* Literal.Number.Oct */ -.sb { color: #CC3300 } /* Literal.String.Backtick */ -.sc { color: #CC3300 } /* Literal.String.Char */ -.sd { color: #CC3300; font-style: italic } /* Literal.String.Doc */ -.s2 { color: #CC3300 } /* Literal.String.Double */ -.se { color: #CC3300; } /* Literal.String.Escape */ -.sh { color: #CC3300 } /* Literal.String.Heredoc */ -.si { color: #AA0000 } /* Literal.String.Interpol */ -.sx { color: #CC3300 } /* Literal.String.Other */ -.sr { color: #33AAAA } /* Literal.String.Regex */ -.s1 { color: #CC3300 } /* Literal.String.Single */ -.ss { color: #FFCC33 } /* Literal.String.Symbol */ -.bp { color: #336666 } /* Name.Builtin.Pseudo */ -.vc { color: #003333 } /* Name.Variable.Class */ -.vg { color: #003333 } /* Name.Variable.Global */ -.vi { color: #003333 } /* Name.Variable.Instance */ -.il { color: #FF6600 } /* Literal.Number.Integer.Long */ - -.css .o, -.css .o + .nt, -.css .nt + .nt { color: #999; } 
diff --git a/website/static/css/theme.css b/website/static/css/theme.css deleted file mode 100644 index af44672..0000000 --- a/website/static/css/theme.css +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Originally based on the Hyde theme, but heavily modified - # and who knows what original code remains. - * - * Designed, built, and released under MIT license by @mdo. Learn more at - * https://github.com/poole/hyde. - */ - - -/* - * Global resets - * - * Update the foundational and global aspects of the page. - */ - -html { - font-family: "PT Sans", Helvetica, Arial, sans-serif; - font-size: 20px; -} - -@media (max-width: 48em) { - .main { - font-size: 16px; - } -} - -/* SECTIONS ============================================================================= */ - -.section { - clear: both; - padding: 0px; - margin: 0px; -} - -/* GROUPING ============================================================================= */ - - -.group:before, -.group:after { - content:""; - display:table; -} -.group:after { - clear:both; -} -.group { - zoom:1; /* For IE 6/7 (trigger hasLayout) */ -} - -/* GRID COLUMN SETUP ==================================================================== */ - -.col { - display: block; - float:left; - margin: 1% 0 1% 1.6%; -} - -.col:first-child { margin-left: 0; } /* all browsers except IE6 and lower */ - - -/* REMOVE MARGINS AS ALL GO FULL WIDTH AT 600 PIXELS */ - -@media only screen and (max-width: 600px) { - .col { - margin: 1% 0 1% 0%; - } -} - -/* GRID OF THREE ============================================================================= */ - - -.span_3_of_3 { - width: 100%; -} - -.span_2_of_3 { - width: 66.13%; -} - -.span_1_of_3 { - width: 32.26%; -} - - -/* GO FULL WIDTH AT LESS THAN 600 PIXELS */ - -@media only screen and (max-width: 600px) { - .span_3_of_3 { - width: 100%; - } - .span_2_of_3 { - width: 100%; - } - .span_1_of_3 { - width: 100%; - } -} - -/* GRID OF TWELVE ============================================================================= */ 
- -.span_12_of_12 { - width: 100%; -} - -.span_11_of_12 { - width: 91.53%; -} - -.span_10_of_12 { - width: 83.06%; -} - -.span_9_of_12 { - width: 74.6%; -} - -.span_8_of_12 { - width: 66.13%; -} - -.span_7_of_12 { - width: 57.66%; -} - -.span_6_of_12 { - width: 49.2%; -} - -.span_5_of_12 { - width: 40.73%; -} - -.span_4_of_12 { - width: 32.26%; -} - -.span_3_of_12 { - width: 23.8%; -} - -.span_2_of_12 { - width: 15.33%; -} - -.span_1_of_12 { - width: 6.86%; -} - - -/* GO FULL WIDTH AT LESS THAN 600 PIXELS */ - -@media only screen and (max-width: 600px) { - .span_12_of_12 { - width: 100%; - } - .span_11_of_12 { - width: 100%; - } - .span_10_of_12 { - width: 100%; - } - .span_9_of_12 { - width: 100%; - } - .span_8_of_12 { - width: 100%; - } - .span_7_of_12 { - width: 100%; - } - .span_6_of_12 { - width: 100%; - } - .span_5_of_12 { - width: 100%; - } - .span_4_of_12 { - width: 100%; - } - .span_3_of_12 { - width: 100%; - } - .span_2_of_12 { - width: 100%; - } - .span_1_of_12 { - width: 100%; - } -} - - -/* - * Sidebar - * - * Flexible banner for housing site name, intro, and "footer" content. Starts - * out above content in mobile and later moves to the side with wider viewports. - */ - -.sidebar { - padding: 2rem; - padding-right: 0; - color: rgba(255,255,255,.5); - font-size: 1rem; -} - -.sidebar-nav { - padding-left: 0; - list-style: none; -} -.sidebar-nav-item { - display: block; -} -a.sidebar-nav-item:hover, -a.sidebar-nav-item:focus { - text-decoration: underline; -} -.sidebar-nav-item.active { - font-weight: bold; -} -.sidebar-nav-nested { - padding-left: 1rem; - margin-bottom: 0; -} diff --git a/website/static/sifter_example.png b/website/static/sifter_example.png deleted file mode 100644 index 284e0dd..0000000 Binary files a/website/static/sifter_example.png and /dev/null differ