Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 81 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,87 @@ You can create your own knowledgebase with the minr command, available at https:

Syntax: scanoss [parameters] [TARGET]

Configuration:
* -w Treats TARGET as a .wfp file regardless of the actual file extension
* -s FILE Use assets specified in the provided JSON SBOM (CycloneDX/SPDX2.2 JSON format) as input to identification
* -b FILE Ignore matches to assets specified in the provided JSON SBOM (CycloneDX/SPDX2.2 JSON format)

Options:
* -t Tests engine performance
* -v Display version and exit
* -h Display this help and exit
* -d Enable debugging information
## Configuration Options

### Basic Configuration
* `-w, --wfp` - Process TARGET as a .wfp file, regardless of its actual extension
* `-H, --hpsm` - Enable High Precision Snippet Match mode (requires 'libhpsm.so' to be installed on the system)
* `-M, --max-snippets NUM` - Search for up to NUM different components in each file (maximum: 9)
* `-N, --max-components NUM` - Set maximum number of components (default: 5)
* `-T, --tolerance NUM` - Set snippet scanning tolerance percentage (default: 0.1)
* `-r, --rank NUM` - Set maximum component rank accepted (default: 11)
* `--max-files NUM` - Set maximum number of files to fetch during matching (default: 12000)
* `--min-match-hits NUM` - Set minimum snippet ID hits for a match (default: 3, disables auto-adjust)
* `--min-match-lines NUM` - Set minimum matched lines for a range (default: 10, disables auto-adjust)
* `--range-tolerance NUM` - Set max non-matched lines tolerated in a range (default: 5)
* `--ignore-file-ext` - Ignore file extension during snippet matching (default: honor extension)

### SBOM and Filtering
* `-s, --sbom FILE` - Include assets from a JSON SBOM file (CycloneDX/SPDX2.2 format) in identification
* `-b, --blacklist FILE` - Exclude matches from assets listed in JSON SBOM file (CycloneDX/SPDX2.2 format)
* `--force-snippet` - Same as "-b" but with forced snippet scanning
* `-c, --component HINT` - Add a component HINT to guide scan results

### Attribution and Licenses
* `-a, --attribution FILE` - Show attribution notices for the provided SBOM.json file
* `-k, --key KEY` - Show contents of the specified KEY file from MZ sources archive
* `-l, --license LICENSE` - Display OSADL metadata for the given SPDX license ID
* `-L, --full-license` - Enable full license report
* `-F, --flags FLAGS` - Set engine scanning flags (see Engine Flags section below)

### General Options
* `-t, --test` - Run engine performance tests
* `-v, --version` - Show version information and exit
* `-n, --name NAME` - Set database name (default: oss)
* `-h, --help` - Display help information and exit
* `-d, --debug` - Store debugging information to disk (/tmp)
* `-q, --quiet` - Suppress JSON output (show only debugging info via STDERR)

## Environment Variables

* `SCANOSS_MATCHMAP_MAX` - Set the snippet scanning match map size (default: 10000)
* `SCANOSS_FILE_CONTENTS_URL` - Define the API URL endpoint for sources. Source URL won't be reported if not defined

## Engine Scanning Flags

Configure the scanning engine using flags with the `-F/--flags` parameter. These settings can also be specified in `/etc/scanoss_flags.cfg`.

| Flag | Setting |
|-------|-------------------------------------------------------|
| 1 | Disable snippet matching (default: enabled) |
| 2 | Enable snippet_ids (default: disabled) |
| 4 | Disable dependencies (default: enabled) |
| 8 | Disable licenses (default: enabled) |
| 16 | Disable copyrights (default: enabled) |
| 32 | Disable vulnerabilities (default: enabled) |
| 64 | Disable quality (default: enabled) |
| 128 | Disable cryptography (default: enabled) |
| 256 | Disable best match only (default: enabled) |
| 512 | Hide identified files (default: disabled) |
| 1024 | Enable download_url (default: disabled) |
| 2048 | Enable "use path hint" logic (default: disabled) |
| 4096 | Disable extended server stats (default: enabled) |
| 8192 | Disable health layer (default: enabled) |
| 16384 | Enable high accuracy, slower scan (default: disabled) |

### Examples:
```bash
# Scan DIRECTORY without license and dependency data
scanoss -F 12 DIRECTORY
scanoss --flags 12 DIRECTORY

# Scan TARGET including SBOM assets
scanoss --sbom my_sbom.json TARGET

# Scan with custom snippet matching parameters
scanoss --min-match-hits 5 --min-match-lines 15 TARGET

# Scan with custom range tolerance
scanoss --range-tolerance 10 TARGET

# Ignore file extensions during matching
scanoss --ignore-file-ext TARGET
```

# File matching logic

Expand Down
39 changes: 38 additions & 1 deletion inc/component.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define __COMPONENT_H

#include "scanoss.h"
#include "limits.h"

#define COMPONENT_DEFAULT_RANK 999 //default rank for components without rank information
#define COMPONENT_RANK_SELECTION_MAX 8 //max rank to be considered in component selection
Expand All @@ -18,11 +19,13 @@ extern int component_rank_max;
*
*/
enum {
IDENTIFIED_FILTERED = -1,
IDENTIFIED_NONE = 0,
IDENTIFIED_PURL,
IDENTIFIED_PURL_VERSION
IDENTIFIED_PURL_VERSION,
};


typedef struct component_data_t
{
char * vendor; /* component vendor */
Expand Down Expand Up @@ -62,6 +65,40 @@ typedef struct component_data_t
int third_party_rank; /* Saves third party ranking*/
} component_data_t;

/**
 * @brief A text word paired with an integer counter.
 * NOTE(review): presumably count is the number of occurrences of word;
 * confirm against the code that fills this structure.
 */
typedef struct keywords
{
int count; /* counter associated with the word */
char word[MAX_FIELD_LN]; /* word text, stored inline in a MAX_FIELD_LN-byte buffer */
} keywords;


/**
 * @brief Fixed-size record holding a URL identifier together with a file path.
 */
typedef struct file_recordset
{
uint8_t url_id[MD5_LEN]; /* URL identifier, MD5_LEN bytes (presumably an MD5 digest - confirm) */
char path[MAX_FILE_PATH]; /* file path, stored inline in a MAX_FILE_PATH-byte buffer */
int path_ln; /* presumably the length of path - confirm against the code that fills it */
bool external; /* NOTE(review): the meaning of "external" is not visible here; confirm at the point of use */
} file_recordset;

/**
 * @brief Pairs an integer id with an integer length.
 * NOTE(review): the name suggests it is used to rank entries by length;
 * confirm against the code that uses this type.
 */
typedef struct len_rank
{
int id; /* identifier of the ranked entry */
int len; /* length value associated with the entry */
} len_rank;

/**
 * @brief Descriptive fields of a component, each held as a string pointer.
 * This is the element type of the ignore_components and declared_components
 * pointers declared in this header.
 * NOTE(review): ownership and lifetime of the pointed-to strings are not
 * visible from this declaration; confirm who allocates and frees them.
 */
typedef struct component_item
{
char * vendor; /* component vendor */
char * component; /* component name */
char * purl; /* component purl */
char * version; /* component version */
char * license; /* component license */
} component_item;

extern component_item *ignore_components;
extern component_item *declared_components;


component_data_t * component_init(void);
void component_data_free(component_data_t * data);
bool fill_component(component_data_t * component, uint8_t *url_key, char *file_path, uint8_t *url_record);
Expand Down
23 changes: 11 additions & 12 deletions inc/limits.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,25 +34,24 @@
#define MAX_QUERY_RESPONSE (1024 * 1024 * 8)
#define SLOW_QUERY_LIMIT_IN_USEC 2000000
#define MAX_JSON_VALUE_LEN 4096
#define MAX_FILE_PATH 1024
#define FETCH_MAX_FILES_DEFAULT 12000
#define MIN_FILE_SIZE 256 // files below this size will be ignored
#define CRC_LIST_LEN 1024 // list of crc checksums to avoid metadata duplicates

/* Snippets */
#define DEFAULT_MATCHMAP_FILES 10000 // Default number of files evaluated in snippet matching
#define MAX_MATCHMAP_FILES (DEFAULT_MATCHMAP_FILES * 10) // Max number of files evaluated in snippet matching to prevent performance issues
#define MIN_LINES_COVERAGE 0.8
#define SKIP_SNIPPETS_IF_FILE_BIGGER (1024 * 1024 * 4)
#define MAX_SNIPPETS_SCANNED 2500

#define SNIPPETS_DEFAULT_RANGE_TOLERANCE 5 /** A maximum number of non-matched lines tolerated inside a matching range */
#define SNIPPETS_DEFAULT_MIN_MATCH_LINES 5 /** Minimum number of lines matched for a match range to be accepted */
#define SNIPPETS_DEFAULT_MIN_MATCH_HITS 2 /** Minimum number of snippet ID hits to produce a snippet match */
#define SNIPPETS_DEFAULT_ADJUST_TOLERANCE true /** Adjust tolerance based on file size */
#define SNIPPETS_DEFAULT_HONOR_FILE_EXTENSION true /** Honor file extension during snippet matching */
#define DEFAULT_FETCH_MAX_FILES 12000 /** Maximum number of files to fetch during component matching */
/* Variables */

/* During snippet scanning, when a wfp (with more than consecutive_threshold wfps) produces a score higher
than consecutive_score by consecutive_hits in a row, the scan will skip consecutive_jump snippets */
extern int consecutive_score;
extern int consecutive_hits;
extern int consecutive_jump;
extern int consecutive_threshold;

extern int range_tolerance; // A maximum number of non-matched lines tolerated inside a matching range
extern int min_match_lines; // Minimum number of lines matched for a match range to be accepted
extern int min_match_hits; // Minimum number of snippet ID hits to produce a snippet match
extern int fetch_max_files; // Maximum number of files to fetch during component matching

#endif
1 change: 0 additions & 1 deletion inc/match.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ typedef struct match_data_t
uint32_t * crclist; /* pointer to crc list used in for processing */
char * quality_text; /* quality string used in json output format */
char * crytography_text; /* crytography string used in json output format */
uint16_t from;
} match_data_t;

match_data_t * match_data_copy(match_data_t * in);
Expand Down
1 change: 1 addition & 0 deletions inc/parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <stdint.h>
#include <stdbool.h>
#include "scanoss.h"
#include "component.h"

void extract_csv(char *out, char *in, int n, long limit);
void lowercase(char *word);
Expand Down
12 changes: 9 additions & 3 deletions inc/scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,22 @@ typedef struct scan_data_t
int max_matchmap_size;
bool printed_succed;
bool windows_line_endings;
bool snippet_adjust_tolerance; // Enable adjust snippet tolerance based on file size
int component_ranking_threshold; //-1 = disable ranking. 0 = all accepted
int snippet_min_hits;
int snippet_min_lines;
int snippet_range_tolerance;
bool snippet_honor_file_extension;
} scan_data_t;

extern bool force_snippet_scan;

scan_data_t * scan_data_init(char *target, int max_snippets, int max_components);
scan_data_t * scan_data_init(char *target, int max_snippets, int max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension);
void scan_data_free (scan_data_t * scan);

void ldb_scan(scan_data_t * scan);
match_t ldb_scan_snippets(scan_data_t *scan_ptr);
int wfp_scan(char * path, int scan_max_snippets, int scan_max_components);
int hash_scan(char *path, int scan_max_snippets, int scan_max_components);
int wfp_scan(char * path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension);
int hash_scan(char *path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension);

#endif
44 changes: 1 addition & 43 deletions inc/scanoss.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,10 @@
#include <unistd.h>
#include "limits.h"

#define MAX_FILE_PATH 1024
#define FETCH_MAX_FILES 12000
#define MIN_FILE_SIZE 256 // files below this size will be ignored
#define CRC_LIST_LEN 1024 // list of crc checksums to avoid metadata duplicates
#define SNIPPET_LINE_TOLERANCE 10

#define WFP_LN 4
#define WFP_REC_LN 18

#define SCANOSS_VERSION "5.4.19"
#define SCANOSS_VERSION "5.4.20"

/* Log files */
#define SCAN_LOG "/tmp/scanoss_scan.log"
Expand Down Expand Up @@ -84,40 +78,8 @@ extern const char *dependency_sources[];

typedef enum {MATCH_NONE, MATCH_FILE, MATCH_SNIPPET, MATCH_BINARY} match_t;

typedef struct keywords
{
int count;
char word[MAX_FIELD_LN];
} keywords;


typedef struct file_recordset
{
uint8_t url_id[MD5_LEN];
char path[MAX_FILE_PATH];
int path_ln;
bool external;
} file_recordset;

typedef struct len_rank
{
int id;
int len;
} len_rank;

typedef struct component_item
{
char * vendor;
char * component;
char * purl;
char * version;
char * license;
} component_item;


extern long microseconds_start;
extern int map_rec_len;
extern bool match_extensions;

/*component hint hold the last component matched/guessed */
extern char * component_hint;
Expand All @@ -141,12 +103,8 @@ extern struct ldb_table oss_notices;


extern bool first_file;
extern int max_vulnerabilities;

extern char *ignored_assets;
extern component_item *ignore_components;
extern component_item *declared_components;


/* Prototype declarations */

Expand Down
2 changes: 1 addition & 1 deletion src/binary_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ int binary_scan(char * input)
char * file_name = field_n(3,input);
int target_len = strchr(file_name,',') - file_name;
char * target = strndup(file_name, target_len);
scan_data_t * scan = scan_data_init(target, 1, 1);
scan_data_t * scan = scan_data_init(target, 1, 1, true, 0, 3, 5, SNIPPETS_DEFAULT_RANGE_TOLERANCE, false);
free(target);
memcpy(scan->md5, bin_md5, MD5_LEN);
scan->match_type = MATCH_FILE;
Expand Down
6 changes: 3 additions & 3 deletions src/component.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ bool fill_component(component_data_t *component, uint8_t *url_key, char *file_pa
extract_csv(license, (char *)url_record, 5, sizeof(license));
extract_csv(purl, (char *)url_record, 6, sizeof(purl));
extract_csv(url, (char *)url_record, 7, sizeof(url));
extract_csv(rank, (char *)url_record, 13, sizeof(rank)); //extracts the rank field if available
extract_csv(rank, (char *)url_record, 14, sizeof(rank)); //extracts the rank field if available
/* Fill url stats if these are available*/
for (int i = 0; i < 5; i++) {
char stat[16] = "\0";
Expand Down Expand Up @@ -292,10 +292,10 @@ bool fill_component(component_data_t *component, uint8_t *url_key, char *file_pa
MD5((uint8_t *)component->purls[0], strlen(component->purls[0]), component->purls_md5[0]);
}
component->age = -1;
if (*rank && strlen(rank) < 3)
if (*rank)
{
component->rank = atoi(rank);
//scanlog("Component rank from DB: %d\n", component->rank);
//scanlog("Component rank from DB: %s- %d\n", rank, component->rank);
}
else
component->rank = COMPONENT_DEFAULT_RANK;
Expand Down
2 changes: 1 addition & 1 deletion src/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ void scan_benchmark()

for (int f = 0; f < total_files ; f++)
{
scan_data_t * scan = scan_data_init("pseudo_file", 0, 0);
scan_data_t * scan = scan_data_init("pseudo_file", 0, 0, true, 0, 3, 5, SNIPPETS_DEFAULT_RANGE_TOLERANCE, false);
scan->preload = true;
memcpy(scan->md5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", MD5_LEN);
strcpy(scan->file_size, "1024");
Expand Down
6 changes: 3 additions & 3 deletions src/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,8 @@ int dir_count(char *path)
bool collect_all_files(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
{

/* Leave if FETCH_MAX_FILES is reached */
if (iteration >= FETCH_MAX_FILES) return true;
/* Leave if fetch_max_files is reached */
if (iteration >= fetch_max_files) return true;

/* Ignore path lengths over the limit */
if (!datalen || datalen >= (MD5_LEN + MAX_FILE_PATH)) return false;
Expand Down Expand Up @@ -231,7 +231,7 @@ bool count_all_files(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_

int * count = ptr;
*count = iteration;
if (iteration >= FETCH_MAX_FILES)
if (iteration >= fetch_max_files)
{
return true;
}
Expand Down
Loading
Loading