From 2fb53f39ae3e9cc80b1fa7c7328f0113d175d5d0 Mon Sep 17 00:00:00 2001 From: Tomas Gatial Date: Thu, 5 Feb 2026 14:20:36 +0100 Subject: [PATCH] feat: domain and ip reputation --- .gitignore | 6 +- docs/changelog.md | 30 +++ s1-validin.yaml | 34 +++- storm/commands/s1.validin.reputation.storm | 2 + storm/modules/s1.validin.api.storm | 13 ++ .../s1.validin.ingest.reputation.storm | 141 +++++++++++++ storm/modules/s1.validin.model.storm | 129 ++++++++++++ storm/modules/s1.validin.storm | 38 +++- ...on_domain_reputation_quick_github.com.json | 169 +++++++++++++++ ...on_domain_reputation_quick_google.com.json | 167 +++++++++++++++ ...reputation_quick_micsrosoftonline.com.json | 62 ++++++ .../axon_ip_reputation_quick_1.1.1.1.json | 82 ++++++++ .../axon_ip_reputation_quick_8.8.8.8.json | 126 ++++++++++++ test/test_reputation.py | 192 ++++++++++++++++++ 14 files changed, 1185 insertions(+), 6 deletions(-) create mode 100644 storm/commands/s1.validin.reputation.storm create mode 100644 storm/modules/s1.validin.ingest.reputation.storm create mode 100644 test/mock/axon_domain_reputation_quick_github.com.json create mode 100644 test/mock/axon_domain_reputation_quick_google.com.json create mode 100644 test/mock/axon_domain_reputation_quick_micsrosoftonline.com.json create mode 100644 test/mock/axon_ip_reputation_quick_1.1.1.1.json create mode 100644 test/mock/axon_ip_reputation_quick_8.8.8.8.json create mode 100644 test/test_reputation.py diff --git a/.gitignore b/.gitignore index e29c947..41cc0b0 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,8 @@ dist/ .ruff_cache/ # Logs -*.log \ No newline at end of file +*.log + +# Temporary files +.tmp/ +tmp/ \ No newline at end of file diff --git a/docs/changelog.md b/docs/changelog.md index 132b1d0..3934d66 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,35 @@ # Changelog +## [3.4.0] - 2026-02-05 + +### Features + +- Add `s1.validin.reputation` command for domain and IP reputation checks +- Add IP reputation 
ingestion with ASN and geolocation data +- Add domain reputation ingestion with ranking data (Majestic, Tranco, Umbrella, Anchors) +- Add new data model extensions: + - `inet:fqdn._s1:validin:verdict` - Reputation verdict for domains + - `inet:fqdn._s1:validin:reputation` - Reputation score for domains + - `inet:fqdn._s1:validin:rank:majestic` - Majestic Million rank + - `inet:fqdn._s1:validin:rank:tranco` - Tranco Top 1M rank + - `inet:fqdn._s1:validin:rank:umbrella` - Umbrella Top 1M rank + - `inet:fqdn._s1:validin:rank:anchors` - Validin Top Anchors rank + - `inet:ipv4._s1:validin:verdict` - Reputation verdict for IPv4 + - `inet:ipv4._s1:validin:reputation` - Reputation score for IPv4 + - `inet:ipv4._s1:validin:rank:pivot_count` - Pivot count for IPv4 + - `inet:ipv4._s1:validin:rank:top_a` - Validin Top A rank for IPv4 + - `inet:ipv6._s1:validin:verdict` - Reputation verdict for IPv6 + - `inet:ipv6._s1:validin:reputation` - Reputation score for IPv6 + - `inet:ipv6._s1:validin:rank:pivot_count` - Pivot count for IPv6 + - `inet:ipv6._s1:validin:rank:top_a` - Validin Top A rank for IPv6 + +### Improvements + +- IP reputation now always parses ASN and geolocation information +- Enhanced ASN modeling using `inet:asnet4` and `inet:asnet6` forms for CIDR-to-ASN relationships +- Improved ownership data processing to handle multiple ownership entries +- Added comprehensive test coverage for reputation functionality + ## [3.3.1] - 2025-11-19 ### Fixes diff --git a/s1-validin.yaml b/s1-validin.yaml index 03d8a38..0cab6c7 100644 --- a/s1-validin.yaml +++ b/s1-validin.yaml @@ -1,5 +1,5 @@ name: s1-validin -version: 3.3.2 +version: 3.4.0 synapse_version: '>=2.144.0,<3.0.0' @@ -42,6 +42,7 @@ modules: - name: s1.validin.ingest.dns - name: s1.validin.ingest.pivot - name: s1.validin.ingest.registration + - name: s1.validin.ingest.reputation - name: s1.validin.model - name: s1.validin.privsep asroot:perms: @@ -282,6 +283,33 @@ commands: default: pilot.validin.com help: The 
hostname to use. + - name: s1.validin.reputation + descr: | + Get reputation information for domains and IPs. + + This command accepts the following input node forms: + - inet:fqdn + - inet:ipv4 + - inet:ipv6 + + // get reputation for a domain + inet:fqdn=example.com | s1.validin.reputation + + // get reputation for an IP address + inet:ipv4=8.8.8.8 | s1.validin.reputation + + // get reputation with IP location and ASN data + inet:ipv4=8.8.8.8 | s1.validin.reputation + + // get reputation and yield the enriched nodes + inet:fqdn=example.com | s1.validin.reputation --yield + + cmdargs: + - - --yield + - type: bool + action: store_true + help: Yield created nodes. + - name: s1.validin.whois descr: | Get WHOIS records. @@ -347,6 +375,10 @@ optic: storm: s1.validin.http.pivot descr: Pivot from HTTP content or a hash to related artifacts. forms: [inet:http:request, hash:md5, hash:sha1, hash:sha256] + - name: reputation + storm: s1.validin.reputation + descr: Get reputation information for domains and IPs. + forms: [inet:fqdn, inet:ipv4, inet:ipv6] - name: whois storm: s1.validin.whois descr: Get WHOIS records for a domain or email. 
diff --git a/storm/commands/s1.validin.reputation.storm b/storm/commands/s1.validin.reputation.storm new file mode 100644 index 0000000..f86a942 --- /dev/null +++ b/storm/commands/s1.validin.reputation.storm @@ -0,0 +1,2 @@ +$validin = $lib.import(s1.validin) +divert $cmdopts.yield $validin.reputation($node, $cmdopts) diff --git a/storm/modules/s1.validin.api.storm b/storm/modules/s1.validin.api.storm index 2aeaec8..b788eed 100644 --- a/storm/modules/s1.validin.api.storm +++ b/storm/modules/s1.validin.api.storm @@ -283,4 +283,17 @@ function ip_crawl_history( limit=$limit ).records.crawlr ) +} + + +// reputation endpoints +function domain_reputation(fqdn) { + $uri = `axon/domain/reputation/quick/{$fqdn}` + return($endpoint($uri)) +} + + +function ip_reputation(ip) { + $uri = `axon/ip/reputation/quick/{$ip}` + return($endpoint($uri)) } \ No newline at end of file diff --git a/storm/modules/s1.validin.ingest.reputation.storm b/storm/modules/s1.validin.ingest.reputation.storm new file mode 100644 index 0000000..7abdfb9 --- /dev/null +++ b/storm/modules/s1.validin.ingest.reputation.storm @@ -0,0 +1,141 @@ +$ingest = $lib.import(s1.validin.ingest) + + +function extract_asn_int(asn_string) { + $asn_parts = $asn_string.split(" ") + $asn_num = $asn_parts.index(1) + return($lib.cast(int, $asn_num)) +} + + +function create_asn_from_ownership(ip_form, ownership) { + $source = $ingest.get_source() + $asn_int = $extract_asn_int($ownership.asn) + + //TODO :org synthesis + [inet:asn=$asn_int :name?=$ownership.owner] + + for $cidr in $ownership.cidrs { + if ($ip_form = "inet:ipv4") { + [(inet:asnet4=($asn_int, $cidr))] + } elif ($ip_form = "inet:ipv6") { + [(inet:asnet6=($asn_int, $cidr))] + } + } +} + + +function domain_reputation(fqdn, data) { + // model domain reputation data from validin + + $source = $ingest.get_source() + + $domain_annotation_extended_proprty_value_map = ({ + 'MAGESTIC_MILLION_RANK':'majestic', + 'TRANCO_TOP_1M_RANK':'tranco', + 
'UMBRELLA_TOP_1M_RANK':'umbrella', + 'VALIDIN_TOP_ANCHORS_RANK':'anchors', + }) + + function annotate_fqdn(annotation) { + /* + Validin FQDN Annotation may be keyed with parent fqdn, so we need to lift the node first + */ + $extended_property_key = $domain_annotation_extended_proprty_value_map.`{$annotation.description}` + if $extended_property_key { + [inet:fqdn=$annotation.key] + $node.data.set(`s1:validin:rank:{$extended_property_key}`, $annotation) + $node.props.set(`_s1:validin:rank:{$extended_property_key}`, $annotation.value) + } + } + + try { + // Create/lift the fqdn node + [ + inet:fqdn=$fqdn + :_s1:validin:verdict?=$data.verdict + :_s1:validin:reputation?=$data.score + ] + + // Store raw response data + $node.data.set(s1:validin:reputation, $data) + + // Extract ranks from annotations array + if ($data.annotations != $lib.null) { + for $annotation in $data.annotations { + yield $annotate_fqdn($annotation) + } + } | uniq | [ <(seen)+ $source ] + } catch * as error { + $lib.warn(`Failed to model domain reputation for {$fqdn}: {$error}`) + } +} + + +function ip_reputation(ip, data) { + // model IP reputation data from validin + + $source = $ingest.get_source() + + try { + $is_ipv6 = $ip.find(":") + + if $is_ipv6 { + [ inet:ipv6=$ip ] + } else { + [ inet:ipv4=$ip ] + } + + $node.data.set(s1:validin:reputation, $data) + + [ + :_s1:validin:verdict?=$data.verdict + :_s1:validin:reputation?=$data.score + ] + + // Extract ranks from annotations array + if ($data.annotations != $lib.null) {{ + for $annotation in $data.annotations { + switch $annotation.description { + "PIVOT_COUNT_IP": { + [:_s1:validin:rank:pivot_count?=$annotation.value] + } + "VALIDIN_TOP_A_RANK": { + [:_s1:validin:rank:top_a?=$annotation.value] + } + } + } + }} + + + // Set location from latitude/longitude + if ($data.informational.location != $lib.null) { + $lat = $lib.cast(float, $data.informational.location.latitude) + $lon = $lib.cast(float, $data.informational.location.longitude) + [ +
:latlong=($lat, $lon) + :loc?=$data.informational.location.country + ] + } + + // Extract ASN from ownership + if ($data.informational.ownership != $lib.null) { + $ip_form = $node.form() + // TODO: handle nested ownership whenever synapse starts supporting it (see test/mock/axon_ip_reputation_quick_8.8.8.8.json) + $ownership = $data.informational.ownership.index(0) + + if ($ownership.asn != $lib.null) { + [:asn=$extract_asn_int($ownership.asn)] + } + + for $owner in $data.informational.ownership { + yield $create_asn_from_ownership($ip_form, $owner) + + } + } + + | uniq | [<(seen)+ $source] + } catch * as error { + $lib.warn(`Failed to model IP reputation for {$ip}: {$error}`) + } +} diff --git a/storm/modules/s1.validin.model.storm b/storm/modules/s1.validin.model.storm index c97a402..1d59fb1 100644 --- a/storm/modules/s1.validin.model.storm +++ b/storm/modules/s1.validin.model.storm @@ -62,4 +62,133 @@ function update_forms() { ({"doc":'Favicon bytes observed by Validin crawler.' }) ) } + + // Reputation rank properties for inet:fqdn + if (not $existing_props.has('inet:fqdn:_s1:validin:rank:majestic')) { + $lib.model.ext.addFormProp( + inet:fqdn, + '_s1:validin:rank:majestic', + (int, ({})), + ({"doc":'The average rank of a domain on the Majestic top-N list over 14 days. A domain must remain on this list for 14 continuous days to be considered "stable".' }) + ) + } + + if (not $existing_props.has('inet:fqdn:_s1:validin:rank:tranco')) { + $lib.model.ext.addFormProp( + inet:fqdn, + '_s1:validin:rank:tranco', + (int, ({})), + ({"doc":'The average rank of a domain on the Tranco top-N list over 14 days. A domain must remain on this list for 14 continuous days to be considered "stable".' }) + ) + } + + if (not $existing_props.has('inet:fqdn:_s1:validin:rank:umbrella')) { + $lib.model.ext.addFormProp( + inet:fqdn, + '_s1:validin:rank:umbrella', + (int, ({})), + ({"doc":'The average rank of a domain on the Umbrella top-N list over 14 days. 
A domain must remain on this list for 14 continuous days to be considered "stable".' }) + ) + } + + if (not $existing_props.has('inet:fqdn:_s1:validin:rank:anchors')) { + $lib.model.ext.addFormProp( + inet:fqdn, + '_s1:validin:rank:anchors', + (int, ({})), + ({"doc":'Validin Top Anchor Links rank. Represents the domain ranking based on anchor link popularity.' }) + ) + } + + if (not $existing_props.has('inet:fqdn:_s1:validin:verdict')) { + $lib.model.ext.addFormProp( + inet:fqdn, + '_s1:validin:verdict', + (str, ({})), + ({"doc":'Validin reputation verdict observed by Validin.' }) + ) + } + + if (not $existing_props.has('inet:fqdn:_s1:validin:reputation')) { + $lib.model.ext.addFormProp( + inet:fqdn, + '_s1:validin:reputation', + (float, ({})), + ({"doc":'Validin reputation score observed by Validin.' }) + ) + } + + // Reputation rank properties for inet:ipv4 + if (not $existing_props.has('inet:ipv4:_s1:validin:rank:pivot_count')) { + $lib.model.ext.addFormProp( + inet:ipv4, + '_s1:validin:rank:pivot_count', + (int, ({})), + ({"doc":"The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." }) + ) + } + + if (not $existing_props.has('inet:ipv4:_s1:validin:rank:top_a')) { + $lib.model.ext.addFormProp( + inet:ipv4, + '_s1:validin:rank:top_a', + (int, ({})), + ({"doc":"Validin Top IPv4 Addresses rank. Represents the IP ranking in Validin's top IPv4 addresses list." }) + ) + } + + if (not $existing_props.has('inet:ipv4:_s1:validin:verdict')) { + $lib.model.ext.addFormProp( + inet:ipv4, + '_s1:validin:verdict', + (str, ({})), + ({"doc":"Validin reputation verdict observed by Validin." }) + ) + } + + if (not $existing_props.has('inet:ipv4:_s1:validin:reputation')) { + $lib.model.ext.addFormProp( + inet:ipv4, + '_s1:validin:reputation', + (float, ({})), + ({"doc":"Validin reputation score observed by Validin." 
}) + ) + } + + // Reputation rank properties for inet:ipv6 + if (not $existing_props.has('inet:ipv6:_s1:validin:rank:pivot_count')) { + $lib.model.ext.addFormProp( + inet:ipv6, + '_s1:validin:rank:pivot_count', + (int, ({})), + ({"doc":"The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." }) + ) + } + + if (not $existing_props.has('inet:ipv6:_s1:validin:rank:top_a')) { + $lib.model.ext.addFormProp( + inet:ipv6, + '_s1:validin:rank:top_a', + (int, ({})), + ({"doc":"Validin Top IPv6 Addresses rank. Represents the IP ranking in Validin's top IPv6 addresses list." }) + ) + } + + if (not $existing_props.has('inet:ipv6:_s1:validin:verdict')) { + $lib.model.ext.addFormProp( + inet:ipv6, + '_s1:validin:verdict', + (str, ({})), + ({"doc":"Validin reputation verdict observed by Validin." }) + ) + } + + if (not $existing_props.has('inet:ipv6:_s1:validin:reputation')) { + $lib.model.ext.addFormProp( + inet:ipv6, + '_s1:validin:reputation', + (float, ({})), + ({"doc":"Validin reputation score observed by Validin." 
}) + ) + } } diff --git a/storm/modules/s1.validin.storm b/storm/modules/s1.validin.storm index 69a8a19..a5dd666 100644 --- a/storm/modules/s1.validin.storm +++ b/storm/modules/s1.validin.storm @@ -5,6 +5,7 @@ $ingest_ctstream = $lib.import(s1.validin.ingest.ctstream) $ingest_dns = $lib.import(s1.validin.ingest.dns) $ingest_pivot = $lib.import(s1.validin.ingest.pivot) $ingest_registration = $lib.import(s1.validin.ingest.registration) +$ingest_reputation = $lib.import(s1.validin.ingest.reputation) function dns(fqdn_or_ip, options) { @@ -292,16 +293,16 @@ function pivot_http(node, options) { $hashes.append($node.props.`_s1:validin:response:class_1:md5`) } } - + if ($form = "hash:sha1" or $form = "hash:md5" or $form = "hash:sha256") { $hashes.append($node.repr()) } if ($lib.debug or $options.dry_run) {$lib.print(`Hashes: {$hashes}`)} if ($lib.debug or $options.dry_run) {$lib.print(`Limit: {$options.limit}`)} - + $all_records = ([]) - + for $hash in $hashes { $records = $api.hash_pivots( $hash, @@ -318,6 +319,35 @@ function pivot_http(node, options) { $ingest_pivot.pivot_stats($records) } } - + yield $all_records | uniq } + + +function reputation(node, options) { + // get reputation information for domains and IPs + + $form = $node.form() + + switch $form { + "inet:fqdn": {yield $reputation_domain($node.repr(), $options)} + ("inet:ipv4", "inet:ipv6"): {yield $reputation_ip($node.repr(), $options)} + *: {$lib.warn(`{$form} not supported`)} + } +} + + +function reputation_domain(fqdn, options) { + // get domain reputation + + $data = $api.domain_reputation($fqdn) + yield $ingest_reputation.domain_reputation($fqdn, $data) +} + + +function reputation_ip(ip, options) { + // get IP reputation + + $data = $api.ip_reputation($ip) + yield $ingest_reputation.ip_reputation($ip, $data) +} diff --git a/test/mock/axon_domain_reputation_quick_github.com.json b/test/mock/axon_domain_reputation_quick_github.com.json new file mode 100644 index 0000000..f03f654 --- /dev/null +++ 
b/test/mock/axon_domain_reputation_quick_github.com.json @@ -0,0 +1,169 @@ +{ + "query_opts": { + "limit": 20000, + "wildcard": false + }, + "informational": { + "structure": { + "fqdn": "github.com", + "e2ld": "github.com", + "etld": "com" + } + }, + "annotations": [ + { + "description": "MAGESTIC_MILLION_RANK", + "key": "github.com", + "value": 16, + "value_type": "int", + "category": "risk", + "risk_cat": "popularity", + "score": 1, + "weight": 9.59176003468815, + "title": "Magestic Stable Rank: 16", + "help_text": "The average rank of a domain on the Magestic top-N list over 14 days. A domain must remain on this list for 14 continuous days to be considered \"stable.\"" + }, + { + "description": "PIVOT_COUNT_ANCHORS_LINKS", + "key": "github.com", + "value": 78113880, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "ANCHORS_LINKS: Estimated Pivot Count: 78M", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_CERT_DOMAIN", + "key": "github.com", + "value": 13274120, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "CERT_DOMAIN: Estimated Pivot Count: 13M", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_IFRAMES_LINKS", + "key": "github.com", + "value": 19080, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "IFRAMES_LINKS: Estimated Pivot Count: 19K", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." 
+ }, + { + "description": "PIVOT_COUNT_JS_LINKS", + "key": "github.com", + "value": 130460, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "JS_LINKS: Estimated Pivot Count: 130K", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_LINKS_LINKS", + "key": "github.com", + "value": 620060, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "LINKS_LINKS: Estimated Pivot Count: 620K", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_LOCATION_DOMAIN", + "key": "github.com", + "value": 1918120, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "LOCATION_DOMAIN: Estimated Pivot Count: 1.9M", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_META_LINKS", + "key": "github.com", + "value": 569520, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "META_LINKS: Estimated Pivot Count: 570K", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "TRANCO_TOP_1M_RANK", + "key": "github.com", + "value": 48, + "value_type": "int", + "category": "risk", + "risk_cat": "popularity", + "score": 1, + "weight": 8.637517525248825, + "title": "Tranco Stable Rank: 48", + "help_text": "The average rank of a domain on the Tranco top-N list over 14 days. 
A domain must remain on this list for 14 continuous days to be considered \"stable.\"" + }, + { + "description": "UMBRELLA_TOP_1M_RANK", + "key": "github.com", + "value": 2588, + "value_type": "int", + "category": "risk", + "risk_cat": "popularity", + "score": 1, + "weight": 5.174071456006675, + "title": "Umbrella Stable Rank: 2588", + "help_text": "The average rank of a domain on the Umbrella top-N list over 14 days. A domain must remain on this list for 14 continuous days to be considered \"stable.\"" + }, + { + "description": "VALIDIN_TOP_ANCHORS_RANK", + "key": "github.com", + "value": 21, + "value_type": "int", + "category": "risk", + "risk_cat": "val_popularity", + "title": "Validin Top Anchor Links Rank: 21" + }, + { + "title": "Observed on Block Lists (10)", + "description": "malicious", + "category": "risk", + "risk_cat": "malicious", + "score": 8, + "weight": 2.0, + "values": [ + "abuse.ch ThreatFox database dump", + "abuse.ch URLhaus database dump", + "BotScout - Recently Caught Bots", + "Facebook Threat Research", + "PhishTank - online, valid phishing database", + "Phishing.Database - phishing domains ACTIVE", + "DigitalSide Threat-Intel - CSV listings", + "Zscaler ThreatLabz public report IOCs", + "urlhaus.abuse.ch - Download - URL", + "vxvault.net - URL" + ] + }, + { + "title": "Observed on Ad Block Lists (7)", + "description": "ads", + "category": "risk", + "risk_cat": "suspicious", + "score": 5, + "weight": 1.4, + "values": [ + "adblockplus.org - latvian easy list - Host", + "Easylist - Adblock Plus - RuAdList and EasyList combination subscription", + "AddBlockPlus - cookiemonster - host", + "fanboy-social - Adblock Plus - host", + "Adblock Plus - cntblock - host", + "Adblock Plus - dropboxusercontent.com - host", + "adblockplus - rolist+easylist - host" + ] + } + ], + "score": 1.7, + "verdict": "low_risk" +} \ No newline at end of file diff --git a/test/mock/axon_domain_reputation_quick_google.com.json 
b/test/mock/axon_domain_reputation_quick_google.com.json new file mode 100644 index 0000000..29b2cc7 --- /dev/null +++ b/test/mock/axon_domain_reputation_quick_google.com.json @@ -0,0 +1,167 @@ +{ + "query_opts": { + "limit": 20000, + "wildcard": false + }, + "informational": { + "structure": { + "fqdn": "google.com", + "e2ld": "google.com", + "etld": "com" + } + }, + "annotations": [ + { + "description": "MAGESTIC_MILLION_RANK", + "key": "google.com", + "value": 2, + "value_type": "int", + "category": "risk", + "risk_cat": "popularity", + "score": 1, + "weight": 11.397940008672037, + "title": "Magestic Stable Rank: 2", + "help_text": "The average rank of a domain on the Magestic top-N list over 14 days. A domain must remain on this list for 14 continuous days to be considered \"stable.\"" + }, + { + "description": "PIVOT_COUNT_ANCHORS_LINKS", + "key": "google.com", + "value": 9478060, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "ANCHORS_LINKS: Estimated Pivot Count: 9.5M", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_CERT_DOMAIN", + "key": "google.com", + "value": 2683300, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "CERT_DOMAIN: Estimated Pivot Count: 2.7M", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_IFRAMES_LINKS", + "key": "google.com", + "value": 63860, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "IFRAMES_LINKS: Estimated Pivot Count: 64K", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." 
+ }, + { + "description": "PIVOT_COUNT_JS_LINKS", + "key": "google.com", + "value": 315340, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "JS_LINKS: Estimated Pivot Count: 315K", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_LINKS_LINKS", + "key": "google.com", + "value": 809640, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "LINKS_LINKS: Estimated Pivot Count: 810K", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_LOCATION_DOMAIN", + "key": "google.com", + "value": 6299260, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "LOCATION_DOMAIN: Estimated Pivot Count: 6.3M", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_META_LINKS", + "key": "google.com", + "value": 22920, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "META_LINKS: Estimated Pivot Count: 23K", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "TRANCO_TOP_1M_RANK", + "key": "google.com", + "value": 1, + "value_type": "int", + "category": "risk", + "risk_cat": "popularity", + "score": 1, + "weight": 12.0, + "title": "Tranco Stable Rank: 1", + "help_text": "The average rank of a domain on the Tranco top-N list over 14 days. 
A domain must remain on this list for 14 continuous days to be considered \"stable.\"" + }, + { + "description": "UMBRELLA_TOP_1M_RANK", + "key": "google.com", + "value": 1, + "value_type": "int", + "category": "risk", + "risk_cat": "popularity", + "score": 1, + "weight": 12.0, + "title": "Umbrella Stable Rank: 1", + "help_text": "The average rank of a domain on the Umbrella top-N list over 14 days. A domain must remain on this list for 14 continuous days to be considered \"stable.\"" + }, + { + "description": "VALIDIN_TOP_ANCHORS_RANK", + "key": "google.com", + "value": 8, + "value_type": "int", + "category": "risk", + "risk_cat": "val_popularity", + "title": "Validin Top Anchor Links Rank: 8" + }, + { + "title": "Observed on Block Lists (6)", + "description": "malicious", + "category": "risk", + "risk_cat": "malicious", + "score": 8, + "weight": 1.2, + "values": [ + "abuse.ch ThreatFox database dump", + "BotScout - Recently Caught Bots", + "Phishunt.io feed", + "PhishTank - online, valid phishing database", + "Phishing.Database - phishing domains ACTIVE", + "Validin Public Phishing Feed" + ] + }, + { + "title": "Observed on Ad Block Lists (9)", + "description": "ads", + "category": "risk", + "risk_cat": "suspicious", + "score": 5, + "weight": 1.8, + "values": [ + "adblockplus.org - latvian easy list - Host", + "Easylist - Adblock Plus - RuAdList and EasyList combination subscription", + "AddBlockPlus - cookiemonster - host", + "fanboy-social - Adblock Plus - host", + "Adblock Plus - cntblock - host", + "adblockplus - easylist - host", + "adblockplus - slovenian-list - host", + "adblockplus - rolist+easylist - host", + "adblockplus.org - YousList" + ] + } + ], + "score": 1.4, + "verdict": "low_risk" +} \ No newline at end of file diff --git a/test/mock/axon_domain_reputation_quick_micsrosoftonline.com.json b/test/mock/axon_domain_reputation_quick_micsrosoftonline.com.json new file mode 100644 index 0000000..048a8d4 --- /dev/null +++ 
b/test/mock/axon_domain_reputation_quick_micsrosoftonline.com.json @@ -0,0 +1,62 @@ +{ + "query_opts": { + "limit": 20000, + "wildcard": false + }, + "informational": { + "structure": { + "fqdn": "micsrosoftonline.com", + "e2ld": "micsrosoftonline.com", + "etld": "com" + }, + "tags": { + "category/malicious": 1 + } + }, + "annotations": [ + { + "description": "USOMTR", + "title": "usom.gov.tr - Malicious URL list", + "url": "https://www.usom.gov.tr/url-list.txt", + "custom": { + "tags": [ + "category/malicious" + ] + }, + "key": "micsrosoftonline.com", + "score": 9, + "weight": 1, + "category": "risk", + "risk_cat": "malicious" + }, + { + "category": "risk", + "description": "Observed on Maltrail", + "title": "apt_voidblizzard (Malware)", + "weight": 5, + "score": 9, + "url": "/threats/detailed/apt_voidblizzard", + "custom": { + "references": [ + "https://www.microsoft.com/en-us/security/blog/2025/05/27/new-russia-affiliated-actor-void-blizzard-targets-critical-sectors-for-espionage/" + ], + "type": "apt_voidblizzard", + "category": "malware", + "ext_url": "https://github.com/stamparm/maltrail/blob/master/trails/static/malware/apt_voidblizzard.txt", + "source": "maltrail", + "aliases": [ + "laundry bear", + "void blizzard", + "pluggyape", + "uac-0190" + ] + }, + "risk_cat": "malware", + "maltrail": "https://github.com/stamparm/maltrail/blob/master/trails/static/malware/apt_voidblizzard.txt", + "key_type": "dom", + "key": "micsrosoftonline.com" + } + ], + "score": 9.0, + "verdict": "malicious" +} \ No newline at end of file diff --git a/test/mock/axon_ip_reputation_quick_1.1.1.1.json b/test/mock/axon_ip_reputation_quick_1.1.1.1.json new file mode 100644 index 0000000..77d1c09 --- /dev/null +++ b/test/mock/axon_ip_reputation_quick_1.1.1.1.json @@ -0,0 +1,82 @@ +{ + "query_opts": { + "limit": 20000, + "wildcard": false + }, + "informational": { + "sources": [ + { + "key": "1.1.1.0/24", + "value": 13335.0, + "value_type": "float" + } + ], + "location": { + "country": 
"AU", + "latitude": "-33.8688", + "longitude": "151.209", + "postcode": "", + "city": "Sydney", + "state": "New South Wales", + "cidr": "1.1.1.0/24" + }, + "ownership": [ + { + "owner": "CLOUDFLARENET", + "country": "US", + "asn": "AS 13335", + "name": "CLOUDFLARENET, US", + "cidrs": [ + "1.1.1.0/24" + ] + } + ] + }, + "annotations": [ + { + "description": "PIVOT_COUNT_IP", + "key": "1.1.1.1", + "value": 2213460, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "IP: Estimated Pivot Count: 2.2M", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." + }, + { + "description": "PIVOT_COUNT_LOCATION_IP4", + "key": "1.1.1.1", + "value": 18100, + "value_type": "float", + "category": "risk", + "risk_cat": "val_popularity", + "title": "LOCATION_IP4: Estimated Pivot Count: 18K", + "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful." 
+ }, + { + "description": "VALIDIN_TOP_A_RANK", + "key": "1.1.1.1", + "value": 3220, + "value_type": "int", + "category": "risk", + "risk_cat": "val_popularity", + "title": "Validin Top IPv4 Addresses Rank: 3220" + }, + { + "title": "Observed on Block Lists (4)", + "description": "malicious", + "category": "risk", + "risk_cat": "malicious", + "score": 8, + "weight": 0.8, + "values": [ + "abuse.ch ThreatFox database dump", + "BotScout - Recently Caught Bots", + "botscout - 1 day - IP", + "report.cs.rutgers.edu - IP" + ] + } + ], + "score": 8.0, + "verdict": "malicious" +} \ No newline at end of file diff --git a/test/mock/axon_ip_reputation_quick_8.8.8.8.json b/test/mock/axon_ip_reputation_quick_8.8.8.8.json new file mode 100644 index 0000000..9ba1ed5 --- /dev/null +++ b/test/mock/axon_ip_reputation_quick_8.8.8.8.json @@ -0,0 +1,126 @@ +{ + "query_opts": { + "limit": 20000, + "wildcard": false + }, + "informational": { + "title": "Google Cloud", + "cloud": "google", + "cidrs": [ + "8.8.8.0/24" + ], + "sources": [ + { + "key": "8.0.0.0/9", + "value": 3356.0, + "value_type": "float" + }, + { + "key": "8.0.0.0/12", + "value": 3356.0, + "value_type": "float" + }, + { + "key": "8.8.8.0/24", + "value": 15169.0, + "value_type": "float" + }, + { + "description": "GOOGIP", + "title": "Google Cloud", + "url": "https://www.gstatic.com/ipranges/goog.json", + "custom": { + "tags": [ + "category/infrastructure", + "type/google" + ] + }, + "key": "8.8.8.0/24", + "cloud": "google" + } + ], + "location": { + "country": "US", + "latitude": "37.422", + "longitude": "-122.085", + "postcode": "", + "city": "Mountain View", + "state": "California", + "cidr": "8.8.8.0/24" + }, + "tags": { + "category/infrastructure": 1, + "type/google": 1 + }, + "ownership": [ + { + "owner": "GOOGLE", + "country": "US", + "asn": "AS 15169", + "name": "GOOGLE, US", + "cidrs": [ + "8.8.8.0/24" + ] + }, + { + "owner": "LEVEL3", + "country": "US", + "asn": "AS 3356", + "name": "LEVEL3, US", + "cidrs": [ + 
"8.0.0.0/12",
+                    "8.0.0.0/9"
+                ]
+            }
+        ]
+    },
+    "annotations": [
+        {
+            "description": "PIVOT_COUNT_IP",
+            "key": "8.8.8.8",
+            "value": 875980,
+            "value_type": "float",
+            "category": "risk",
+            "risk_cat": "val_popularity",
+            "title": "IP: Estimated Pivot Count: 876K",
+            "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful."
+        },
+        {
+            "description": "PIVOT_COUNT_LOCATION_IP4",
+            "key": "8.8.8.8",
+            "value": 13580,
+            "value_type": "float",
+            "category": "risk",
+            "risk_cat": "val_popularity",
+            "title": "LOCATION_IP4: Estimated Pivot Count: 14K",
+            "help_text": "The Validin estimated count of this pivot's popularity. Pivots with high estimated counts are less likely to be useful."
+        },
+        {
+            "description": "VALIDIN_TOP_A_RANK",
+            "key": "8.8.8.8",
+            "value": 2225,
+            "value_type": "int",
+            "category": "risk",
+            "risk_cat": "val_popularity",
+            "title": "Validin Top IPv4 Addresses Rank: 2225"
+        },
+        {
+            "title": "Observed on Block Lists (6)",
+            "description": "malicious",
+            "category": "risk",
+            "risk_cat": "malicious",
+            "score": 8,
+            "weight": 1.2,
+            "values": [
+                "abuse.ch ThreatFox database dump",
+                "BotScout - Recently Caught Bots",
+                "botscout - 1 day - IP",
+                "firebog.net - Host - Prigent-Malware",
+                "MyIP.ms - Latest IP Blacklist",
+                "StrictBlockPAllebone - Manually curated IP Blocklist of malicios IPs that scan/attempt to connect to services"
+            ]
+        }
+    ],
+    "score": 8.0,
+    "verdict": "malicious"
+}
\ No newline at end of file
diff --git a/test/test_reputation.py b/test/test_reputation.py
new file mode 100644
index 0000000..ce9bf76
--- /dev/null
+++ b/test/test_reputation.py
@@ -0,0 +1,194 @@
+import re
+
+import synapse.tests.utils as s_t_utils
+from aioresponses import aioresponses
+from synapse.tools import genpkg
+
+from .common import BASE_URL, SYNAPSE_PACKAGE_YAML, get_mock_file_content
+
+
+class TestReputation(s_t_utils.SynTest):
+    """Tests for ``s1.validin.reputation`` driven by recorded API responses."""
+
+    @staticmethod
+    def _mock_reputation(mocked: aioresponses, kind: str, indicator: str) -> None:
+        """Register the recorded quick-reputation response for ``indicator``.
+
+        ``kind`` is the API path segment (``domain`` or ``ip``). The response
+        body is read from ``axon_<kind>_reputation_quick_<indicator>.json``.
+        """
+        url = f"{BASE_URL}axon/{kind}/reputation/quick/{indicator}"
+        mocked.get(
+            # Prefix match: there is no end anchor, so anything appended to
+            # the URL still matches.
+            re.compile(f"^{re.escape(url)}"),
+            status=200,
+            body=get_mock_file_content(
+                f"axon_{kind}_reputation_quick_{indicator}.json"
+            ),
+        )
+
+    @aioresponses()
+    async def test_fqdn_reputation(self, mocked: aioresponses):
+        """Rank properties and raw node data are stored for google.com."""
+        self._mock_reputation(mocked, "domain", "google.com")
+
+        pkgdef = genpkg.loadPkgProto(SYNAPSE_PACKAGE_YAML)
+        async with self.getTestCore() as core:
+            await core.addStormPkg(pkgdef)
+            with self.getAsyncLoggerStream("synapse.storm.log"):
+
+                # Run the storm command to trigger reputation check
+                await core.callStorm("[inet:fqdn=google.com] | s1.validin.reputation")
+
+                mocked.assert_called()
+
+                # Assert meta:source exists
+                metasource = await core.callStorm(
+                    'meta:source:name="validin api" | return($node.repr())'
+                )
+                assert metasource, "Find metasource node"
+
+                # Assert rank properties are set
+                majestic = await core.callStorm(
+                    'inet:fqdn=google.com | return($node.props."_s1:validin:rank:majestic")'
+                )
+                assert majestic == 2, "Majestic rank should be 2"
+
+                tranco = await core.callStorm(
+                    'inet:fqdn=google.com | return($node.props."_s1:validin:rank:tranco")'
+                )
+                assert tranco == 1, "Tranco rank should be 1"
+
+                umbrella = await core.callStorm(
+                    'inet:fqdn=google.com | return($node.props."_s1:validin:rank:umbrella")'
+                )
+                assert umbrella == 1, "Umbrella rank should be 1"
+
+                anchors = await core.callStorm(
+                    'inet:fqdn=google.com | return($node.props."_s1:validin:rank:anchors")'
+                )
+                assert anchors == 8, "Anchors rank should be 8"
+
+                # Assert node data is stored
+                data = await core.callStorm(
+                    'inet:fqdn=google.com | return($node.data.get(s1:validin:reputation))'
+                )
+                assert data is not None, "Raw reputation data should be stored"
+                assert data.get("score") == 1.4, "Score should be 1.4"
+
+    @aioresponses()
+    async def test_ipv4_reputation(self, mocked: aioresponses):
+        """Rank, ASN and location properties are set from the 1.1.1.1 fixture."""
+        self._mock_reputation(mocked, "ip", "1.1.1.1")
+
+        pkgdef = genpkg.loadPkgProto(SYNAPSE_PACKAGE_YAML)
+        async with self.getTestCore() as core:
+            await core.addStormPkg(pkgdef)
+
+            # Ensure model extensions are loaded (onload should do this, but test env may need explicit call)
+            try:
+                await core.callStorm("$lib.import(s1.validin.model).update_forms()")
+            except Exception as exc:
+                # Best effort: keep going, but leave a trace instead of
+                # hiding why the explicit call failed.
+                print(f"update_forms() not applied: {exc!r}")
+
+            with self.getAsyncLoggerStream("synapse.storm.log"):
+                # Run the storm command
+                msgs = await core.stormlist("[inet:ipv4=1.1.1.1] | s1.validin.reputation")
+
+                # Surface warnings for debugging; errors fail the test.
+                for msg in msgs:
+                    if msg[0] == "warn":
+                        print(f"Storm warn: {msg[1]}")
+                errs = [msg[1] for msg in msgs if msg[0] == "err"]
+                assert not errs, f"Storm command reported errors: {errs}"
+
+                mocked.assert_called()
+
+                # Assert meta:source exists
+                metasource = await core.callStorm(
+                    'meta:source:name="validin api" | return($node.repr())'
+                )
+                assert metasource, "Find metasource node"
+
+                # Assert rank properties
+                pivot_count = await core.callStorm(
+                    'inet:ipv4=1.1.1.1 | return($node.props."_s1:validin:rank:pivot_count")'
+                )
+                assert pivot_count == 2213460, "Pivot count should be 2213460"
+
+                top_a = await core.callStorm(
+                    'inet:ipv4=1.1.1.1 | return($node.props."_s1:validin:rank:top_a")'
+                )
+                assert top_a == 3220, "Top A rank should be 3220"
+
+                # Assert ASN node created
+                asn_count = await core.count("inet:asn=13335")
+                assert asn_count == 1, "ASN node should be created"
+
+                # Assert ASN linked to IP
+                asn = await core.callStorm(
+                    'inet:ipv4=1.1.1.1 | return($node.props.asn)'
+                )
+                assert asn == 13335, "ASN should be 13335"
+
+                # Assert ASN name
+                asn_name = await core.callStorm(
+                    'inet:asn=13335 | return($node.props.name)'
+                )
+                assert asn_name == "cloudflarenet", "ASN name should be cloudflarenet"
+
+                # Assert location
+                latlong = await core.callStorm(
+                    'inet:ipv4=1.1.1.1 | return($node.props.latlong)'
+                )
+                assert latlong is not None, "Latlong should be set"
+                assert latlong[0] == -33.8688, "Latitude should be -33.8688"
+                assert latlong[1] == 151.209, "Longitude should be 151.209"
+
+                loc = await core.callStorm(
+                    'inet:ipv4=1.1.1.1 | return($node.props.loc)'
+                )
+                assert loc == "au", "Location should be AU"
+
+                # Assert node data is stored
+                data = await core.callStorm(
+                    'inet:ipv4=1.1.1.1 | return($node.data.get(s1:validin:reputation))'
+                )
+                assert data is not None, "Raw reputation data should be stored"
+
+    @aioresponses()
+    async def test_malicious_domain(self, mocked: aioresponses):
+        """Raw data is stored and no Majestic rank is set for the typosquat."""
+        self._mock_reputation(mocked, "domain", "micsrosoftonline.com")
+
+        pkgdef = genpkg.loadPkgProto(SYNAPSE_PACKAGE_YAML)
+        async with self.getTestCore() as core:
+            await core.addStormPkg(pkgdef)
+            with self.getAsyncLoggerStream("synapse.storm.log"):
+
+                # Run the storm command
+                await core.callStorm("[inet:fqdn=micsrosoftonline.com] | s1.validin.reputation")
+
+                mocked.assert_called()
+
+                # Assert meta:source exists
+                metasource = await core.callStorm(
+                    'meta:source:name="validin api" | return($node.repr())'
+                )
+                assert metasource, "Find metasource node"
+
+                # Assert node data is stored even without rank annotations
+                data = await core.callStorm(
+                    'inet:fqdn=micsrosoftonline.com | return($node.data.get(s1:validin:reputation))'
+                )
+                assert data is not None, "Raw reputation data should be stored"
+
+                # Verify command handles missing rank annotations gracefully
+                # (no properties should be set, but no errors should occur)
+                majestic = await core.callStorm(
+                    'inet:fqdn=micsrosoftonline.com | return($node.props."_s1:validin:rank:majestic")'
+                )
+                assert majestic is None, "Majestic rank should be None for malicious domain"