-
-
Notifications
You must be signed in to change notification settings - Fork 259
Migrate Importer to Advisory v2 & Collect Existing Fix Commits for Project KB #1987
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
ziadhany
wants to merge
6
commits into
aboutcode-org:main
Choose a base branch
from
ziadhany:kb-commits
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
f3d45ca
Add initial support for collecting ProjectKBP old fix commits.
ziadhany 80b43fe
Add ProjectKBv2 importer
ziadhany 666e774
Drop project_kb_msr2019 V1 importer
ziadhany b825f02
Split the project-kb into two separate pipelines
ziadhany caab13b
Refactor CSV processing to avoid duplicate iteration over items
ziadhany 341f711
Fix an indentation bug and update the test case
ziadhany File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
97 changes: 97 additions & 0 deletions
97
vulnerabilities/pipelines/v2_importers/project_kb_msr2019_importer.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| # | ||
| # Copyright (c) nexB Inc. and others. All rights reserved. | ||
| # VulnerableCode is a trademark of nexB Inc. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
| # See https://github.com/aboutcode-org/vulnerablecode for support or download. | ||
| # See https://aboutcode.org for more information about nexB OSS projects. | ||
| # | ||
|
|
||
| import csv | ||
| from pathlib import Path | ||
| from typing import Iterable | ||
|
|
||
| from fetchcode.vcs import fetch_via_vcs | ||
|
|
||
| from vulnerabilities.importer import AdvisoryData | ||
| from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 | ||
| from vulnerabilities.pipes.advisory import append_patch_classifications | ||
|
|
||
|
|
||
class ProjectKBMSR2019Pipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Import fix-commit advisories from the ProjectKB MSR 2019 dataset.

    The data is a single CSV file in the SAP/project-kb repository:
    https://github.com/SAP/project-kb/blob/main/MSR2019/dataset/vulas_db_msr2019_release.csv

    Every usable row (vulnerability id, repository URL, commit hash, plus a
    fourth column that this importer does not use) yields one ``AdvisoryData``.
    """

    pipeline_id = "project-kb-MSR-2019_v2"
    spdx_license_expression = "Apache-2.0"
    license_url = "https://github.com/SAP/project-kb/blob/main/LICENSE.txt"
    repo_url = "git+https://github.com/SAP/project-kb"

    # Path of the dataset inside the cloned repository.
    _CSV_RELATIVE_PATH = "MSR2019/dataset/vulas_db_msr2019_release.csv"
    # Public URL of the dataset, recorded as the URL of every advisory.
    _CSV_URL = "https://github.com/SAP/project-kb/blob/main/MSR2019/dataset/vulas_db_msr2019_release.csv"

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps: clone, import, clean up."""
        return (cls.clone_repo, cls.collect_and_store_advisories, cls.clean_downloads)

    def clone_repo(self):
        """Clone the ProjectKB repository and keep the response on ``self.vcs_response``."""
        self.log("Cloning ProjectKB advisory data...")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def _iter_fix_commit_rows(self):
        """
        Yield ``(vuln_id, vcs_url, commit_hash)`` for every usable CSV row.

        The header line is skipped. Rows that do not have exactly four columns,
        or that have an empty id, repository URL or commit hash, are ignored.
        """
        csv_path = Path(self.vcs_response.dest_dir) / self._CSV_RELATIVE_PATH

        with open(csv_path, newline="", encoding="utf-8") as f:
            reader = csv.reader(f)
            next(reader, None)  # skip header

            for row in reader:
                if len(row) != 4:
                    continue

                # The fourth column is not needed to build an advisory.
                vuln_id, vcs_url, commit_hash, _unused = row

                if not vuln_id or not vcs_url or not commit_hash:
                    continue

                yield vuln_id, vcs_url, commit_hash

    def advisories_count(self):
        """
        Return the number of advisories ``collect_advisories`` will yield.

        Uses the same row filter as ``collect_advisories`` so that malformed
        rows are not counted.
        """
        count = sum(1 for _ in self._iter_fix_commit_rows())

        self.log(f"Estimated advisories to process: {count}")
        return count

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield one ``AdvisoryData`` per usable fix-commit row of the CSV dataset."""
        self.log("Collecting fix commits from ProjectKB ( vulas_db_msr2019_release )...")

        for vuln_id, vcs_url, commit_hash in self._iter_fix_commit_rows():
            # ``append_patch_classifications`` fills these lists in place.
            patches = []
            affected_packages = []
            references = []
            append_patch_classifications(
                url=vcs_url,
                commit_hash=commit_hash,
                patch_text=None,
                affected_packages=affected_packages,
                references=references,
                patches=patches,
            )

            yield AdvisoryData(
                advisory_id=vuln_id,
                affected_packages=affected_packages,
                patches=patches,
                references_v2=references,
                url=self._CSV_URL,
            )

    def clean_downloads(self):
        """Remove the cloned repository from disk."""
        self.log("Removing cloned repository...")
        # ``vcs_response`` is only assigned by ``clone_repo``; when cloning itself
        # failed, ``on_failure`` still lands here and the attribute may be missing.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            vcs_response.delete()

    def on_failure(self):
        """Ensure cleanup happens on pipeline failure."""
        self.clean_downloads()
176 changes: 176 additions & 0 deletions
176
vulnerabilities/pipelines/v2_importers/project_kb_statements_importer.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,176 @@ | ||
| # | ||
| # Copyright (c) nexB Inc. and others. All rights reserved. | ||
| # VulnerableCode is a trademark of nexB Inc. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
| # See https://github.com/aboutcode-org/vulnerablecode for support or download. | ||
| # See https://aboutcode.org for more information about nexB OSS projects. | ||
| # | ||
| from collections import defaultdict | ||
| from pathlib import Path | ||
| from typing import Iterable | ||
|
|
||
| import saneyaml | ||
| from fetchcode.vcs import fetch_via_vcs | ||
| from packageurl import PackageURL | ||
| from univers.version_range import RANGE_CLASS_BY_SCHEMES | ||
| from univers.versions import InvalidVersion | ||
|
|
||
| from vulnerabilities.importer import AdvisoryData | ||
| from vulnerabilities.importer import AffectedPackageV2 | ||
| from vulnerabilities.importer import ReferenceV2 | ||
| from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 | ||
| from vulnerabilities.pipes.advisory import append_patch_classifications | ||
| from vulnerabilities.utils import get_advisory_url | ||
| from vulnerabilities.utils import is_commit | ||
|
|
||
|
|
||
class ProjectKBStatementsPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Import advisories from the ProjectKB YAML statements.

    The data lives on the ``vulnerability-data`` branch of SAP/project-kb:
    https://github.com/SAP/project-kb/blob/vulnerability-data/statements/*/*.yaml

    Every ``statement.yaml`` that has a ``vulnerability_id`` yields one
    ``AdvisoryData`` built from its ``notes``, ``fixes`` and ``artifacts``.
    """

    pipeline_id = "project-kb-statements_v2"
    spdx_license_expression = "Apache-2.0"
    license_url = "https://github.com/SAP/project-kb/blob/main/LICENSE.txt"
    repo_url = "git+https://github.com/SAP/project-kb@vulnerability-data"

    # Only files with this exact name are treated as advisories.
    _STATEMENT_FILENAME = "statement.yaml"

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps: clone, import, clean up."""
        return (cls.clone_repo, cls.collect_and_store_advisories, cls.clean_downloads)

    def clone_repo(self):
        """Clone the ProjectKB repository and keep the response on ``self.vcs_response``."""
        self.log("Cloning ProjectKB Statements advisory data...")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def _statements_dir(self):
        """Return the ``statements`` directory of the cloned repository."""
        return Path(self.vcs_response.dest_dir) / "statements"

    def advisories_count(self):
        """
        Return the number of statement files found in the cloned repository.

        Only ``statement.yaml`` files are counted, which are the same files
        that ``collect_advisories`` processes.
        """
        base_path = self._statements_dir()
        count = sum(1 for _ in base_path.rglob(self._STATEMENT_FILENAME))
        self.log(f"Estimated advisories to process: {count}")
        return count

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield one ``AdvisoryData`` per ``statement.yaml`` with a vulnerability id.

        Files that are empty, that do not contain a mapping, or that have no
        ``vulnerability_id`` are skipped.
        """
        self.log("Collecting fix commits from YAML statements under /statements....")
        base_path = self._statements_dir()

        for yaml_file in base_path.rglob(self._STATEMENT_FILENAME):
            with open(yaml_file, encoding="utf-8") as f:
                yaml_data = saneyaml.load(f)

            # An empty file loads as None; anything but a mapping has no fields.
            if not isinstance(yaml_data, dict):
                continue

            vulnerability_id = yaml_data.get("vulnerability_id")
            if not vulnerability_id:
                continue

            description, references = self._collect_notes(yaml_data)

            affected_packages = []
            patches = []
            self._collect_fix_commits(
                yaml_data,
                affected_packages=affected_packages,
                references=references,
                patches=patches,
            )
            affected_packages.extend(self._collect_artifact_packages(yaml_data))

            advisory_url = get_advisory_url(
                file=yaml_file,
                base_path=base_path,
                url="https://github.com/SAP/project-kb/blob/vulnerability-data/statements/",
            )

            yield AdvisoryData(
                advisory_id=vulnerability_id,
                summary=description,
                affected_packages=affected_packages,
                references_v2=references,
                patches=patches,
                url=advisory_url,
            )

    def _collect_notes(self, yaml_data):
        """
        Return ``(description, references)`` built from the ``notes`` entries.

        The description joins every non-empty note ``text`` with newlines.
        Each entry of a note's ``links`` becomes a ``ReferenceV2``, including
        links on notes that have no text.
        """
        note_texts = []
        references = []
        # ``or []`` also covers keys that are present with a null value.
        for note_entry in yaml_data.get("notes") or []:
            text_content = note_entry.get("text")
            if text_content:
                note_texts.append(text_content)

            for link in note_entry.get("links") or []:
                references.append(ReferenceV2(url=link))

        return "\n".join(note_texts), references

    def _collect_fix_commits(self, yaml_data, affected_packages, references, patches):
        """
        Classify every commit listed under ``fixes`` into the given lists.

        The three lists are filled in place by ``append_patch_classifications``.
        Commits without a ``repository`` are skipped.
        """
        for fix in yaml_data.get("fixes") or []:
            for commit in fix.get("commits") or []:
                vcs_url = commit.get("repository")
                if not vcs_url:
                    # Without a repository URL there is nothing to classify.
                    continue

                commit_hash = commit.get("id")
                if not is_commit(commit_hash):
                    # Keep the repository URL but drop ids that are not commits.
                    commit_hash = None

                append_patch_classifications(
                    url=vcs_url,
                    commit_hash=commit_hash,
                    patch_text=None,
                    affected_packages=affected_packages,
                    references=references,
                    patches=patches,
                )

    def _collect_artifact_packages(self, yaml_data):
        """
        Return a list of ``AffectedPackageV2`` built from the ``artifacts`` entries.

        Artifact purls are grouped by type/namespace/name. Versions of artifacts
        flagged ``affected`` form the affected range, all others the fixed range.
        Artifacts with an invalid purl, an unsupported purl type or no version
        are skipped.
        """
        # base purl -> [affected versions, fixed versions]
        purls_to_versions = defaultdict(lambda: [[], []])
        for artifact in yaml_data.get("artifacts") or []:
            affected = artifact.get("affected")
            purl_str = artifact.get("id")

            try:
                purl = PackageURL.from_string(purl_str)
            except ValueError:
                self.log(f"Invalid PackageURL: {purl_str!r}")
                continue

            if not RANGE_CLASS_BY_SCHEMES.get(purl.type):
                continue

            if not purl.version:
                # A purl without a version cannot contribute to a version range.
                self.log(f"Missing version in PackageURL: {purl_str!r}")
                continue

            base_purl = PackageURL(
                type=purl.type,
                namespace=purl.namespace,
                name=purl.name,
            )

            if affected:
                purls_to_versions[base_purl][0].append(purl.version)
            else:
                purls_to_versions[base_purl][1].append(purl.version)

        packages = []
        for base_purl, (affected_versions, fixed_versions) in purls_to_versions.items():
            # Never None here: unsupported purl types were filtered out above.
            version_range_class = RANGE_CLASS_BY_SCHEMES.get(base_purl.type)

            affected_range = None
            fixed_range = None

            if affected_versions:
                try:
                    affected_range = version_range_class.from_versions(affected_versions)
                except InvalidVersion as e:
                    self.log(f"Invalid affected versions for {base_purl}: {e}")

            if fixed_versions:
                try:
                    fixed_range = version_range_class.from_versions(fixed_versions)
                except InvalidVersion as e:
                    self.log(f"Invalid fixed versions for {base_purl}: {e}")

            if affected_range or fixed_range:
                packages.append(
                    AffectedPackageV2(
                        package=base_purl,
                        affected_version_range=affected_range,
                        fixed_version_range=fixed_range,
                    )
                )

        return packages

    def clean_downloads(self):
        """Remove the cloned repository from disk."""
        self.log("Removing cloned repository...")

        # ``vcs_response`` is only assigned by ``clone_repo``; when cloning itself
        # failed, ``on_failure`` still lands here and the attribute may be missing.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            vcs_response.delete()

    def on_failure(self):
        """Ensure cleanup happens on pipeline failure."""
        self.clean_downloads()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The notes section may also contain a list of links, which should be collected and stored as references. See this example: https://github.com/SAP/project-kb/blob/730d2192bfa9b909246e02bc051e4fad2958a6d9/statements/CVE-2018-16406/statement.yaml