From 0f8e2961cbfc47237b705b60d20e0ff3c5b3ad8e Mon Sep 17 00:00:00 2001 From: Shrish0098 Date: Fri, 7 Feb 2025 13:14:05 +0530 Subject: [PATCH 1/6] Fix: Added new pipeline called anchore_importer and also updated SOURCES.rst and __init__.py Signed-off-by: Shrish Mishra Signed-off-by: Shrish0098 --- SOURCES.rst | 7 ++ vulnerabilities/importers/__init__.py | 4 +- vulnerabilities/pipelines/anchore_importer.py | 77 +++++++++++++++++++ 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 vulnerabilities/pipelines/anchore_importer.py diff --git a/SOURCES.rst b/SOURCES.rst index bc0963a10..67d691c84 100644 --- a/SOURCES.rst +++ b/SOURCES.rst @@ -1,3 +1,10 @@ +.. _anchore: + +Anchore NVD Overrides +--------------------- +Anchore provides overrides for NVD data to improve accuracy. This importer fetches data from their +`nvd-data-overrides `_ repository. + +----------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------+ |Importer Name | Data Source |Ecosystems Covered | +================+======================================================================================================+====================================================+ diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 3f429f669..6678211d5 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -6,7 +6,6 @@ # See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# - from vulnerabilities.importers import apache_httpd from vulnerabilities.importers import apache_kafka from vulnerabilities.importers import apache_tomcat @@ -42,6 +41,8 @@ from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer from vulnerabilities.pipelines import pysec_importer +from vulnerabilities.pipelines.anchore_importer import AnchoreImporterPipeline + IMPORTERS_REGISTRY = [ openssl.OpensslImporter, @@ -78,6 +79,7 @@ nvd_importer.NVDImporterPipeline, pysec_importer.PyPIImporterPipeline, alpine_linux_importer.AlpineLinuxImporterPipeline, + AnchoreImporterPipeline, ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/pipelines/anchore_importer.py b/vulnerabilities/pipelines/anchore_importer.py new file mode 100644 index 000000000..1ccb1e276 --- /dev/null +++ b/vulnerabilities/pipelines/anchore_importer.py @@ -0,0 +1,77 @@ +from datetime import datetime +from typing import Iterable +import requests +import yaml +from packageurl import PackageURL +from univers.versions import SemverVersion + +from vulnerabilities.importer import AdvisoryData, AffectedPackage, Reference +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline + + +class AnchoreImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect advisories from Anchore's NVD overrides.""" + + pipeline_id = "anchore_importer" + root_url = "https://github.com/anchore/nvd-data-overrides" + license_url = "https://github.com/anchore/nvd-data-overrides/blob/main/LICENSE" + spdx_license_expression = "CC0-1.0" # License of Anchore's data + importer_name = "Anchore NVD Overrides Importer" + + @classmethod + def steps(cls): + return ( + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) + + def advisories_count(self) -> int: + raw_data = self.fetch_data() + return len(raw_data) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + raw_data = self.fetch_data() + for entry in raw_data: + yield 
self.parse_advisory_data(entry) + + def fetch_data(self): + """Fetch Anchore's NVD overrides from their GitHub repository.""" + url = "https://raw.githubusercontent.com/anchore/nvd-data-overrides/main/overrides.yaml" + response = requests.get(url) + response.raise_for_status() + return yaml.safe_load(response.text) # Correct YAML parsing + + def parse_advisory_data(self, raw_data) -> AdvisoryData: + """Parse a single advisory entry into an AdvisoryData object.""" + # Ensure required fields are present + if not all(key in raw_data for key in ["cve_id", "package_name", "affected_versions"]): + return None + + purl = PackageURL(type="generic", name=raw_data["package_name"]) + affected_version_range = raw_data["affected_versions"] # Use raw version range string + fixed_version = ( + SemverVersion(raw_data["fixed_version"]) if raw_data.get("fixed_version") else None + ) + + affected_package = AffectedPackage( + package=purl, + affected_version_range=affected_version_range, + fixed_version=fixed_version, + ) + + references = [ + Reference(url=url) for url in raw_data.get("references", []) if url + ] + date_published = ( + datetime.strptime(raw_data["published_date"], "%Y-%m-%d") + if raw_data.get("published_date") + else None + ) + + return AdvisoryData( + aliases=[raw_data["cve_id"]], + summary=raw_data.get("description", ""), + affected_packages=[affected_package], + references=references, + date_published=date_published, + ) \ No newline at end of file From a8f707afabe8aec13e45fca1b80ade510fa6fbef Mon Sep 17 00:00:00 2001 From: Shrish0098 Date: Fri, 7 Feb 2025 21:16:28 +0530 Subject: [PATCH 2/6] Fixed the changes asked in review Signed-off-by: Shrish Mishra Signed-off-by: Shrish0098 --- vulnerabilities/importers/__init__.py | 5 ++--- vulnerabilities/pipelines/anchore_importer.py | 14 +++++++++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 6678211d5..2932b6a89 
100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -34,6 +34,7 @@ from vulnerabilities.importers import xen from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.pipelines import alpine_linux_importer +from vulnerabilities.pipelines import anchore_importer from vulnerabilities.pipelines import github_importer from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.pipelines import nginx_importer @@ -41,8 +42,6 @@ from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer from vulnerabilities.pipelines import pysec_importer -from vulnerabilities.pipelines.anchore_importer import AnchoreImporterPipeline - IMPORTERS_REGISTRY = [ openssl.OpensslImporter, @@ -79,7 +78,7 @@ nvd_importer.NVDImporterPipeline, pysec_importer.PyPIImporterPipeline, alpine_linux_importer.AlpineLinuxImporterPipeline, - AnchoreImporterPipeline, + anchore_importer.AnchoreImporterPipeline, # Updated to follow the consistent style ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/pipelines/anchore_importer.py b/vulnerabilities/pipelines/anchore_importer.py index 1ccb1e276..842f323f2 100644 --- a/vulnerabilities/pipelines/anchore_importer.py +++ b/vulnerabilities/pipelines/anchore_importer.py @@ -1,11 +1,23 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + from datetime import datetime from typing import Iterable + import requests import yaml from packageurl import PackageURL from univers.versions import SemverVersion -from vulnerabilities.importer import AdvisoryData, AffectedPackage, Reference +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline From c76793bd063a2dc36ca6ca03eb3adb5f0189cea9 Mon Sep 17 00:00:00 2001 From: Shrish0098 Date: Sat, 8 Feb 2025 12:34:34 +0530 Subject: [PATCH 3/6] Fix: Added Liferay advisories Signed-off-by: Shrish Mishra Signed-off-by: Shrish0098 --- vulnerabilities/importers/__init__.py | 4 +- vulnerabilities/importers/liferay.py | 100 ++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 vulnerabilities/importers/liferay.py diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 2932b6a89..6141156d3 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -32,6 +32,7 @@ from vulnerabilities.importers import ubuntu_usn from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen +from vulnerabilities.importers import liferay from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.pipelines import alpine_linux_importer from vulnerabilities.pipelines import anchore_importer @@ -78,7 +79,8 @@ nvd_importer.NVDImporterPipeline, pysec_importer.PyPIImporterPipeline, alpine_linux_importer.AlpineLinuxImporterPipeline, - anchore_importer.AnchoreImporterPipeline, # Updated to follow the consistent style + anchore_importer.AnchoreImporterPipeline, + liferay.LiferayImporter, ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/importers/liferay.py b/vulnerabilities/importers/liferay.py new file mode 100644 index 000000000..31125019a 
--- /dev/null +++ b/vulnerabilities/importers/liferay.py @@ -0,0 +1,100 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import requests +from bs4 import BeautifulSoup +from packageurl import PackageURL + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import Importer +from vulnerabilities.importer import VulnerabilityReference + + +class LiferayImporter(Importer): + """ + Importer for Liferay advisories. + """ + spdx_license_identifier = "CC-BY-SA-4.0" # License for Liferay's data + + def fetch(self): + """ + Fetches the HTML content from the Liferay Known Vulnerabilities page. + """ + url = "https://liferay.dev/portal/security/known-vulnerabilities" + response = requests.get(url) + response.raise_for_status() + return response.text + + def parse(self, html): + """ + Parses the fetched HTML and extracts vulnerability data. + Returns a list of AdvisoryData objects. + """ + soup = BeautifulSoup(html, "html.parser") + advisories = [] + + # Locate the table. (Adjust the selector if the page structure changes.) + table = soup.find("table") + if not table: + return advisories + + # Iterate over each row in the table body. + tbody = table.find("tbody") + if not tbody: + return advisories + + for row in tbody.find_all("tr"): + cells = row.find_all("td") + if len(cells) < 5: + continue + + # Extract each field by cell order. + vulnerability_id = cells[0].get_text(strip=True) + affected_versions = cells[1].get_text(strip=True) + description = cells[2].get_text(strip=True) + severity = cells[3].get_text(strip=True) + + # Extract references – there may be multiple links in the cell. 
+ references = [] + for a in cells[4].find_all("a", href=True): + ref_url = a["href"].strip() + if ref_url: + references.append(VulnerabilityReference(url=ref_url)) + + # Create PackageURL objects for affected versions. + affected_packages = [] + for version in affected_versions.split(","): + version = version.strip() + if version: + affected_packages.append( + PackageURL( + type="liferay", + name="liferay-portal", + version=version, + ) + ) + + # Create an AdvisoryData object. + advisories.append( + AdvisoryData( + aliases=[vulnerability_id], + summary=description, + affected_packages=affected_packages, + references=references, + severity=severity, + ) + ) + + return advisories + + def advisory_data(self): + """ + Fetches and parses the data, returning a list of AdvisoryData objects. + """ + html = self.fetch() + return self.parse(html) \ No newline at end of file From 366a54d53bb6750adc20b8d64f8ec1c8f3159d8f Mon Sep 17 00:00:00 2001 From: Shrish0098 Date: Sat, 15 Feb 2025 14:10:03 +0530 Subject: [PATCH 4/6] Removed Anchore section from SOURCES.rst --- SOURCES.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/SOURCES.rst b/SOURCES.rst index 67d691c84..bc0963a10 100644 --- a/SOURCES.rst +++ b/SOURCES.rst @@ -1,10 +1,3 @@ -.. _anchore: - -Anchore NVD Overrides ---------------------- -Anchore provides overrides for NVD data to improve accuracy. This importer fetches data from their -`nvd-data-overrides `_ repository. 
- +----------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------+ |Importer Name | Data Source |Ecosystems Covered | +================+======================================================================================================+====================================================+ From 7e4463f338ce56528566f0b8f5d53f93787dd8db Mon Sep 17 00:00:00 2001 From: Shrish0098 Date: Sat, 15 Feb 2025 14:19:18 +0530 Subject: [PATCH 5/6] Remove anchore_importer.py --- vulnerabilities/pipelines/anchore_importer.py | 89 ------------------- 1 file changed, 89 deletions(-) delete mode 100644 vulnerabilities/pipelines/anchore_importer.py diff --git a/vulnerabilities/pipelines/anchore_importer.py b/vulnerabilities/pipelines/anchore_importer.py deleted file mode 100644 index 842f323f2..000000000 --- a/vulnerabilities/pipelines/anchore_importer.py +++ /dev/null @@ -1,89 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# VulnerableCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/aboutcode-org/vulnerablecode for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. 
-# - -from datetime import datetime -from typing import Iterable - -import requests -import yaml -from packageurl import PackageURL -from univers.versions import SemverVersion - -from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Reference -from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline - - -class AnchoreImporterPipeline(VulnerableCodeBaseImporterPipeline): - """Collect advisories from Anchore's NVD overrides.""" - - pipeline_id = "anchore_importer" - root_url = "https://github.com/anchore/nvd-data-overrides" - license_url = "https://github.com/anchore/nvd-data-overrides/blob/main/LICENSE" - spdx_license_expression = "CC0-1.0" # License of Anchore's data - importer_name = "Anchore NVD Overrides Importer" - - @classmethod - def steps(cls): - return ( - cls.collect_and_store_advisories, - cls.import_new_advisories, - ) - - def advisories_count(self) -> int: - raw_data = self.fetch_data() - return len(raw_data) - - def collect_advisories(self) -> Iterable[AdvisoryData]: - raw_data = self.fetch_data() - for entry in raw_data: - yield self.parse_advisory_data(entry) - - def fetch_data(self): - """Fetch Anchore's NVD overrides from their GitHub repository.""" - url = "https://raw.githubusercontent.com/anchore/nvd-data-overrides/main/overrides.yaml" - response = requests.get(url) - response.raise_for_status() - return yaml.safe_load(response.text) # Correct YAML parsing - - def parse_advisory_data(self, raw_data) -> AdvisoryData: - """Parse a single advisory entry into an AdvisoryData object.""" - # Ensure required fields are present - if not all(key in raw_data for key in ["cve_id", "package_name", "affected_versions"]): - return None - - purl = PackageURL(type="generic", name=raw_data["package_name"]) - affected_version_range = raw_data["affected_versions"] # Use raw version range string - fixed_version = ( - SemverVersion(raw_data["fixed_version"]) if 
raw_data.get("fixed_version") else None - ) - - affected_package = AffectedPackage( - package=purl, - affected_version_range=affected_version_range, - fixed_version=fixed_version, - ) - - references = [ - Reference(url=url) for url in raw_data.get("references", []) if url - ] - date_published = ( - datetime.strptime(raw_data["published_date"], "%Y-%m-%d") - if raw_data.get("published_date") - else None - ) - - return AdvisoryData( - aliases=[raw_data["cve_id"]], - summary=raw_data.get("description", ""), - affected_packages=[affected_package], - references=references, - date_published=date_published, - ) \ No newline at end of file From 64b3efd9e92f2765ad82a616ab00387646b92ff6 Mon Sep 17 00:00:00 2001 From: Shrish Mishra <127573425+Dedsec0098@users.noreply.github.com> Date: Sat, 15 Feb 2025 14:24:45 +0530 Subject: [PATCH 6/6] Removed unwanted changes in __init__.py Signed-off-by: Shrish0098 --- vulnerabilities/importers/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 6141156d3..a1e1faa8c 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -35,7 +35,6 @@ from vulnerabilities.importers import liferay from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.pipelines import alpine_linux_importer -from vulnerabilities.pipelines import anchore_importer from vulnerabilities.pipelines import github_importer from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.pipelines import nginx_importer @@ -80,7 +79,6 @@ pysec_importer.PyPIImporterPipeline, alpine_linux_importer.AlpineLinuxImporterPipeline, - anchore_importer.AnchoreImporterPipeline, liferay.LiferayImporter, ] IMPORTERS_REGISTRY = {