diff --git a/tools/vuxml/README.md b/tools/vuxml/README.md new file mode 100644 index 00000000..fc03e7c2 --- /dev/null +++ b/tools/vuxml/README.md @@ -0,0 +1,51 @@ +# VuXML advisory converter + +This is relevant to FreeBSD's ports, and possibly any other project using VuXML +in order to track vulnerabilities. + +## Prerequisites + +Clone the following repository: +- https://git.freebsd.org/ports.git + +Install the following packages or modules: +- vuxml +- python-lxml + +## Running the converter + +### Usage + +From VuXML to OSV format: + +``` +Usage: convert_vuxml.py [-e ecosystem][-o output_directory] path/to/vuln.xml +``` + +The converted OSV records are either printed as a single JSON array on the +standard output, or written to individual files in the output directory. + +From OSV format to VuXML: + +``` +Usage: convert_osv.py [-o output_file] path/to/osv.json... +``` + +Where the OSV files provided are consolidated into a single VuXML file. + +#### Options +`-e`: +Set a specific ecosystem in the converted output to OSV files (default: +FreeBSD:ports) + +`-o`: +Output directory to place the converted OSV `.json` files (the directory must +exist and have write permissions), or output filename where to write the +converted VuXML file. + +### Example + +``` +$ python3.9 convert_vuxml.py /usr/ports/security/vuxml/vuln.xml +$ python3.9 convert_osv.py 002432c8-ef6a-11ea-ba8f-08002728f74c.json +``` diff --git a/tools/vuxml/convert_osv.py b/tools/vuxml/convert_osv.py new file mode 100644 index 00000000..115c6742 --- /dev/null +++ b/tools/vuxml/convert_osv.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python +# +# Copyright (C) 1994-2024 The FreeBSD Project. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2.
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# Copyright (c) 2024 The FreeBSD Foundation +# +# Portions of this software were developed by Pierre Pronchery +# at Defora Networks GmbH under sponsorship +# from the FreeBSD Foundation. 
"""OSV to VuXML converter."""
import getopt
import json
import sys

from lxml import etree

namespace_vuxml = "{http://www.vuxml.org/apps/vuxml-1}"
namespace_xhtml = "{http://www.w3.org/1999/xhtml}"

url_bid = "https://www.securityfocus.com/bid/"
url_certsa = "https://www.cert.org/advisories/"
url_certvu = "https://www.kb.cert.org/vuls/id/"
url_cve = "https://api.osv.dev/v1/vulns/"
url_freebsd_bugzilla = "https://bugs.freebsd.org/bugzilla/show_bug.cgi?id="
url_freebsd_sa = "https://www.freebsd.org/security/advisories/FreeBSD-"

# OSV reference type -> ((URL prefix, VuXML element, suffix to strip), ...).
# A URL that matches no prefix for its type is emitted as a plain <url>.
_REFERENCE_RULES = {
    "ADVISORY": (
        (url_bid, "bid", "/info"),
        (url_freebsd_sa, "freebsdsa", ".asc"),
        (url_certsa, "certsa", ".html"),
        (url_certvu, "certvu", ""),
        (url_cve, "cvename", ""),
    ),
    "REPORT": (
        (url_freebsd_bugzilla, "freebsdpr", ""),
    ),
}


def _text_element(tag, text):
    """Return a new VuXML-namespaced element *tag* whose text is *text*."""
    element = etree.Element(namespace_vuxml + tag)
    element.text = text
    return element


def _range_elements(events):
    """Return the <range> elements describing a list of OSV range events.

    Every "introduced" event opens a new interval, so an OSV range holding
    several introduced/fixed pairs yields several <range> elements instead
    of one element with repeated bounds. An "introduced" value of "0" has
    no lower bound to emit. Intervals that end up empty are dropped.
    """
    ranges = []
    current = etree.Element(namespace_vuxml + "range")
    for event in events:
        for key, value in event.items():
            if key == "introduced":
                if len(current):
                    # The previous interval is complete: flush it.
                    ranges.append(current)
                    current = etree.Element(namespace_vuxml + "range")
                if value != "0":
                    current.append(_text_element("ge", value))
            elif key == "fixed":
                current.append(_text_element("lt", value))
            elif key == "last_affected":
                current.append(_text_element("le", value))
    if len(current):
        ranges.append(current)
    return ranges


def _package_element(affected):
    """Return the <package> element for one OSV "affected" entry.

    Returns None when the entry does not name a package. Explicit
    "versions" become <range><eq> children; only ranges of type "SEMVER"
    are converted, other range types are ignored.
    """
    package_info = affected.get("package")
    if not package_info or "name" not in package_info:
        return None
    package = etree.Element(namespace_vuxml + "package")
    package.append(_text_element("name", package_info["name"]))
    for version in affected.get("versions", ()):
        exact = etree.Element(namespace_vuxml + "range")
        exact.append(_text_element("eq", version))
        package.append(exact)
    for osv_range in affected.get("ranges", ()):
        if osv_range.get("type") == "SEMVER" and "events" in osv_range:
            package.extend(_range_elements(osv_range["events"]))
    return package


def _reference_element(ref):
    """Return the VuXML reference element for one OSV reference."""
    url = ref["url"]
    for prefix, tag, suffix in _REFERENCE_RULES.get(ref["type"], ()):
        if url.startswith(prefix):
            ident = url[len(prefix):]
            if suffix and ident.endswith(suffix):
                ident = ident[:-len(suffix)]
            return _text_element(tag, ident)
    return _text_element("url", url)


def _dates_element(j):
    """Return the <dates> element for the OSV record *j*.

    Only the first ten characters (YYYY-MM-DD) of each timestamp are kept.
    Without "published", the "modified" date is used as the entry date and
    no <modified> element is emitted. The discovery date comes from
    database_specific.discovery and falls back to the "modified" date.
    """
    modified_day = j["modified"][0:10]
    if "published" in j:
        entry, modified = j["published"][0:10], modified_day
    else:
        entry, modified = modified_day, None
    discovery = modified_day
    if "database_specific" in j and "discovery" in j["database_specific"]:
        discovery = j["database_specific"]["discovery"][0:10]

    dates = etree.Element(namespace_vuxml + "dates")
    dates.append(_text_element("discovery", discovery))
    dates.append(_text_element("entry", entry))
    if modified is not None:
        dates.append(_text_element("modified", modified))
    return dates


def _vuln_element(j):
    """Return the <vuln> element for the OSV record *j*.

    Children are emitted as topic, affects, description, references, dates.
    All packages go into a single <affects> element, because the VuXML
    reader in this tool set only looks at the first <affects> of an entry.
    Raises KeyError when "id", "summary" or "modified" is missing.
    """
    vuln = etree.Element(namespace_vuxml + "vuln", vid=j["id"])
    vuln.append(_text_element("topic", j["summary"]))

    # affects
    affects = etree.Element(namespace_vuxml + "affects")
    for affected in j.get("affected", ()):
        package = _package_element(affected)
        if package is not None:
            affects.append(package)
    if len(affects):
        vuln.append(affects)

    # description
    if "details" in j:
        description = etree.Element(namespace_vuxml + "description")
        body = etree.Element(namespace_xhtml + "body",
                             nsmap={None: namespace_xhtml[1:-1]})
        body.text = j["details"]
        description.append(body)
        vuln.append(description)

    # references
    references = etree.Element(namespace_vuxml + "references")
    for ref in j.get("references", ()):
        references.append(_reference_element(ref))
    if len(references):
        vuln.append(references)

    # dates
    vuln.append(_dates_element(j))

    # cancelled: the flag lives on the OSV record, not on the <dates> element
    # NOTE(review): VuXML may expect a cancelled entry to contain only
    # <cancelled/> -- confirm against the DTD before relying on this output.
    if "withdrawn" in j:
        vuln.append(etree.Element(namespace_vuxml + "cancelled"))
    return vuln


# convert
def convert(filename, vuxml):
    """Convert the OSV JSON file *filename* and append it to *vuxml*.

    The <vuln> element is appended only once it has been built completely.
    Returns 0 on success, or 2 after reporting the failure on stderr.
    """
    try:
        with open(filename, "r", encoding="utf-8") as f:
            j = json.load(f)
        vuxml.append(_vuln_element(j))
    except KeyError as e:
        return error(f"{filename}: missing required field {e}")
    except Exception as e:
        # Per-file boundary: unreadable file, malformed JSON or unexpected
        # value types are all reported as a conversion failure.
        return error(f"{filename}: {e}")
    return 0


# error
def error(string):
    """Print an error message on stderr and return the exit status 2."""
    print(f"{sys.argv[0]}: error: {string}", file=sys.stderr)
    return 2


# usage
def usage(e=None):
    """Print *e* (when given) and the usage line on stderr; return 1."""
    if e is not None:
        print(e, file=sys.stderr)
    print(f"Usage: {sys.argv[0]} [-o output.xml] vuln.json...",
          file=sys.stderr)
    return 1


# warn
def warn(string):
    """Print a warning message on stderr."""
    print(f"{sys.argv[0]}: warning: {string}", file=sys.stderr)


# main
def main():
    """Convert every OSV file named on the command line into one VuXML file.

    Returns 0 on success, 1 on a usage error and 2 when a conversion or the
    final write fails. Nothing is written when any input file fails.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "o:")
    except getopt.GetoptError as e:
        return usage(e)
    output = None
    for name, optarg in opts:
        if name == "-o":
            output = optarg
        else:
            return usage(f"{name}: Unsupported option")

    if not args:
        return usage()

    # Declare VuXML as the default namespace so that the children
    # serialize without a generated prefix.
    vuxml = etree.Element(namespace_vuxml + "vuxml",
                          nsmap={None: namespace_vuxml[1:-1]})
    for arg in args:
        if convert(arg, vuxml) != 0:
            return 2

    try:
        xml = etree.tostring(vuxml, pretty_print=True).decode()
        # NOTE(review): the text prepended to the document is a single
        # newline in the reviewed source; an XML declaration / DOCTYPE may
        # have been intended here -- confirm.
        document = "\n" + xml
        if output is not None:
            with open(output, "w", encoding="utf-8") as f:
                print(document, file=f)
        else:
            print(document)
    except Exception as e:
        return error(e)

    return 0


if __name__ == "__main__":
    sys.exit(main())

# --- next file in this diff: tools/vuxml/convert_vuxml.py (new file) ---
# Its header starts here and continues on the following lines:
# #!/usr/bin/env python
# #
# # Copyright (C) 1994-2024 The FreeBSD Project.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# Copyright (c) 2024 The FreeBSD Foundation +# +# Portions of this software were developed by Pierre Pronchery +# at Defora Networks GmbH under sponsorship +# from the FreeBSD Foundation. 
"""VuXML to OSV converter."""
import datetime
import getopt
import json
import os
import re
import sys

from lxml import etree

re_date = re.compile(r'^(19|20)[0-9]{2}-[0-9]{2}-[0-9]{2}$')
re_invalid_package_name = re.compile(r'[@!#$%^&*()<>?/\|}{~:]')

# warn if description has more than X characters
DESCRIPTION_LENGTH = 5000

namespace = "{http://www.vuxml.org/apps/vuxml-1}"

url_advisories = [
    "https://cve.mitre.org/cgi-bin/cvename.cgi?name=",
    "https://nvd.nist.gov/vuln/detail/",
    "https://github.com/advisories/",
    "https://www.debian.org/security/"
    ]
url_bid = "https://www.securityfocus.com/bid/%s/info"
url_certsa = "https://www.cert.org/advisories/%s.html"
url_certvu = "https://www.kb.cert.org/vuls/id/%s"
url_cve = "https://api.osv.dev/v1/vulns/%s"
url_freebsd_bugzilla = "https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=%s"
url_freebsd_sa = "https://www.freebsd.org/security/advisories/FreeBSD-%s.asc"
url_reports = [
    "https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=",
    "http://bugzilla.mozilla.org/show_bug.cgi?id=",
    "https://bugzilla.mozilla.org/show_bug.cgi?id=",
    "https://bugzilla.redhat.com/show_bug.cgi?id=",
    "https://bugzilla.suse.com/show_bug.cgi?id="
    ]

# VuXML reference elements that map to an OSV "ADVISORY" URL template
_ADVISORY_TEMPLATES = {
    "bid": url_bid,
    "certsa": url_certsa,
    "certvu": url_certvu,
    "cvename": url_cve,
    "freebsdsa": url_freebsd_sa,
}


# dateof
def dateof(string):
    """Return the YYYY-MM-DD date *string* as an ISO timestamp ending in Z.

    Raises ValueError when *string* is not a valid calendar date.
    """
    return datetime.datetime.strptime(string, "%Y-%m-%d").isoformat()+"Z"


# error
def error(string):
    """Print an error message on stderr and return the exit status 2."""
    print(f"{sys.argv[0]}: error: {string}", file=sys.stderr)
    return 2


# usage
def usage(e=None):
    """Print *e* (when given) and the usage line on stderr; return 1."""
    if e is not None:
        print(e, file=sys.stderr)
    print(f"Usage: {sys.argv[0]} [-e ecosystem][-o output_directory] vuln.xml",
          file=sys.stderr)
    return 1


# warn
def warn(string):
    """Print a warning message on stderr."""
    print(f"{sys.argv[0]}: warning: {string}", file=sys.stderr)


def _date_field(vuln, kind):
    """Return (timestamp, ok) for the <dates>/<kind> child of *vuln*.

    A missing or empty date yields (None, True). A malformed date is
    reported on stderr and yields (None, False).
    """
    dates = vuln.find(namespace + "dates")
    node = None if dates is None else dates.find(namespace + kind)
    text = None if node is None else node.text
    if text is None:
        return None, True
    if not re_date.match(text):
        error(f"{kind} date not in YYYY-MM-DD format: {text}")
        return None, False
    try:
        return dateof(text), True
    except ValueError:
        # Matches the pattern but is no real date, e.g. 2024-02-30.
        error(f"{kind} date is not a valid calendar date: {text}")
        return None, False


def _url_type(url):
    """Return the OSV reference type for a plain <url> reference."""
    if url.startswith(tuple(url_advisories)):
        return "ADVISORY"
    if url.startswith(tuple(url_reports)):
        return "REPORT"
    return "WEB"


def _convert_references(vuln):
    """Return the OSV references of *vuln*; empty without <references>."""
    refs = vuln.find(namespace + "references")
    if refs is None:
        return []
    references = []
    for ref in refs:
        # Skip comments (their tag is not a string) and empty elements.
        if not isinstance(ref.tag, str) or not ref.text:
            continue
        if not ref.tag.startswith(namespace):
            continue
        tag = ref.tag[len(namespace):]
        if tag in _ADVISORY_TEMPLATES:
            reference = {"type": "ADVISORY",
                         "url": _ADVISORY_TEMPLATES[tag] % ref.text}
        elif tag == "freebsdpr":
            # Only the "category/number" form is converted.
            parts = ref.text.split("/")
            if len(parts) != 2:
                continue
            reference = {"type": "REPORT",
                         "url": url_freebsd_bugzilla % parts[1]}
        elif tag == "mlist":
            reference = {"type": "DISCUSSION", "url": ref.text}
        elif tag == "url":
            reference = {"type": _url_type(ref.text), "url": ref.text}
        else:
            continue
        references.append(reference)
    return references


def _bound(rnge, tag):
    """Return the text of child *tag* of *rnge*, or None when unusable.

    A missing child, an empty child and the wildcard "*" all count as
    "no bound".
    """
    node = rnge.find(namespace + tag)
    if node is None or not node.text or node.text == "*":
        return None
    return node.text


def _convert_ranges(package):
    """Return (ranges, versions) for the <range> children of *package*.

    Each bounded range becomes one SEMVER range whose events start with
    "introduced" ("0" when there is no lower bound), followed by
    "last_affected" and/or "fixed". <eq> values are collected as versions.
    """
    ranges = []
    versions = []
    for rnge in package.findall(namespace + "range"):
        introduced = _bound(rnge, "ge")
        exclusive = _bound(rnge, "gt")
        if exclusive is not None:
            # FIXME not accurate!!1
            introduced = exclusive + ",1"
        last_affected = _bound(rnge, "le")
        fixed = _bound(rnge, "lt")

        events = []
        if not (introduced is None and last_affected is None
                and fixed is None):
            events.append(
                {"introduced": "0" if introduced is None else introduced})
            if last_affected is not None:
                events.append({"last_affected": last_affected})
            if fixed is not None:
                events.append({"fixed": fixed})

        exact = _bound(rnge, "eq")
        if exact is not None:
            versions.append(exact)

        if events:
            ranges.append({"type": "SEMVER", "events": events})
    return ranges, versions


def _convert_affected(vuln, ecosystem, vid):
    """Return (affected, ok) for the <affects> element of *vuln*.

    One OSV "affected" entry is produced per package name. Names that are
    empty or contain invalid characters are reported and skipped.
    """
    ok = True
    affected = []
    affects = vuln.find(namespace + "affects")
    if affects is None:
        return affected, ok
    for package in affects.findall(namespace + "package"):
        # The ranges are the same for every name of the package.
        ranges, versions = _convert_ranges(package)
        for name in package.findall(namespace + "name"):
            if name.text is None \
                    or re_invalid_package_name.search(name.text) is not None:
                error("%s package with invalid name: %s" % (vid, name.text))
                ok = False
                continue
            a = {"package": {"ecosystem": ecosystem, "name": name.text}}
            if ranges:
                a["ranges"] = ranges
            if versions:
                a["versions"] = versions
            affected.append(a)
    return affected, ok


def _convert_vuln(vuln, ecosystem):
    """Return (entry, ok) where *entry* is the OSV record for *vuln*.

    *ok* is False when any error was reported; the record is still
    returned with whatever could be converted.
    """
    ok = True
    vid = vuln.get("vid")
    entry = {"schema_version": "1.2.0", "id": vid}

    # modified, published
    dates_entry, valid = _date_field(vuln, "entry")
    ok = ok and valid
    dates_modified, valid = _date_field(vuln, "modified")
    ok = ok and valid
    if dates_modified is not None:
        entry["modified"] = dates_modified
    elif dates_entry is not None:
        entry["modified"] = dates_entry
    if dates_entry is not None:
        entry["published"] = dates_entry

    # summary
    topic = vuln.find(namespace + "topic")
    if topic is None:
        error(f"{vid} has no topic")
        ok = False
    elif topic.text is not None:
        entry["summary"] = topic.text

    # details
    description = vuln.find(namespace + "description")
    if description is None:
        error(f"{vid} has no description")
        ok = False
    else:
        try:
            details = etree.tostring(description, encoding='unicode',
                                     method='text')
        except Exception as e:
            error("%s could not parse description: %s: %s"
                  % (vid, type(e).__name__, e))
            ok = False
        else:
            if len(details) > DESCRIPTION_LENGTH:
                warn("%s: description truncated (> %s)"
                     % (vid, DESCRIPTION_LENGTH))
                details = details[0:DESCRIPTION_LENGTH]
            entry["details"] = details

    # references
    references = _convert_references(vuln)
    if references:
        entry["references"] = references

    # affected
    affected, valid = _convert_affected(vuln, ecosystem, vid)
    ok = ok and valid
    if affected:
        entry["affected"] = affected

    # database_specific
    dates_discovery, valid = _date_field(vuln, "discovery")
    ok = ok and valid
    if dates_discovery is not None:
        entry["database_specific"] = {"discovery": dates_discovery}

    return entry, ok


# main
def main():
    """Convert a VuXML file into OSV records.

    The records go to one "<vid>.json" file each in the -o directory, or
    as a single JSON array to the standard output. Cancelled entries are
    skipped. Returns 0 on success, 1 on a usage error and 2 when any
    error was reported.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "e:o:")
    except getopt.GetoptError as e:
        return usage(e)
    ecosystem = "FreeBSD:ports"
    output = None
    for name, optarg in opts:
        if name == "-e":
            ecosystem = optarg
        elif name == "-o":
            output = optarg
        else:
            return usage(f"{name}: Unsupported option")

    if len(args) != 1:
        return usage()

    try:
        parser = etree.XMLParser(dtd_validation=True)
        root = etree.parse(args[0], parser).getroot()
    except (OSError, etree.XMLSyntaxError) as e:
        # Unreadable, malformed or DTD-invalid input.
        return error(e)

    ret = 0
    entries = []
    # findall() only returns <vuln> elements, so comments are skipped.
    for vuln in root.findall(namespace + "vuln"):
        if vuln.find(namespace + "cancelled") is not None:
            continue

        entry, ok = _convert_vuln(vuln, ecosystem)
        if not ok:
            ret = 2

        if output is None:
            entries.append(entry)
            continue
        try:
            path = os.path.join(output, f"{entry['id']}.json")
            with open(path, "w", encoding="utf-8") as f:
                print(json.dumps(entry, indent=4), file=f)
        except OSError as e:
            ret = error(e)

    if output is None:
        print(json.dumps(entries, indent=4))

    return ret


if __name__ == "__main__":
    sys.exit(main())