From dca1bcd5c155f1522e1657b4d5a75cc3601970f3 Mon Sep 17 00:00:00 2001 From: yash-nisar Date: Tue, 7 Aug 2018 15:31:00 +0800 Subject: [PATCH] Add 'spdx_id' field to the 'File' class Signed-off-by: Yash Nisar --- data/SPDXRdfExample.rdf | 4 +- data/SPDXSimpleTag.tag | 1 + data/SPDXTagExample.tag | 2 + spdx/file.py | 18 +++++++- spdx/parsers/lexers/tagvalue.py | 2 +- spdx/parsers/rdf.py | 12 +++++ spdx/parsers/tagvalue.py | 55 ++++++++++++++--------- spdx/parsers/tagvaluebuilders.py | 21 +++++++++ spdx/parsers/validations.py | 9 ++++ spdx/writers/rdf.py | 3 +- spdx/writers/tagvalue.py | 1 + tests/data/doc_write/rdf-simple-plus.json | 5 ++- tests/data/doc_write/rdf-simple.json | 11 +++-- tests/data/doc_write/tv-simple-plus.tv | 1 + tests/data/doc_write/tv-simple.tv | 1 + tests/test_document.py | 3 ++ tests/test_tag_value_parser.py | 4 +- 17 files changed, 120 insertions(+), 33 deletions(-) diff --git a/data/SPDXRdfExample.rdf b/data/SPDXRdfExample.rdf index 4fe4484e5..3f27b36d1 100644 --- a/data/SPDXRdfExample.rdf +++ b/data/SPDXRdfExample.rdf @@ -28,7 +28,7 @@ - + > /* @@ -97,7 +97,7 @@ - + Copyright 2010, 2011 Source Auditor Inc. diff --git a/data/SPDXSimpleTag.tag b/data/SPDXSimpleTag.tag index db1c12ca9..8992411cd 100644 --- a/data/SPDXSimpleTag.tag +++ b/data/SPDXSimpleTag.tag @@ -46,6 +46,7 @@ PackageLicenseComments: License Comments # File Info FileName: testfile.java +SPDXID: SPDXRef-File FileType: SOURCE FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12 LicenseConcluded: Apache-2.0 diff --git a/data/SPDXTagExample.tag b/data/SPDXTagExample.tag index b471a70b2..2de752f80 100644 --- a/data/SPDXTagExample.tag +++ b/data/SPDXTagExample.tag @@ -50,6 +50,7 @@ PackageLicenseComments: The declared license information can be found in t ## File Information FileName: src/org/spdx/parser/DOAPProject.java +SPDXID: SPDXRef-File1 FileType: SOURCE FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12 LicenseConcluded: Apache-2.0 @@ -57,6 +58,7 @@ LicenseInfoInFile: Apache-2.0 FileCopyrightText: Copyright 2010, 2011 Source Auditor Inc. FileName: Jenna-2.6.3/jena-2.6.3-sources.jar +SPDXID: SPDXRef-File2 FileType: ARCHIVE FileChecksum: SHA1: 3ab4e1c67a2d28fced849ee1bb76e7391b93f125 LicenseConcluded: LicenseRef-1 diff --git a/spdx/file.py b/spdx/file.py index 868207125..b69185f0a 100644 --- a/spdx/file.py +++ b/spdx/file.py @@ -36,6 +36,8 @@ class File(object): Represent an SPDX file. Fields: - name: File name, str mandatory one. + - spdx_id: Uniquely identify any element in an SPDX document which may be + referenced by other elements. Mandatory, one. Type: str. - comment: File comment str, Optional zero or one. - type: one of FileType.SOURCE, FileType.BINARY, FileType.ARCHIVE and FileType.OTHER, optional zero or one. @@ -54,8 +56,9 @@ class File(object): - artifact_of_project_uri: list of project uris, possibly empty. """ - def __init__(self, name, chk_sum=None): + def __init__(self, name, spdx_id=None, chk_sum=None): self.name = name + self.spdx_id = spdx_id self.comment = None self.type = None self.chk_sum = chk_sum @@ -105,7 +108,18 @@ def validate(self, messages=None): and self.validate_chksum(messages) and self.validate_licenses_in_file(messages) and self.validate_copyright(messages) - and self.validate_artifacts(messages)) + and self.validate_artifacts(messages) + and self.validate_spdx_id(messages)) + + def validate_spdx_id(self, messages=None): + # FIXME: messages should be returned + messages = messages if messages is not None else [] + + if self.spdx_id is None: + messages.append('File has no SPDX Identifier.') + return False + else: + return True def validate_copyright(self, messages=None): # FIXME: messages should be returned diff --git a/spdx/parsers/lexers/tagvalue.py b/spdx/parsers/lexers/tagvalue.py index 8e26be215..747d86266 100644 --- a/spdx/parsers/lexers/tagvalue.py +++ b/spdx/parsers/lexers/tagvalue.py @@ -22,7 +22,7 @@ class Lexer(object): 'SPDXVersion': 'DOC_VERSION', 'DataLicense': 'DOC_LICENSE', 'DocumentName': 'DOC_NAME', - 'SPDXID': 'DOC_SPDX_ID', + 'SPDXID': 'SPDX_ID', 'DocumentComment': 'DOC_COMMENT', 'DocumentNamespace': 'DOC_NAMESPACE', 'ExternalDocumentRef': 'EXT_DOC_REF', diff --git a/spdx/parsers/rdf.py b/spdx/parsers/rdf.py index dac52d152..43e68e788 100644 --- a/spdx/parsers/rdf.py +++ b/spdx/parsers/rdf.py @@ -48,6 +48,8 @@ 'LICS_LIST_MEMBER' : 'Declaritive or Conjunctive license set member must be a license url or identifier', 'PKG_SINGLE_LICS' : 'Package concluded license must be a license url or spdx:noassertion or spdx:none.', 'PKG_LICS_INFO_FILES' : 'Package licenseInfoFromFiles must be a license or spdx:none or spdx:noassertion', + 'FILE_SPDX_ID_VALUE': 'SPDXID must be "SPDXRef-[idstring]" where [idstring] is a unique string containing ' + 'letters, numbers, ".", "-".', 'FILE_TYPE' : 'File type must be binary, other, source or archive term.', 'FILE_SINGLE_LICS': 'File concluded license must be a license url or spdx:noassertion or spdx:none.', 'REVIEWER_VALUE' : 'Invalid reviewer value \'{0}\' must be Organization, Tool or Person.', @@ -515,6 +517,7 @@ def parse_file(self, f_term): for _, _, name in self.graph.triples((f_term, self.spdx_namespace['fileName'], None)): self.builder.set_file_name(self.doc, six.text_type(name)) + self.p_file_spdx_id(f_term, self.spdx_namespace['File']) self.p_file_type(f_term, self.spdx_namespace['fileType']) self.p_file_chk_sum(f_term, self.spdx_namespace['checksum']) self.p_file_lic_conc(f_term, self.spdx_namespace['licenseConcluded']) @@ -613,6 +616,15 @@ def p_file_lic_info(self, f_term, predicate): if lic is not None: self.builder.set_file_license_in_file(self.doc, lic) + def p_file_spdx_id(self, f_term, predicate): + try: + try: + self.builder.set_file_spdx_id(self.doc, f_term) + except SPDXValueError: + self.value_error('FILE_SPDX_ID_VALUE', f_term) + except CardinalityError: + self.more_than_one_error('FILE_SPDX_ID_VALUE') + def p_file_type(self, f_term, predicate): """Sets file type.""" try: diff --git a/spdx/parsers/tagvalue.py b/spdx/parsers/tagvalue.py index 1961a3f72..c8e46fcdd 100644 --- a/spdx/parsers/tagvalue.py +++ b/spdx/parsers/tagvalue.py @@ -71,6 +71,8 @@ 'FILE_NAME_VALUE': 'FileName must be a single line of text, line: {0}', 'FILE_COMMENT_VALUE': 'FileComment must be free form text, line:{0}', 'FILE_TYPE_VALUE': 'FileType must be one of OTHER, BINARY, SOURCE or ARCHIVE, line: {0}', + 'FILE_SPDX_ID_VALUE': 'SPDXID must be "SPDXRef-[idstring]" where [idstring] is a unique string containing ' + 'letters, numbers, ".", "-".', 'FILE_CHKSUM_VALUE': 'FileChecksum must be a single line of text starting with \'SHA1:\', line:{0}', 'FILE_LICS_CONC_VALUE': 'LicenseConcluded must be NOASSERTION, NONE, license identifier or license list, line:{0}', 'FILE_LICS_INFO_VALUE': 'LicenseInfoInFile must be NOASSERTION, NONE or license identifier, line: {0}', @@ -113,9 +115,9 @@ def p_start_2(self, p): def p_attrib(self, p): """attrib : spdx_version + | spdx_id | data_lics | doc_name - | doc_spdx_id | ext_doc_ref | doc_comment | doc_namespace @@ -539,6 +541,17 @@ def p_file_name_2(self, p): msg = ERROR_MESSAGES['FILE_NAME_VALUE'].format(p.lineno(1)) self.logger.log(msg) + def p_spdx_id(self, p): + """spdx_id : SPDX_ID LINE""" + if six.PY2: + value = p[2].decode(encoding='utf-8') + else: + value = p[2] + if not self.builder.doc_spdx_id_set: + self.builder.set_doc_spdx_id(self.document, value) + else: + self.builder.set_file_spdx_id(self.document, value) + def p_file_comment_1(self, p): """file_comment : FILE_COMMENT TEXT""" try: @@ -1148,26 +1161,26 @@ def p_doc_name_2(self, p): msg = ERROR_MESSAGES['DOC_NAME_VALUE'].format(p.lineno(1)) self.logger.log(msg) - def p_doc_spdx_id_1(self, p): - """doc_spdx_id : DOC_SPDX_ID LINE""" - try: - if six.PY2: - value = p[2].decode(encoding='utf-8') - else: - value = p[2] - self.builder.set_doc_spdx_id(self.document, value) - except SPDXValueError: - self.error = True - msg = ERROR_MESSAGES['DOC_SPDX_ID_VALUE'].format(p.lineno(2)) - self.logger.log(msg) - except CardinalityError: - self.more_than_one_error('SPDXID', p.lineno(1)) - - def p_doc_spdx_id_2(self, p): - """doc_spdx_id : DOC_SPDX_ID error""" - self.error = True - msg = ERROR_MESSAGES['DOC_SPDX_ID_VALUE'].format(p.lineno(1)) - self.logger.log(msg) + # def p_doc_spdx_id_1(self, p): + # """doc_spdx_id : DOC_SPDX_ID LINE""" + # try: + # if six.PY2: + # value = p[2].decode(encoding='utf-8') + # else: + # value = p[2] + # self.builder.set_doc_spdx_id(self.document, value) + # except SPDXValueError: + # self.error = True + # msg = ERROR_MESSAGES['DOC_SPDX_ID_VALUE'].format(p.lineno(2)) + # self.logger.log(msg) + # except CardinalityError: + # self.more_than_one_error('SPDXID', p.lineno(1)) + # + # def p_doc_spdx_id_2(self, p): + # """doc_spdx_id : DOC_SPDX_ID error""" + # self.error = True + # msg = ERROR_MESSAGES['DOC_SPDX_ID_VALUE'].format(p.lineno(1)) + # self.logger.log(msg) def p_ext_doc_refs_1(self, p): """ext_doc_ref : EXT_DOC_REF DOC_REF_ID DOC_URI EXT_DOC_REF_CHKSUM""" diff --git a/spdx/parsers/tagvaluebuilders.py b/spdx/parsers/tagvaluebuilders.py index e5b4da932..265e0fd4f 100644 --- a/spdx/parsers/tagvaluebuilders.py +++ b/spdx/parsers/tagvaluebuilders.py @@ -725,6 +725,26 @@ def set_file_name(self, doc, name): else: raise OrderError('File::Name') + def set_file_spdx_id(self, doc, spdx_id): + """ + Sets the file SPDX Identifier. + Raises OrderError if no package or no file defined. + Raises SPDXValueError if malformed value. + Raises CardinalityError if more than one spdx_id set. + """ + if self.has_package(doc) and self.has_file(doc): + if not self.file_spdx_id_set: + self.file_spdx_id_set = True + if validations.validate_file_spdx_id(spdx_id): + self.file(doc).spdx_id = spdx_id + return True + else: + raise SPDXValueError('File::SPDXID') + else: + raise CardinalityError('File::SPDXID') + else: + raise OrderError('File::SPDXID') + def set_file_comment(self, doc, text): """ Raises OrderError if no package or no file defined. @@ -916,6 +936,7 @@ def has_package(self, doc): def reset_file_stat(self): """Resets the builder's state to enable building new files.""" # FIXME: this state does not make sense + self.file_spdx_id_set = False self.file_comment_set = False self.file_type_set = False self.file_chksum_set = False diff --git a/spdx/parsers/validations.py b/spdx/parsers/validations.py index deaceffa8..0bf659286 100644 --- a/spdx/parsers/validations.py +++ b/spdx/parsers/validations.py @@ -151,6 +151,15 @@ def validate_pkg_lics_comment(value, optional=False): return validate_is_free_form_text(value, optional) +def validate_file_spdx_id(value, optional=False): + value = value.split('#')[-1] + TEXT_RE = re.compile(r'SPDXRef-([A-Za-z0-9.\-]+)', re.UNICODE) + if value is None: + return optional + else: + return TEXT_RE.match(value) is not None + + def validate_file_comment(value, optional=False): return validate_is_free_form_text(value, optional) diff --git a/spdx/writers/rdf.py b/spdx/writers/rdf.py index 2047cace8..43e7cdc59 100644 --- a/spdx/writers/rdf.py +++ b/spdx/writers/rdf.py @@ -13,6 +13,7 @@ from __future__ import absolute_import from __future__ import print_function from __future__ import unicode_literals +import uuid from rdflib import BNode from rdflib import Graph @@ -208,7 +209,7 @@ def create_file_node(self, doc_file): """ Create a node for spdx.file. """ - file_node = BNode() + file_node = URIRef('http://www.spdx.org/files#' + str(doc_file.spdx_id)) type_triple = (file_node, RDF.type, self.spdx_namespace.File) self.graph.add(type_triple) diff --git a/spdx/writers/tagvalue.py b/spdx/writers/tagvalue.py index e4ca0f673..abb8c88ab 100644 --- a/spdx/writers/tagvalue.py +++ b/spdx/writers/tagvalue.py @@ -91,6 +91,7 @@ def write_file(spdx_file, out): """ out.write('# File\n\n') write_value('FileName', spdx_file.name, out) + write_value('SPDXID', spdx_file.spdx_id, out) if spdx_file.has_optional_field('type'): write_file_type(spdx_file.type, out) write_value('FileChecksum', spdx_file.chk_sum.to_tv(), out) diff --git a/tests/data/doc_write/rdf-simple-plus.json b/tests/data/doc_write/rdf-simple-plus.json index 914f3249a..11c469346 100644 --- a/tests/data/doc_write/rdf-simple-plus.json +++ b/tests/data/doc_write/rdf-simple-plus.json @@ -5,7 +5,9 @@ "ns1:SpdxDocument": { "ns1:describesPackage": { "ns1:Package": { - "ns1:hasFile": null, + "ns1:hasFile": { + "@rdf:resource": "http://www.spdx.org/files#SPDXRef-File" + }, "ns1:name": "some/path", "ns1:licenseDeclared": { "@rdf:resource": "http://spdx.org/rdf/terms#noassertion" @@ -31,6 +33,7 @@ }, "ns1:referencesFile": { "ns1:File": { + "@rdf:about": "http://www.spdx.org/files#SPDXRef-File", "ns1:fileName": "./some/path/tofile", "ns1:checksum": { "ns1:Checksum": { diff --git a/tests/data/doc_write/rdf-simple.json b/tests/data/doc_write/rdf-simple.json index a64a290b3..840d0cfaa 100644 --- a/tests/data/doc_write/rdf-simple.json +++ b/tests/data/doc_write/rdf-simple.json @@ -5,7 +5,9 @@ "ns1:SpdxDocument": { "ns1:describesPackage": { "ns1:Package": { - "ns1:hasFile": null, + "ns1:hasFile": { + "@rdf:resource": "http://www.spdx.org/files#SPDXRef-File" + }, "ns1:downloadLocation": { "@rdf:resource": "http://spdx.org/rdf/terms#noassertion" }, @@ -31,15 +33,16 @@ }, "ns1:referencesFile": { "ns1:File": { + "@rdf:about": "http://www.spdx.org/files#SPDXRef-File", "ns1:licenseInfoInFile": { "@rdf:resource": "http://spdx.org/licenses/LGPL-2.1" - }, + }, "ns1:checksum": { "ns1:Checksum": { - "ns1:checksumValue": "SOME-SHA1", + "ns1:checksumValue": "SOME-SHA1", "ns1:algorithm": "SHA1" } - }, + }, "ns1:licenseConcluded": { "@rdf:resource": "http://spdx.org/rdf/terms#noassertion" }, diff --git a/tests/data/doc_write/tv-simple-plus.tv b/tests/data/doc_write/tv-simple-plus.tv index 8e5154852..f6c264a3d 100644 --- a/tests/data/doc_write/tv-simple-plus.tv +++ b/tests/data/doc_write/tv-simple-plus.tv @@ -15,6 +15,7 @@ PackageLicenseInfoFromFiles: LGPL-2.1+ PackageCopyrightText: Some copyrught # File FileName: ./some/path/tofile +SPDXID: SPDXRef-File FileChecksum: SHA1: SOME-SHA1 LicenseConcluded: NOASSERTION LicenseInfoInFile: LGPL-2.1+ diff --git a/tests/data/doc_write/tv-simple.tv b/tests/data/doc_write/tv-simple.tv index 7698203f0..43734f6ef 100644 --- a/tests/data/doc_write/tv-simple.tv +++ b/tests/data/doc_write/tv-simple.tv @@ -15,6 +15,7 @@ PackageLicenseInfoFromFiles: LGPL-2.1 PackageCopyrightText: Some copyrught # File FileName: ./some/path/tofile +SPDXID: SPDXRef-File FileChecksum: SHA1: SOME-SHA1 LicenseConcluded: NOASSERTION LicenseInfoInFile: LGPL-2.1 diff --git a/tests/test_document.py b/tests/test_document.py index 4b860990e..8cdbcf3ac 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -81,6 +81,7 @@ def test_document_validate_failures_returns_informative_messages(self): pack = doc.package = Package('some/path', NoAssert()) file1 = File('./some/path/tofile') file1.name = './some/path/tofile' + file1.spdx_id = 'SPDXRef-File' file1.chk_sum = Algorithm('SHA1', 'SOME-SHA1') lic1 = License.from_identifier('LGPL-2.1') file1.add_lics(lic1) @@ -108,6 +109,7 @@ def test_document_is_valid_when_using_or_later_licenses(self): file1 = File('./some/path/tofile') file1.name = './some/path/tofile' + file1.spdx_id = 'SPDXRef-File' file1.chk_sum = Algorithm('SHA1', 'SOME-SHA1') file1.conc_lics = NoAssert() file1.copyright = NoAssert() @@ -140,6 +142,7 @@ def _get_lgpl_doc(self, or_later=False): file1 = File('./some/path/tofile') file1.name = './some/path/tofile' + file1.spdx_id = 'SPDXRef-File' file1.chk_sum = Algorithm('SHA1', 'SOME-SHA1') file1.conc_lics = NoAssert() file1.copyright = NoAssert() diff --git a/tests/test_tag_value_parser.py b/tests/test_tag_value_parser.py index f62329476..6f2f06fa9 100644 --- a/tests/test_tag_value_parser.py +++ b/tests/test_tag_value_parser.py @@ -49,7 +49,7 @@ def test_document(self): self.token_assert_helper(self.l.token(), 'DOC_NAME', 'DocumentName', 5) self.token_assert_helper(self.l.token(), 'LINE', 'Sample_Document-V2.1', 5) - self.token_assert_helper(self.l.token(), 'DOC_SPDX_ID', 'SPDXID', 6) + self.token_assert_helper(self.l.token(), 'SPDX_ID', 'SPDXID', 6) self.token_assert_helper(self.l.token(), 'LINE', 'SPDXRef-DOCUMENT', 6) self.token_assert_helper(self.l.token(), 'DOC_NAMESPACE', 'DocumentNamespace', 7) @@ -188,6 +188,7 @@ class TestParser(TestCase): file_str = '\n'.join([ 'FileName: testfile.java', + 'SPDXID: SPDXRef-File', 'FileType: SOURCE', 'FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', 'LicenseConcluded: Apache-2.0', @@ -248,6 +249,7 @@ def test_file(self): assert len(document.package.files) == 1 spdx_file = document.package.files[0] assert spdx_file.name == 'testfile.java' + assert spdx_file.spdx_id == 'SPDXRef-File' assert spdx_file.type == spdx.file.FileType.SOURCE assert len(spdx_file.artifact_of_project_name) == 1 assert len(spdx_file.artifact_of_project_home) == 1