Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade the 'Document' class to the SPDX 2.1 specification #71

Merged
merged 4 commits into from
Aug 7, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion data/SPDXRdfExample.rdf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
xmlns:j.0="http://usefulinc.com/ns/doap#"
xmlns="http://spdx.org/rdf/terms#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<SpdxDocument rdf:about="http://www.spdx.org/tools#SPDXANALYSIS">
<SpdxDocument rdf:about="https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301#SPDXRef-DOCUMENT">
<name>Sample_Document-V2.1</name>
<creationInfo>
<CreationInfo>
<created>2010-02-03T00:00:00Z</created>
Expand All @@ -14,6 +15,18 @@
</CreationInfo>
</creationInfo>
<specVersion>SPDX-2.1</specVersion>
<externalDocumentRef>
<ExternalDocumentRef>
<externalDocumentId>DocumentRef-spdx-tool-2.1</externalDocumentId>
<spdxDocument rdf:resource="https://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301"/>
<checksum>
<Checksum>
<checksumValue>d6a770ba38583ed4bb4525bd96e50461655d2759</checksumValue>
<algorithm rdf:resource="http://spdx.org/rdf/terms#checksumAlgorithm_sha1"/>
</Checksum>
</checksum>
</ExternalDocumentRef>
</externalDocumentRef>
<referencesFile>
<File rdf:nodeID="A0">
<licenseConcluded>
Expand Down
4 changes: 4 additions & 0 deletions data/SPDXSimpleTag.tag
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# Document info
SPDXVersion: SPDX-2.1
DataLicense: CC0-1.0
DocumentName: Sample_Document-V2.1
SPDXID: SPDXRef-DOCUMENT
DocumentNamespace: https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301
DocumentComment: <text>Sample Comment</text>
ExternalDocumentRef:DocumentRef-spdx-tool-2.1 https://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759

# Creation info
Creator: Person: Bob (bob@example.com)
Expand Down
3 changes: 3 additions & 0 deletions data/SPDXTagExample.tag
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
SPDXVersion: SPDX-2.1
DataLicense: CC0-1.0
DocumentName: Sample_Document-V2.1
SPDXID: SPDXRef-DOCUMENT
DocumentNamespace: https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301
DocumentComment: <text>This is a sample spreadsheet</text>

## Creation Information
Expand Down
144 changes: 141 additions & 3 deletions spdx/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,78 @@
from spdx import config


@total_ordering
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good usage of this 👍

class ExternalDocumentRef(object):
"""
External Document References entity that contains the following fields :
- external_document_id: A unique string containing letters, numbers, '.',
'-' or '+'.
- spdx_document_uri: The unique ID of the SPDX document being referenced.
- check_sum: The checksum of the referenced SPDX document.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why check_sum instead of checksum?

"""

def __init__(self, external_document_id=None, spdx_document_uri=None,
             check_sum=None):
    """
    Store the three parts of an external document reference.

    Every argument is optional and defaults to None; no validation is
    done here.
    """
    (self.external_document_id,
     self.spdx_document_uri,
     self.check_sum) = (external_document_id, spdx_document_uri, check_sum)

def __eq__(self, other):
    """
    Two references are equal when `other` is an ExternalDocumentRef and
    all three fields compare equal.
    """
    # Anything that is not an ExternalDocumentRef never compares equal.
    if not isinstance(other, ExternalDocumentRef):
        return False
    return (
        self.external_document_id == other.external_document_id
        and self.spdx_document_uri == other.spdx_document_uri
        and self.check_sum == other.check_sum
    )

def __lt__(self, other):
    """
    Order references by the tuple
    (external_document_id, spdx_document_uri, check_sum).

    Return NotImplemented when `other` does not expose those three
    attributes, so that Python can try the reflected operation or raise
    its usual TypeError instead of leaking an AttributeError.
    """
    try:
        theirs = (other.external_document_id, other.spdx_document_uri,
                  other.check_sum)
    except AttributeError:
        return NotImplemented
    mine = (self.external_document_id, self.spdx_document_uri,
            self.check_sum)
    return mine < theirs

def validate(self, messages=None):
"""
Validate all fields of the ExternalDocumentRef class and update the
messages list with user friendly error messages for display.
"""
messages = messages if messages is not None else []

return (self.validate_ext_doc_id(messages) and
self.validate_spdx_doc_uri(messages) and
self.validate_chksum(messages)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

being consistent would be nice: use checksum everywhere?

)

def validate_ext_doc_id(self, messages=None):
messages = messages if messages is not None else []

if self.external_document_id:
return True
else:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This else is not needed.... but this is minor.

messages.append('ExternalDocumentRef has no External Document ID.')
return False

def validate_spdx_doc_uri(self, messages=None):
    """
    Check that an SPDX document URI is set.

    Return True when `spdx_document_uri` is truthy. Otherwise append an
    error to `messages` (a throwaway list is used when none is given)
    and return False.
    """
    if messages is None:
        messages = []

    has_uri = bool(self.spdx_document_uri)
    if not has_uri:
        messages.append('ExternalDocumentRef has no SPDX Document URI.')
    return has_uri

def validate_chksum(self, messages=None):
    """
    Check that a checksum is set.

    Return True when `check_sum` is truthy. Otherwise append an error to
    `messages` (a throwaway list is used when none is given) and return
    False.
    """
    if messages is None:
        messages = []

    has_checksum = bool(self.check_sum)
    if not has_checksum:
        messages.append('ExternalDocumentRef has no Checksum.')
    return has_checksum


def _add_parens(required, text):
"""
Add parens around a license expression if `required` is True, otherwise
Expand Down Expand Up @@ -189,7 +261,13 @@ class Document(object):
Represent an SPDX document with these fields:
- version: Spec version. Mandatory, one - Type: Version.
- data_license: SPDX-Metadata license. Mandatory, one. Type: License.
- name: Name of the document. Mandatory, one. Type: str.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need to make this mandatory?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had referred to https://spdx.org/sites/cpstandard/files/pages/files/spdxversion2.1.pdf#page=11 where the Cardinality is Mandatory. So, what should we do ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

leave as is.

- spdx_id: SPDX Identifier for the document to refer to itself in
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need to make this mandatory too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had referred to https://spdx.org/sites/cpstandard/files/pages/files/spdxversion2.1.pdf#page=10 where the Cardinality is Mandatory. So, what should we do ?

relationship to other elements. Mandatory, one. Type: str.
- ext_document_references: External SPDX documents referenced within the
given SPDX document. Optional, one or many. Type: ExternalDocumentRef
- comment: Comments on the SPDX file, optional one. Type: str
- namespace: SPDX document specific namespace. Mandatory, one. Type: str
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need to make this mandatory again?

Note that your doc does not seem to match what actually happens: this does not seem to be mandatory in the code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

    def validate_namespace(self, messages=None):
        # FIXME: messages should be returned
        messages = messages if messages is not None else []

        if self.namespace is None:
            messages.append('Document has no namespace.')
            return False
        else:
            return True

has been defined which will throw an error if the namespace is not defined :)
Do you want me to add additional tests ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's ok for now

- creation_info: SPDX file creation info. Mandatory, one. Type: CreationInfo
- package: Package described by this document. Mandatory, one. Type: Package
- extracted_licenses: List of licenses extracted that are not part of the
Expand All @@ -198,12 +276,17 @@ class Document(object):
Type: Review.
"""

def __init__(self, version=None, data_license=None, comment=None, package=None):
def __init__(self, version=None, data_license=None, name=None, spdx_id=None,
namespace=None, comment=None, package=None):
# avoid recursive import
from spdx.creationinfo import CreationInfo
self.version = version
self.data_license = data_license
self.name = name
self.spdx_id = spdx_id
self.ext_document_references = []
self.comment = comment
self.namespace = namespace
self.creation_info = CreationInfo()
self.package = package
self.extracted_licenses = []
Expand All @@ -215,6 +298,9 @@ def add_review(self, review):
def add_extr_lic(self, lic):
    """Append `lic` to this document's list of extracted licenses."""
    extracted = self.extracted_licenses
    extracted.append(lic)

def add_ext_document_reference(self, ext_doc_ref):
    """Append `ext_doc_ref` to this document's external document references."""
    references = self.ext_document_references
    references.append(ext_doc_ref)

@property
def files(self):
    """Return the `files` attribute of this document's package."""
    pkg = self.package
    return pkg.files
Expand All @@ -237,11 +323,14 @@ def validate(self, messages=None):

return (self.validate_version(messages)
and self.validate_data_lics(messages)
and self.validate_name(messages)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we likely want the validation to be strict or not.... Note that this PR can still go through and this can be fixed afterwards

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm planning to handle the validation part after Phase 1. Is that okay?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

and self.validate_spdx_id(messages)
and self.validate_namespace(messages)
and self.validate_ext_document_references(messages)
and self.validate_creation_info(messages)
and self.validate_package(messages)
and self.validate_extracted_licenses(messages)
and self.validate_reviews(messages)
)
and self.validate_reviews(messages))

def validate_version(self, messages=None):
# FIXME: messages should be returned
Expand All @@ -268,6 +357,55 @@ def validate_data_lics(self, messages=None):
messages.append('Document data license must be CC0-1.0.')
return False

def validate_name(self, messages=None):
    """
    Check that the document has a name.

    Return True when `name` is not None. Otherwise append an error to
    `messages` (a throwaway list is used when none is given) and return
    False.
    """
    # FIXME: messages should be returned
    if messages is None:
        messages = []

    if self.name is not None:
        return True
    messages.append('Document has no name.')
    return False

def validate_namespace(self, messages=None):
    """
    Check that the document has a namespace.

    Return True when `namespace` is not None. Otherwise append an error
    to `messages` (a throwaway list is used when none is given) and
    return False.
    """
    # FIXME: messages should be returned
    if messages is None:
        messages = []

    if self.namespace is not None:
        return True
    messages.append('Document has no namespace.')
    return False

def validate_spdx_id(self, messages=None):
    """
    Check the document's SPDX identifier.

    Return True when `spdx_id` ends with 'SPDXRef-DOCUMENT'. When it is
    None or has a different ending, append the matching error to
    `messages` (a throwaway list is used when none is given) and return
    False.
    """
    # FIXME: messages should be returned
    if messages is None:
        messages = []

    doc_id = self.spdx_id
    if doc_id is None:
        problem = 'Document has no SPDX Identifier.'
    elif not doc_id.endswith('SPDXRef-DOCUMENT'):
        problem = 'Invalid Document SPDX Identifier value.'
    else:
        return True

    messages.append(problem)
    return False

def validate_ext_document_references(self, messages=None):
    """
    Validate every entry of `ext_document_references`.

    Each entry is checked, even after a failure, so that all problems
    are collected in `messages`. Return True only when every entry is an
    ExternalDocumentRef whose own validate() succeeds; an empty list is
    valid.
    """
    # FIXME: messages should be returned
    # Review question: could a simpler `messages = messages or []` be
    # enough here and elsewhere? It would not be equivalent: an empty list
    # passed by the caller is falsy and would be swapped for a fresh list,
    # so the caller would never see the appended messages.
    messages = messages if messages is not None else []

    valid = True
    for doc in self.ext_document_references:
        if isinstance(doc, ExternalDocumentRef):
            valid = doc.validate(messages) and valid
        else:
            # Convert the type to text first: `str + type` raises a
            # TypeError, which would hide the validation error.
            messages.append(
                'External document references must be of the type '
                'spdx.document.ExternalDocumentRef and not '
                + str(type(doc)))
            valid = False
    return valid

def validate_reviews(self, messages=None):
# FIXME: messages should be returned
messages = messages if messages is not None else []
Expand Down
22 changes: 21 additions & 1 deletion spdx/parsers/lexers/tagvalue.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ class Lexer(object):
# Top level fields
'SPDXVersion': 'DOC_VERSION',
'DataLicense': 'DOC_LICENSE',
'DocumentName': 'DOC_NAME',
'SPDXID': 'DOC_SPDX_ID',
'DocumentComment': 'DOC_COMMENT',
'DocumentNamespace': 'DOC_NAMESPACE',
'ExternalDocumentRef': 'EXT_DOC_REF',
# Creation info
'Creator': 'CREATOR',
'Created': 'CREATED',
Expand Down Expand Up @@ -83,7 +87,8 @@ class Lexer(object):

tokens = ['TEXT', 'TOOL_VALUE', 'UNKNOWN_TAG',
'ORG_VALUE', 'PERSON_VALUE',
'DATE', 'LINE', 'CHKSUM'] + list(reserved.values())
'DATE', 'LINE', 'CHKSUM', 'DOC_REF_ID',
'DOC_URI', 'EXT_DOC_REF_CHKSUM'] + list(reserved.values())

def t_text(self, t):
r':\s*<text>'
Expand Down Expand Up @@ -112,6 +117,21 @@ def t_CHKSUM(self, t):
t.value = t.value[1:].strip()
return t

def t_DOC_REF_ID(self, t):
    r':\s*DocumentRef-([A-Za-z0-9\+\.\-]+)'
    # NOTE(review): the raw string above is presumably read by the lexer
    # framework as this rule's pattern, so it must stay the first statement.
    # The match starts with ':'; drop it, then trim surrounding whitespace.
    without_colon = t.value[1:]
    t.value = without_colon.strip()
    return t

def t_DOC_URI(self, t):
    r'\s*((ht|f)tps?:\/\/\S*)'
    # NOTE(review): the raw string above is presumably read by the lexer
    # framework as this rule's pattern, so it must stay the first statement.
    # The pattern allows leading whitespace; keep only the URI text.
    uri_text = t.value.strip()
    t.value = uri_text
    return t

def t_EXT_DOC_REF_CHKSUM(self, t):
    r'\s*SHA1:\s*[a-f0-9]{40,40}'
    # NOTE(review): the raw string above is presumably read by the lexer
    # framework as this rule's pattern, so it must stay the first statement.
    # Only trim whitespace. The pattern has no leading ':' to drop, and
    # slicing off the first character would turn 'SHA1:' into 'HA1:'
    # whenever the match does not start with whitespace.
    t.value = t.value.strip()
    return t

def t_TOOL_VALUE(self, t):
r':\s*Tool:.+'
t.value = t.value[1:].strip()
Expand Down
64 changes: 63 additions & 1 deletion spdx/parsers/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,14 @@
ERROR_MESSAGES = {
'DOC_VERS_VALUE': 'Invalid specVersion \'{0}\' must be SPDX-M.N where M and N are numbers.',
'DOC_D_LICS': 'Invalid dataLicense \'{0}\' must be http://spdx.org/licenses/CC0-1.0.',
'DOC_SPDX_ID_VALUE': 'Invalid SPDXID value, SPDXID must be the document namespace appended '
'by "#SPDXRef-DOCUMENT", line: {0}',
'DOC_NAMESPACE_VALUE': 'Invalid DocumentNamespace value {0}, must contain a scheme (e.g. "https:") '
'and should not contain the "#" delimiter.',
'LL_VALUE': 'Invalid licenseListVersion \'{0}\' must be of the format N.N where N is a number',
'CREATED_VALUE': 'Invalid created value \'{0}\' must be date in ISO 8601 format.',
'CREATOR_VALUE': 'Invalid creator value \'{0}\' must be Organization, Tool or Person.',
'EXT_DOC_REF_VALUE': 'Failed to extract {0} from ExternalDocumentRef.',
'PKG_SUPPL_VALUE': 'Invalid package supplier value \'{0}\' must be Organization, Person or NOASSERTION.',
'PKG_ORIGINATOR_VALUE': 'Invalid package supplier value \'{0}\' must be Organization, Person or NOASSERTION.',
'PKG_DOWN_LOC': 'Invalid package download location value \'{0}\' must be a url or NONE or NOASSERTION',
Expand Down Expand Up @@ -742,6 +747,9 @@ def parse(self, fil):
for s, _p, o in self.graph.triples((None, RDF.type, self.spdx_namespace['SpdxDocument'])):
self.parse_doc_fields(s)

for s, _p, o in self.graph.triples((None, RDF.type, self.spdx_namespace['ExternalDocumentRef'])):
self.parse_ext_doc_ref(s)

for s, _p, o in self.graph.triples((None, RDF.type, self.spdx_namespace['CreationInfo'])):
self.parse_creation_info(s)

Expand Down Expand Up @@ -799,7 +807,20 @@ def parse_creation_info(self, ci_term):
self.value_error('LL_VALUE', o)

def parse_doc_fields(self, doc_term):
"""Parses the version, data license and comment."""
"""Parses the version, data license, name, SPDX Identifier, namespace,
and comment."""
try:
self.builder.set_doc_spdx_id(self.doc, doc_term)
except SPDXValueError:
self.value_error('DOC_SPDX_ID_VALUE', doc_term)
try:
if doc_term.count('#', 0, len(doc_term)) <= 1:
doc_namespace = doc_term.split('#')[0]
self.builder.set_doc_namespace(self.doc, doc_namespace)
else:
self.value_error('DOC_NAMESPACE_VALUE', doc_term)
except SPDXValueError:
self.value_error('DOC_NAMESPACE_VALUE', doc_term)
for _s, _p, o in self.graph.triples((doc_term, self.spdx_namespace['specVersion'], None)):
try:
self.builder.set_doc_version(self.doc, six.text_type(o))
Expand All @@ -816,9 +837,50 @@ def parse_doc_fields(self, doc_term):
except CardinalityError:
self.more_than_one_error('dataLicense')
break
for _s, _p, o in self.graph.triples(
(doc_term, self.spdx_namespace['name'], None)):
try:
self.builder.set_doc_name(self.doc, six.text_type(o))
except CardinalityError:
self.more_than_one_error('name')
break
for _s, _p, o in self.graph.triples((doc_term, RDFS.comment, None)):
try:
self.builder.set_doc_comment(self.doc, six.text_type(o))
except CardinalityError:
self.more_than_one_error('Document comment')
break

def parse_ext_doc_ref(self, ext_doc_ref_term):
"""
Parses the External Document ID, SPDX Document URI and Checksum.
"""
# Each section below queries self.graph for one predicate of
# `ext_doc_ref_term` and hands the value, converted to text, to the
# matching builder setter. An SPDXValueError raised by a setter is
# reported through self.value_error with the 'EXT_DOC_REF_VALUE' key.
# NOTE(review): indentation is not visible in this view; each `break`
# presumably belongs to its `except` branch - confirm against the file.

# External Document ID (predicate: externalDocumentId).
for _s, _p, o in self.graph.triples(
(ext_doc_ref_term,
self.spdx_namespace['externalDocumentId'],
None)):
try:
self.builder.set_ext_doc_id(self.doc, six.text_type(o))
except SPDXValueError:
self.value_error('EXT_DOC_REF_VALUE', 'External Document ID')
break

# SPDX Document URI (predicate: spdxDocument).
for _s, _p, o in self.graph.triples(
(ext_doc_ref_term,
self.spdx_namespace['spdxDocument'],
None)):
try:
self.builder.set_spdx_doc_uri(self.doc, six.text_type(o))
except SPDXValueError:
self.value_error('EXT_DOC_REF_VALUE', 'SPDX Document URI')
break

# Checksum: follow the `checksum` predicate to the checksum node, then
# read that node's `checksumValue`.
for _s, _p, checksum in self.graph.triples(
(ext_doc_ref_term, self.spdx_namespace['checksum'], None)):
for _, _, value in self.graph.triples(
(checksum, self.spdx_namespace['checksumValue'], None)):
try:
self.builder.set_chksum(self.doc, six.text_type(value))
except SPDXValueError:
self.value_error('EXT_DOC_REF_VALUE', 'Checksum')
break
Loading