Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Duplicated dependencies package results #2944

Merged
merged 3 commits into from
May 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@
pypi.PipfileLockHandler,
pypi.PipRequirementsFileHandler,
pypi.PypiEggHandler,
pypi.PypiSdistArchiveHandler,
# pypi.PypiSdistArchiveHandler,
pypi.PypiWheelHandler,
pypi.PyprojectTomlHandler,
pypi.PythonEditableInstallationPkgInfoFile,
Expand Down
35 changes: 20 additions & 15 deletions src/packagedcode/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,36 +56,40 @@ def assemble(cls, package_data, resource, codebase):
If there is no package.json, we do not have a package instance. In this
case, we yield each of the dependencies in each lock file.
"""
datafile_name_patterns = (
lockfile_names = {
'package-lock.json',
'.package-lock.json',
'npm-shrinkwrap.json',
'yarn.lock',
)
}

package_resource = None
if resource.name == 'package.json':
package_resource = resource
elif resource.name in datafile_name_patterns:
elif resource.name in lockfile_names:
if resource.has_parent():
siblings = resource.siblings(codebase)
package_resource = [r for r in siblings if r.name == 'package.json']
if package_resource:
package_resource = package_resource[0]

if package_resource:
assert len(package_resource.package_data) == 1, f'Invalid package.json for {package_resource.path}'
pkg_data = package_resource.package_data[0]
pkg_data = models.PackageData.from_dict(pkg_data)

# do we have enough to create a package?
if package_data.purl:
if pkg_data.purl:
package = models.Package.from_package_data(
package_data=package_data,
package_data=pkg_data,
datafile_path=package_resource.path,
)
package_uid = package.package_uid

if not package.license_expression:
package.license_expression = compute_normalized_license(package.declared_license)

root = resource.parent(codebase)
root = package_resource.parent(codebase)
if root:
for npm_res in cls.walk_npm(resource=root, codebase=codebase):
if package_uid not in npm_res.for_packages:
Expand All @@ -96,27 +100,28 @@ def assemble(cls, package_data, resource, codebase):
if package_uid not in package_resource.for_packages:
package_resource.for_packages.append(package_uid)
package_resource.save(codebase)
yield package_resource

# Always yield the package resource in all cases
yield package_resource
yield package
else:
# we have no package, so deps are not for a specific package uid
package_uid = None

# in all cases yield possible dependencies
yield from yield_dependencies_from_package_data(package_data, package_resource.path, package_uid)
yield from yield_dependencies_from_package_data(pkg_data, package_resource.path, package_uid)

# Yield the package resource so that it is not processed any further
yield package_resource

for sibling in package_resource.siblings(codebase):
if sibling.name in datafile_name_patterns:
yield from yield_dependencies_from_package_resource(sibling, package_uid)
for lock_file in package_resource.siblings(codebase):
if lock_file.name in lockfile_names:
yield from yield_dependencies_from_package_resource(lock_file, package_uid)

if package_uid not in sibling.for_packages:
sibling.for_packages.append(package_uid)
sibling.save(codebase)
yield sibling
if package_uid not in lock_file.for_packages:
lock_file.for_packages.append(package_uid)
lock_file.save(codebase)
yield lock_file
else:
# we do not have a package.json
yield from yield_dependencies_from_package_resource(resource)
Expand Down
176 changes: 107 additions & 69 deletions src/packagedcode/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#

import ast
from distutils.core import setup
import io
import json
import logging
Expand Down Expand Up @@ -132,66 +133,128 @@ class BaseExtractedPythonLayout(BasePypiHandler):
def assemble(cls, package_data, resource, codebase):
# a source distribution can have many manifests
datafile_name_patterns = (
'PKG-INFO',
'setup.py',
'setup.cfg',
'Pipfile.lock',
'Pipfile',
) + PipRequirementsFileHandler.path_patterns

# TODO: process manifests in priority order: PKG-INFO first, then setup.py and
# setup.cfg, then pyproject.toml (for poetry), and finally the remaining
# dependency and lock files (Pipfile, Pipfile.lock, etc.)

package_resource = None
if resource.name in datafile_name_patterns:
if resource.name == 'PKG-INFO':
package_resource = resource

elif resource.name in datafile_name_patterns:
if resource.has_parent():
siblings = resource.siblings(codebase)
package_resource = [r for r in siblings if r.name == 'PKG-INFO']
if package_resource:
package_resource = package_resource[0]

package = None
if package_resource:
# do we have enough to create a package?
if package_data.purl:
pkg_data = package_resource.package_data[0]
pkg_data = models.PackageData.from_dict(pkg_data)
if pkg_data.purl:
package = models.Package.from_package_data(
package_data=package_data,
package_data=pkg_data,
datafile_path=package_resource.path,
)
package_uid = package.package_uid

if not package.license_expression:
package.license_expression = compute_normalized_license(package.declared_license)

root = package_resource.parent(codebase)
if root:
for py_res in root.walk(codebase):
if py_res.is_dir:
continue
if package_uid not in py_res.for_packages:
py_res.for_packages.append(package_uid)
py_res.save(codebase)
yield py_res
elif codebase.has_single_resource:
if package_uid not in package_resource.for_packages:
package_resource.for_packages.append(package_uid)
package_resource.save(codebase)
yield package_resource
yield package
else:
# we have no package, so deps are not for a specific package uid
package_uid = None
package_resource.for_packages.append(package.package_uid)
package_resource.save(codebase)
yield package_resource

# in all cases yield possible dependencies
yield from yield_dependencies_from_package_data(package_data, package_resource.path, package_uid)
yield package_resource
yield from yield_dependencies_from_package_data(
package_data=pkg_data,
datafile_path=package_resource.path,
package_uid=package.package_uid
)
else:
setup_resources = []
if resource.has_parent():
siblings = resource.siblings(codebase)
setup_resources = [r for r in siblings if r.name in ('setup.py', 'setup.cfg')]
setup_package_data = [
(setup_resource, models.PackageData.from_dict(setup_resource.package_data[0]))
for setup_resource in setup_resources
]
setup_package_data = sorted(setup_package_data, key=lambda s: bool(s[1].purl), reverse=True)
for setup_resource, setup_pkg_data in setup_package_data:
if setup_pkg_data.purl:
if not package:
package = models.Package.from_package_data(
package_data=setup_pkg_data,
datafile_path=setup_resource.path,
)
package_resource = setup_resource
else:
package.update(setup_pkg_data, setup_resource.path)
if package:
for setup_resource, setup_pkg_data in setup_package_data:
setup_resource.for_packages.append(package.package_uid)
setup_resource.save(codebase)
yield setup_resource

yield from yield_dependencies_from_package_data(
package_data=setup_pkg_data,
datafile_path=setup_resource.path,
package_uid=package.package_uid
)

if package:
if not package.license_expression:
package.license_expression = compute_normalized_license(package.declared_license)
package_uid = package.package_uid

root = package_resource.parent(codebase)
if root:
for py_res in cls.walk_pypi(resource=root, codebase=codebase):
if py_res.is_dir:
continue
if package_uid not in py_res.for_packages:
py_res.for_packages.append(package_uid)
py_res.save(codebase)
yield py_res
elif codebase.has_single_resource:
if package_uid not in package_resource.for_packages:
package_resource.for_packages.append(package_uid)
package_resource.save(codebase)

for sibling in package_resource.siblings(codebase):
if sibling.name in datafile_name_patterns:
yield from yield_dependencies_from_package_resource(sibling, package_uid)
yield package

if package_uid not in sibling.for_packages:
sibling.for_packages.append(package_uid)
sibling.save(codebase)
yield sibling
else:
yield from yield_dependencies_from_package_resource(resource)
package_uid = None

for sibling in package_resource.siblings(codebase):
if sibling.name in datafile_name_patterns:
yield from yield_dependencies_from_package_resource(
resource=sibling,
package_uid=package_uid
)

if package_uid and package_uid not in sibling.for_packages:
sibling.for_packages.append(package_uid)
sibling.save(codebase)
yield sibling

@classmethod
def assign_package_to_resources(cls, package, resource, codebase):
return models.DatafileHandler.assign_package_to_parent_tree(package, resource, codebase)
def walk_pypi(cls, resource, codebase):
    """
    Yield the descendants of the ``resource`` Resource in the ``codebase``
    Codebase, walking top-down and depth-first (pre-order): each child is
    yielded before its own descendants and before its next sibling.

    Skip any child named "site-packages" together with everything below it:
    this avoids reporting nested vendored packages as being part of their
    parent. Instead they will be reported on their own.

    The ``resource`` itself is not yielded.
    """
    for child in resource.children(codebase):
        # Do not yield nor descend into a nested "site-packages".
        if child.name == 'site-packages':
            continue

        yield child

        if child.is_dir:
            yield from cls.walk_pypi(child, codebase)


class PythonSdistPkgInfoFile(BaseExtractedPythonLayout):
Expand Down Expand Up @@ -697,31 +760,6 @@ def parse(cls, location):


class PipRequirementsFileHandler(BaseDependencyFileHandler):
"""
A pip requirements (or constraints) file.

Some example::
>>> PipRequirementsFileHandler.is_datafile('dev-requirements.txt', _bare_filename=True)
True
>>> PipRequirementsFileHandler.is_datafile('requirements.txt', _bare_filename=True)
True
>>> PipRequirementsFileHandler.is_datafile('requirement.txt', _bare_filename=True)
True
>>> PipRequirementsFileHandler.is_datafile('requirements.in', _bare_filename=True)
True
>>> PipRequirementsFileHandler.is_datafile('requirements.pip', _bare_filename=True)
True
>>> PipRequirementsFileHandler.is_datafile('requirements-dev.txt', _bare_filename=True)
True
>>> PipRequirementsFileHandler.is_datafile('some-requirements-dev.txt', _bare_filename=True)
True
>>> PipRequirementsFileHandler.is_datafile('requires.txt', _bare_filename=True)
True
>>> PipRequirementsFileHandler.is_datafile('requirements/base.txt', _bare_filename=True)
True
>>> PipRequirementsFileHandler.is_datafile('reqs.txt', _bare_filename=True)
True
"""
datasource_id = 'pip_requirements'

path_patterns = (
Expand Down
2 changes: 0 additions & 2 deletions tests/cluecode/data/copyrights/AliasDotCom_Website.html.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
what:
- copyrights
- holders

copyrights:
- Copyright (c) AliasDotCom Ltd.

holders:
- AliasDotCom Ltd.
10 changes: 3 additions & 7 deletions tests/cluecode/data/copyrights/copr.txt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,13 @@ what:
- holders
- copyrights_summary
- holders_summary

copyrights:
- copr. (c) Foobar Pvt. Ltd.

holders:
- Foobar Pvt. Ltd.

copyrights_summary:
- value: copr. (c) Foobar Pvt. Ltd.
count: 1

holders_summary:
- value: Foobar Pvt. Ltd.
count: 1
copyrights_summary:
- value: copr. (c) Foobar Pvt. Ltd.
count: 1
16 changes: 7 additions & 9 deletions tests/cluecode/data/copyrights/copyright_in_docstring.py.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
what:
- copyrights
- holders

copyrights:
- Copr. (c) 1999 Random Corp. Ltd.

holders:
- Random Corp. Ltd.
what:
- copyrights
- holders
copyrights:
- Copr. (c) 1999 Random Corp. Ltd.
holders:
- Random Corp. Ltd.
34 changes: 15 additions & 19 deletions tests/cluecode/data/copyrights/copyright_without_icon.txt.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
what:
- copyrights
- holders
- copyrights_summary
- holders_summary

copyrights:
- Copyright Foobar Pvt. Ltd

holders:
- Foobar Pvt. Ltd

copyrights_summary:
- value: Copyright Foobar Pvt. Ltd
count: 1

holders_summary:
- value: Foobar Pvt. Ltd
count: 1
what:
- copyrights
- holders
- copyrights_summary
- holders_summary
copyrights:
- Copyright Foobar Pvt. Ltd
holders:
- Foobar Pvt. Ltd
holders_summary:
- value: Foobar Pvt. Ltd
count: 1
copyrights_summary:
- value: Copyright Foobar Pvt. Ltd
count: 1
6 changes: 3 additions & 3 deletions tests/cluecode/data/copyrights/gibberish_holdername.c.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
what:
- copyrights
- holders
what:
- copyrights
- holders
Loading