Skip to content

Commit

Permalink
remove
Browse files Browse the repository at this point in the history
  • Loading branch information
srossross committed Feb 27, 2025
1 parent 6e4a3dc commit 1053898
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 8 deletions.
6 changes: 3 additions & 3 deletions score/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def pypi(package_name):
def pypi_score(package_name, source_url: Optional[str] = None):
package_data = get_pypi_package_data_cached(package_name)

if source_url is None:
if not source_url:
source_url = package_data.get("source_url")
source_data = None
if source_url:
Expand Down Expand Up @@ -81,7 +81,7 @@ def npm(package_name):
def npm_score(package_name, source_url: Optional[str] = None):
package_data = get_npm_package_data_cached(package_name)

if source_url is None:
if not source_url:
source_url = package_data.get("source_url")
source_data = None
if source_url:
Expand Down Expand Up @@ -114,7 +114,7 @@ def conda(channel, package_name):
def conda_score(channel, package_name, source_url: Optional[str] = None):
package_data = get_conda_package_data_cached(channel, package_name)

if source_url is None:
if not source_url:
source_url = package_data.get("source_url")
source_data = None
if source_url:
Expand Down
21 changes: 19 additions & 2 deletions score/git_vcs/license_detection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pandas as pd
from typing import Union
from pathlib import Path
from functools import lru_cache
from strsimpy import SorensenDice
Expand Down Expand Up @@ -116,13 +117,19 @@

def identify_license(license_content: str) -> dict:

license_content_without_copyright = "".join(
[line for line in license_content.splitlines() if not copyright_line(line)]
)

sd = SorensenDice()
similarities = []
for license_name, ref_license in get_all_licenses().items():
similarities.append(
{
"name": license_name,
"similarity": sd.similarity(license_content.strip(), ref_license),
"similarity": sd.similarity(
license_content_without_copyright.strip(), ref_license
),
}
)
similarities = pd.DataFrame(similarities).set_index("name")
Expand Down Expand Up @@ -152,12 +159,22 @@ def identify_license(license_content: str) -> dict:
license_dir = Path(__file__).parent / "licenses"


def copyright_line(line: Union[bytes, str]) -> bool:
    """Return True when *line* begins with the word "copyright".

    Surrounding whitespace is ignored and the match is case-insensitive.
    Both ``str`` and ``bytes`` lines are accepted.
    """
    # The prefix must be the same type as the line: ``str.startswith`` rejects
    # a bytes prefix and ``bytes.startswith`` rejects a str prefix.
    prefix = b"copyright" if isinstance(line, bytes) else "copyright"
    return line.strip().lower().startswith(prefix)


@lru_cache
def get_all_licenses():
    """Load the reference license texts found in ``license_dir``.

    Each file is read as bytes, lines for which ``copyright_line`` returns
    True are dropped, and the remainder is decoded (undecodable bytes are
    ignored) and stripped.

    Returns:
        dict mapping each license file's name to its filtered text. The
        result is memoized by ``lru_cache``, so the files are read once.
    """
    licenses = {}
    for license_file in license_dir.glob("*"):
        # glob("*") also yields sub-directories; opening one would raise.
        if not license_file.is_file():
            continue

        with open(license_file, "rb") as f:
            # Read the stream exactly once: an earlier f.read() would leave
            # nothing for the line filter and produce an empty license text.
            kept_lines = [line for line in f if not copyright_line(line)]

        data = b"".join(kept_lines).decode(errors="ignore")
        licenses[license_file.name] = data.strip()
    return licenses
13 changes: 10 additions & 3 deletions score/score/app_score.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import timedelta

import re
from .score import safe_date_diff
from .maturity import build_maturity_score
from .legal import build_legal_score
Expand All @@ -10,13 +10,20 @@
from ..notes import Note


def pypi_normalize(name):
    """Return *name* lower-cased with runs of ``-``, ``_`` and ``.`` collapsed to ``-``.

    A falsy *name* (``None`` or an empty string) yields ``None``.
    """
    if name:
        collapsed = re.sub(r"[-_.]+", "-", name)
        return collapsed.lower()
    return None


def score_python(package_data: dict, source_data: dict, score: Score):

if not package_data:
return

expected_name = source_data.get("py_package")
actual_name = package_data.get("name")
expected_name = pypi_normalize(source_data.get("py_package"))
actual_name = pypi_normalize(package_data.get("name"))

if not expected_name:
score.add_note(Note.NO_PROJECT_NAME)
Expand Down

0 comments on commit 1053898

Please sign in to comment.