Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updated to create score on request #119

Merged
merged 2 commits into from
Dec 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 91 additions & 1 deletion score/app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from fastapi import FastAPI, Request
from datetime import timedelta
from .app_utils import (
get_conda_package_data_cached,
get_pypi_package_data_cached,
Expand All @@ -8,7 +9,16 @@
convert_numpy_types,
max_age,
)

from .score.score import safe_date_diff
from .score.maturity import build_maturity_score
from .score.health_risk import (
build_health_risk_score,
Score,
CAUTION_NEEDED,
HIGH_RISK,
MODERATE_RISK,
)
from .notes import Note

app = FastAPI()

Expand All @@ -32,6 +42,86 @@ def pypi(package_name):
return {"ecosystem": "pypi", "package_name": package_name, "data": data}


def score_python(package_data: dict, source_data: dict, score: Score):
    """Cap the health-risk score using Python package metadata.

    Args:
        package_data: Package metadata; ``name`` and ``release_date`` are read.
            When falsy, the function does nothing.
        source_data: Source-repository metadata; ``py_package`` and
            ``latest_commit`` are read.
        score: Score object updated in place through ``score.limit`` and
            ``score.notes``.

    Returns:
        None. All findings are recorded on ``score``.
    """
    if not package_data:
        return

    declared_name = source_data.get("py_package")
    published_name = package_data.get("name")

    if not declared_name:
        # Without a declared project name nothing further can be compared.
        score.limit(CAUTION_NEEDED)
        score.notes.append(Note.NO_PROJECT_NAME.value)
        return

    if declared_name != published_name:
        score.limit(HIGH_RISK)
        score.notes.append(Note.PACKAGE_NAME_MISMATCH.value)

    skew = safe_date_diff(
        source_data.get("latest_commit"),
        package_data.get("release_date"),
    )
    if not skew:
        # No usable difference (or a zero one): nothing to flag.
        return

    threshold = timedelta(days=365)
    if skew > threshold:
        score.limit(MODERATE_RISK)
        score.notes.append(Note.PACKGE_SKEW_NOT_UPDATED.value)
    elif skew < -threshold:
        score.limit(MODERATE_RISK)
        score.notes.append(Note.PACKGE_SKEW_NOT_RELEASED.value)


def build_score(source_url, source_data, package_data):
    """Assemble the score payload for one package.

    Args:
        source_url: URL of the source repository, or None when unknown.
        source_data: Source-repository metadata mapping. ``None`` is accepted
            and treated as an empty mapping, because the ``/pypi/.../score``
            handler passes ``None`` when the package has no source URL.
        package_data: Package metadata forwarded to ``score_python``.

    Returns:
        dict with ``source_url``, ``packages``, ``ecosystem_destination``,
        ``maturity``, ``health_risk`` and ``last_updated``; plus ``license``,
        ``license_kind`` and ``license_modified`` when license info exists.
    """
    # Normalise missing source metadata so the lookups below cannot raise
    # AttributeError on None.
    source_data = source_data or {}

    score: dict = {
        "source_url": source_url,
        "packages": [],
        "ecosystem_destination": {
            "pypi": source_data.get("py_package"),
            "npm": None,
            "conda": None,
        },
    }

    score["maturity"] = build_maturity_score(source_url, source_data)
    health_risk = build_health_risk_score(source_data)
    score_python(package_data, source_data, health_risk)
    score["health_risk"] = health_risk.dict_string_notes()
    # .get(): metadata for a failed or empty repository may lack this key.
    score["last_updated"] = source_data.get("latest_commit")

    # Named license_info to avoid shadowing the ``license`` builtin.
    license_info = source_data.get("license")
    if license_info:
        score["license"] = license_info.get("license")
        score["license_kind"] = license_info.get("kind")
        score["license_modified"] = license_info.get("modified")

    return score


@app.get("/pypi/{package_name}/score")
def pypi_score(package_name):
    """Return package metadata, source metadata and the score for a PyPI package.

    Source metadata is fetched only when the package metadata carries a
    ``source_url``. When no source metadata is available, ``source`` and
    ``score`` are returned as ``None`` rather than failing inside
    ``build_score``.
    """
    import logging

    log = logging.getLogger(__name__)

    package_data = get_pypi_package_data_cached(package_name)
    source_url = package_data.get("source_url")
    source_data = None

    if source_url:
        # Debug-level logging replaces the unconditional print() calls.
        log.debug("fetching source metadata for %s", source_url)
        source_data = create_git_metadata_cached(source_url)
        source_data = convert_numpy_types(source_data)

    # build_score reads keys from source_data, so only call it when there is
    # something to score.
    score = None
    if source_data:
        score = build_score(source_url, source_data, package_data)

    return {
        "ecosystem": "pypi",
        "package_name": package_name,
        "package": package_data,
        "source": source_data,
        "score": score,
    }


@app.get("/npm/{package_name}")
def npm(package_name):
data = get_npm_package_data_cached(package_name)
Expand Down
13 changes: 12 additions & 1 deletion score/notes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@ def __new__(cls, *args, **kwds):
def __init__(self, note):
self.note = note

@classmethod
def lookup(cls, note_id):
    """Return the attribute name of the note whose ``value`` is *note_id*.

    The value-to-name table is built on first use from the class attributes
    that are ``Note`` instances and cached on the class as ``_lookup``.
    Unknown ids yield the string ``"UNKNOWN_<note_id>"``.
    """
    try:
        names_by_value = cls._lookup
    except AttributeError:
        names_by_value = {}
        for attr_name, member in vars(cls).items():
            if isinstance(member, Note):
                names_by_value[member.value] = attr_name
        cls._lookup = names_by_value

    return names_by_value.get(note_id, f"UNKNOWN_{note_id}")

UNSAFE_GIT_PROTOCOL = "Unsafe Git Protocol"
REPO_NOT_FOUND = "Repo not found"
REPO_EMPTY = "Repository is empty"
Expand Down Expand Up @@ -66,6 +77,6 @@ def __init__(self, note):

def to_df():
    """Return all notes as a DataFrame indexed by note id.

    Each ``Note`` member contributes one row: ``code`` is the attribute name,
    ``id`` is ``member.value`` (used as the index) and ``note`` is
    ``member.note``.
    """
    # Filter on isinstance rather than on the attribute name: public
    # non-member attributes such as the ``lookup`` classmethod have no
    # ``.value`` / ``.note`` and must be skipped.
    records = [
        (code, member.value, member.note)
        for code, member in vars(Note).items()
        if isinstance(member, Note)
    ]
    frame = pd.DataFrame.from_records(records, columns=["code", "id", "note"])
    return frame.set_index("id")
36 changes: 18 additions & 18 deletions score/score/health_risk.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,14 @@ def limit(self, new_score: str):
def dict(self):
    """Return the score as a plain mapping with ``value`` and ``notes`` keys.

    ``notes`` is the same list object held by the instance, not a copy.
    """
    summary = {}
    summary["value"] = self.value
    summary["notes"] = self.notes
    return summary

def dict_string_notes(self):
    """Return ``value`` and ``notes`` with each note id passed through ``Note.lookup``."""
    note_names = []
    for note_id in self.notes:
        note_names.append(Note.lookup(note_id))
    return {"value": self.value, "notes": note_names}

def score_contributors(git_info, score: Score):

def score_contributors(git_info: dict, score: Score):
mma_count = git_info["max_monthly_authors_count"]
recent_count = git_info["recent_authors_count"]
latest_commit = git_info.latest_commit
latest_commit = git_info["latest_commit"]

if mma_count < 3:
score.limit(CAUTION_NEEDED)
Expand All @@ -60,18 +63,19 @@ def score_contributors(git_info, score: Score):
score.notes.append(Note.LAST_COMMIT_5_YEARS.value)


def score_license(git_info, score: Score):
license_kind = git_info.license.get("kind")
modified = git_info.license.get("modified")
def score_license(git_info: dict, score: Score):
license = git_info.get("license", {})
license_kind = license.get("kind")
modified = license.get("modified")

if git_info.license.get("error"):
if license.get("error"):
score.limit(MODERATE_RISK)
note = git_info.license.get("error", Note.NO_LICENSE_INFO.value)
note = license.get("error", Note.NO_LICENSE_INFO.value)
score.notes.append(note)

elif not license_kind or license_kind == "Unknown":
score.limit(MODERATE_RISK)
note = git_info.license.get("error", Note.NO_OS_LICENSE.value)
note = license.get("error", Note.NO_OS_LICENSE.value)
score.notes.append(note)

if license_kind in LESS_PERMISSIVE_LICENSES:
Expand All @@ -83,7 +87,7 @@ def score_license(git_info, score: Score):
score.notes.append(Note.LICENSE_MODIFIED.value)


def score_python(git_info, score: Score):
def score_python(git_info: dict, score: Score):

packages = git_info.pypi_packages
expected_name = git_info.py_package
Expand All @@ -105,25 +109,21 @@ def score_python(git_info, score: Score):
return


def build_health_risk_score(git_info: dict) -> Score:
    """Build the health-risk ``Score`` for a repository metadata mapping.

    Args:
        git_info: Repository metadata as a dict; ``error`` and
            ``first_commit`` are read here, and the mapping is forwarded to
            ``score_license`` and ``score_contributors``.

    Returns:
        A ``Score`` whose value is ``"Unknown"`` when ``git_info`` carries an
        error, ``"Placeholder"`` when there is no first commit, and otherwise
        whatever the license and contributor checks leave it at.
    """
    score = Score()

    error = git_info.get("error")
    if error and not pd.isna(error):
        score.value = "Unknown"
        score.notes.append(error)
        return score

    first_commit = git_info.get("first_commit")
    # The literal "NaT" string check is kept from the attribute-based version
    # of this function; presumably a serialised NaT can reach here. Confirm
    # against the producers of git_info.
    if not first_commit or first_commit == "NaT" or pd.isnull(first_commit):
        score.value = "Placeholder"
        score.notes.append(Note.NO_COMMITS.value)
        return score

    score_license(git_info, score)
    score_contributors(git_info, score)
    # score_python is intentionally not called: this module's score_python
    # still reads git_info.pypi_packages / git_info.py_package as attributes,
    # which fails on a dict.

    return score
12 changes: 6 additions & 6 deletions score/score/maturity.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,24 @@
one_year_ago = pd.Timestamp.now() - pd.DateOffset(years=1)


def build_maturity_score(source_url, git_info):
def build_maturity_score(source_url: str, git_info: dict):
score = {"value": "Mature", "notes": []}

if git_info.error and not pd.isna(git_info.error):
if git_info.get("error") and not pd.isna(git_info["error"]):
score["value"] = "Unknown"
score["notes"].append(git_info.error)
score["notes"].append(git_info["error"])
return score

if git_info.first_commit == "NaT":
if not git_info.get("first_commit") or pd.isnull(git_info["first_commit"]):
score["value"] = "Placeholder"
score["notes"].append(Note.NO_COMMITS.value)
return score

if git_info.latest_commit < one_year_ago:
if git_info["latest_commit"] < one_year_ago:
score["value"] = "Legacy"
score["notes"].append(Note.LAST_COMMIT_OVER_A_YEAR.value)

if git_info.first_commit > one_year_ago:
if git_info["first_commit"] > one_year_ago:
score["value"] = "Experimental"
score["notes"].append(Note.FIRST_COMMIT_THIS_YEAR.value)

Expand Down
5 changes: 3 additions & 2 deletions score/score/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,9 @@ def build_score(source_url, row):
score["packages"].extend(
[fmt_conda(row.latest_commit, c) for c in row.conda_packages]
)
score["maturity"] = build_maturity_score(source_url, row)
score["health_risk"] = build_health_risk_score(row).dict()

score["maturity"] = build_maturity_score(source_url, row.to_dict())
score["health_risk"] = build_health_risk_score(row.to_dict()).dict()
score["timestamp"] = datetime.now()
score["last_updated"] = row.latest_commit

Expand Down
Loading