-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcrawl.py
37 lines (34 loc) · 1.09 KB
/
crawl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python3
import github3
import requests
import json
import os
import sys
# Root directory to walk for crate names, taken from the CRATES_INDEX
# environment variable (None when the variable is unset).
index = os.getenv("CRATES_INDEX")

# Report files, truncated on every run. The encoding is pinned to UTF-8 so
# that writing a repository URL containing non-ASCII characters does not
# depend on the platform's default locale encoding.
log_file = open("repos.txt", "w", encoding="utf-8")    # repository URLs to follow up on
no_repo = open("no_repos.txt", "w", encoding="utf-8")  # crates without a github.com repository
unlic = open("unlic.txt", "w", encoding="utf-8")       # crates with no license recorded
def handle_crate(name, *, timeout=30):
    """Classify one crate by its crates.io license and record the result.

    Fetches the crate's metadata from the crates.io API and writes one line
    to one of the module-level report files:

    * ``unlic``    -- the crate name, when the metadata has no license
      (key missing or value ``None``);
    * ``log_file`` -- the repository URL, when exactly one of ``MIT`` /
      ``Apache-2.0`` occurs in the license string and the repository URL
      contains ``github.com``;
    * ``no_repo``  -- the crate name, when exactly one of the two occurs but
      there is no such repository URL.

    Crates whose license string contains both or neither of the two are only
    echoed to stdout. Crates for which the API response contains ``errors``,
    or whose metadata cannot be fetched or decoded, are skipped.

    Args:
        name: Crate name, inserted verbatim into the API URL.
        timeout: Seconds to wait for the HTTP request before giving up.
    """
    url = "https://crates.io/api/v1/crates/{}".format(name)
    try:
        # Without a timeout a single stalled connection would hang the crawl.
        payload = requests.get(url, timeout=timeout).json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a body that is not JSON (e.g. an HTML error
        # page); skip this crate instead of aborting the whole run.
        print("{} could not be fetched: {}".format(name, err), file=sys.stderr)
        return

    if "errors" in payload:
        return

    crate = payload["crate"]
    license_text = crate.get("license")
    if license_text is None:
        print(name, file=unlic)
        return

    # Substring tests, so compound expressions such as "MIT/Apache-2.0"
    # set both flags.
    mit = "MIT" in license_text
    apache = "Apache-2.0" in license_text
    print("{} license: {}, {}, {}".format(name, mit, apache, license_text))

    # Only crates carrying exactly one of the two licenses are reported.
    if mit == apache:
        return

    repository = crate.get("repository")
    if repository is not None and "github.com" in repository:
        print("{} needs to be handled".format(name))
        print(repository, file=log_file)
    else:
        print(name, file=no_repo)
# Walk the index directory and classify every crate found in it. Each file
# name below the index root is treated as a crate name.
try:
    if index is None:
        # os.walk(None) would fail with an unhelpful error (or, on old
        # interpreters, silently walk the current directory).
        sys.exit("CRATES_INDEX must be set to the path of a crates.io index checkout")
    for _dirpath, subdirs, files in os.walk(index):
        # Prune hidden directories in place so os.walk does not descend into
        # them: a git checkout of the index carries a .git directory whose
        # files are not crates.
        subdirs[:] = [d for d in subdirs if not d.startswith(".")]
        for fname in files:
            # config.json is the registry's own configuration file, not a crate.
            if fname == "config.json":
                continue
            handle_crate(fname)
finally:
    # Flush and release the report files even if the crawl aborts part-way.
    for report in (log_file, no_repo, unlic):
        report.close()