Skip to content

Commit

Permalink
Merge pull request #19 from mozilla-services/unique-entity-hosts-16
Browse files Browse the repository at this point in the history
for #16: check duplicate entities in json_verify
  • Loading branch information
groovecoder authored Aug 25, 2016
2 parents 0290ddb + b8105c1 commit 12e0ffb
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 42 deletions.
36 changes: 0 additions & 36 deletions disconnect-entitylist.json
Original file line number Diff line number Diff line change
Expand Up @@ -920,16 +920,6 @@
"dmtry.com"
]
},
"AdOn Network": {
"properties": [
"adonnetwork.com",
"dashboardad.net"
],
"resources": [
"adonnetwork.com",
"dashboardad.net"
]
},
"AdOnion": {
"properties": [
"adonion.com"
Expand Down Expand Up @@ -2009,14 +1999,6 @@
"pulsemgr.com"
]
},
"BuzzCity": {
"properties": [
"buzzcity.com"
],
"resources": [
"buzzcity.com"
]
},
"BuzzParadise": {
"properties": [
"buzzparadise.com"
Expand Down Expand Up @@ -3275,14 +3257,6 @@
"widgetserver.com"
]
},
"Flurry": {
"properties": [
"flurry.com"
],
"resources": [
"flurry.com"
]
},
"Flytxt": {
"properties": [
"flytxt.com"
Expand Down Expand Up @@ -5720,14 +5694,12 @@
},
"QUISMA": {
"properties": [
"i-behavior.com",
"iaded.com",
"quisma.com",
"quismatch.com",
"xmladed.com"
],
"resources": [
"i-behavior.com",
"iaded.com",
"quisma.com",
"quismatch.com",
Expand Down Expand Up @@ -9358,14 +9330,6 @@
"trumba.com"
]
},
"Tumblr": {
"properties": [
"tumblr.com"
],
"resources": [
"tumblr.com"
]
},
"Turn": {
"properties": [
"turn.com"
Expand Down
35 changes: 29 additions & 6 deletions scripts/json_verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
parser.add_argument("-f", "--file", help="filename to verify")

# URIs that make_errors_from_bad_uris() reports as "Bad URI" errors.
# NOTE(review): presumably filled by check_uri() when a check fails; the
# code that appends to it is not visible here -- confirm.
bad_uris = []
# Hosts that find_uris_in_entities() saw more than once, grouped by host
# type ("properties" / "resources").
dupe_hosts = {
"properties": [],
"resources": []
}
# Formatted error messages built by make_errors_from_bad_uris().
errors = []
# NOTE(review): presumably the lines of the file being verified, used by
# find_line_number() -- confirm against the code that fills it.
file_contents = []
# NOTE(review): presumably the name of the file being verified (see the
# -f/--file argument) -- confirm.
file_name = ""
Expand Down Expand Up @@ -119,15 +123,23 @@ def find_uris(categories_json):


def find_uris_in_entities(entitylist_json):
    """Check every host in an entity list and record duplicated hosts.

    ``entitylist_json`` is the parsed entity list: a non-empty dict that
    maps each entity name (unicode) to a dict whose keys are
    ``"properties"`` and/or ``"resources"`` and whose values are lists of
    hosts.

    Every host is passed to ``check_uri()``.  A host that was already seen
    under the same host type (in this or any earlier entity) is also
    appended to the module-level ``dupe_hosts[host_type]`` list, once per
    repeated occurrence.

    Raises:
        AssertionError: if the structure described above is violated.
    """
    # Hosts seen so far, tracked separately per host type.  Sets keep the
    # membership test O(1); the entity list holds thousands of hosts, so a
    # list here would make the whole scan quadratic.
    checked_uris = {
        "properties": set(),
        "resources": set()
    }
    assert len(entitylist_json.items()) > 0
    assert type(entitylist_json) is DictType
    for entity, host_types in entitylist_json.iteritems():
        assert type(entity) is UnicodeType
        assert type(host_types) is DictType
        for host_type, uris in host_types.iteritems():
            assert host_type in ["properties", "resources"]
            assert type(uris) is ListType
            seen = checked_uris[host_type]
            for uri in uris:
                if uri in seen:
                    dupe_hosts[host_type].append(uri)
                # Validate every occurrence, duplicates included.
                check_uri(uri)
                seen.add(uri)


def check_uri(uri):
Expand All @@ -142,7 +154,9 @@ def check_uri(uri):
parsed_uri = urlparse(uri)
try:
assert parsed_uri.scheme == ''
# domains of urls without schemes are parsed into 'path', so check the
# path for a port
assert ':' not in parsed_uri.path
assert parsed_uri.netloc == ''
assert parsed_uri.params == ''
assert parsed_uri.query == ''
Expand All @@ -169,9 +183,13 @@ def find_line_number(uri):


def make_errors_from_bad_uris():
    """Turn the collected bad URIs and duplicate hosts into error messages.

    Appends one formatted message to the module-level ``errors`` list for
    each entry in ``bad_uris`` and for each host recorded in ``dupe_hosts``
    (both host types).  Each message includes the value returned by
    ``find_line_number()`` for that URI or host.
    """
    for bad_uri in bad_uris:
        errors.append("\tError: Bad URI: %s\t: in line %s" %
                      (bad_uri, find_line_number(bad_uri)))
    # Only the host lists are needed; the host type is not part of the
    # message, so iterate the values directly.
    for hosts in dupe_hosts.itervalues():
        for host in hosts:
            errors.append("\tDupe: Dupe host: %s\t in line %s" %
                          (host, find_line_number(host)))


def finish():
Expand All @@ -190,6 +208,11 @@ def finish():
def reset():
global bad_uris
bad_uris = []
global dupe_hosts
dupe_hosts = {
"properties": [],
"resources": []
}
global errors
errors = []
global file_contents
Expand Down

0 comments on commit 12e0ffb

Please sign in to comment.