Skip to content

Commit

Permalink
check wiki keys revamping
Browse files Browse the repository at this point in the history
  • Loading branch information
kauevestena committed Oct 28, 2024
1 parent f4c1678 commit 1e18dc3
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 22 deletions.
20 changes: 10 additions & 10 deletions data_quality/check_wiki_keys.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
import sys
sys.path.append('oswm_codebase')

sys.path.append("oswm_codebase")
from functions import *

# Checks, for every OSM key listed in the feature-keys file, whether a wiki
# page exists for it, and stores the keys that have none.
#
# NOTE(review): read_json, dump_json, check_if_wikipage_exists,
# record_datetime, sleep and gen_updating_infotable_page all come from the
# star-import of `functions` above; their exact behavior is not visible here.

# mapping of category -> iterable of OSM keys (iterated as such below)
tags_dict = read_json("quality_check/feature_keys.json")

# category -> list of OSM keys for which no wiki page was found
wiki_absence_dict = {}

for category in tags_dict:
    wiki_absence_dict[category] = []
    for osm_key in tags_dict[category]:
        print("testing ", osm_key)

        if not check_if_wikipage_exists(osm_key):
            print(" ", osm_key, " absent!!")
            wiki_absence_dict[category].append(osm_key)

dump_json(wiki_absence_dict, "quality_check/keys_without_wiki.json")

# to record data aging:
record_datetime("Wiki check for keys")
sleep(0.1)

# generate the "report" of the updating info
gen_updating_infotable_page()
25 changes: 25 additions & 0 deletions filtering_adapting_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,37 @@
[sidewalks_big_unary_buffer, crossings_big_unary_buffer]
)

# to store the keys present in raw data:
raw_data_keys = {}

# removing entries that aren't in the buffer:
# dealing with the data:
for category in gdf_dict:

# creating the reference:
curr_gdf = gdf_dict[category]

print(category)

print(" - Creating dict of OSM keys in data")

raw_data_keys[category] = [
k
for k in gdf_dict[category].keys()
if k
not in [
"geometry",
"osmid",
"osm_type",
"osm_key",
"osm_value",
"osm_id",
"nodes",
"element_type",
"id",
]
]

if (category != "sidewalks") and (category != "other_footways"):
print(f" - Removing unconnected features")

Expand Down Expand Up @@ -265,6 +288,8 @@

save_geoparquet(curr_gdf, f"data/{category}" + data_format)

# saving the keys in data:
dump_json(raw_data_keys, feat_keys_path)

print("Finishing...")

Expand Down
17 changes: 6 additions & 11 deletions functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,20 +545,15 @@ def print_relevant_columnamesV2(
return as_list


def check_if_wikipage_exists(
    name,
    category="Key:",
    wiki_page="https://wiki.openstreetmap.org/wiki/",
    timeout=10,
):
    """Return True if a HEAD request to the wiki page for *name* answers 200.

    The URL is built by plain concatenation: ``wiki_page + category + name``.

    Args:
        name: Page name, appended after the category prefix.
        category: Prefix placed before ``name`` (default ``"Key:"``).
        wiki_page: Base URL of the wiki.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True when the response status code is exactly 200. False for any
        other status code, or when the request itself fails (connection
        error, timeout, invalid URL, ...).
    """
    url = f"{wiki_page}{category}{name}"

    try:
        # An explicit timeout keeps a stalled connection from hanging the
        # whole run; requests waits indefinitely when none is given.
        response = requests.head(url, timeout=timeout)
    except requests.RequestException:
        # Best effort: a failed request is reported as "page not found"
        # instead of retrying forever.
        return False

    return response.status_code == 200


"""
Expand Down
3 changes: 2 additions & 1 deletion runners/weekly.sh
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# Weekly jobs. Each step prints a message if it fails; the `|| echo` fallback
# keeps a failing step from stopping the ones after it.
python oswm_codebase/getting_feature_versioning_data.py || echo "getting_feature_versioning_data.py failed"
python oswm_codebase/data_quality/check_wiki_keys.py || echo "check_wiki_keys.py failed"

0 comments on commit 1e18dc3

Please sign in to comment.