From 7b04242c09987c7394438b9f7904a0ff35c7d21b Mon Sep 17 00:00:00 2001 From: chorng Date: Thu, 18 Apr 2024 18:41:46 +0200 Subject: [PATCH 01/80] Add function to read metadata from S3 --- requirements.txt | 2 ++ scripts/stac_vpp.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 scripts/stac_vpp.py diff --git a/requirements.txt b/requirements.txt index ceaa59e..d19fa3f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ #Libraries that your project use +boto3 pystac pystac[validation] +rasterio diff --git a/scripts/stac_vpp.py b/scripts/stac_vpp.py new file mode 100644 index 0000000..6835c7c --- /dev/null +++ b/scripts/stac_vpp.py @@ -0,0 +1,29 @@ +import io + +import boto3 +import rasterio as rio + +AWS_SESSION = boto3.Session(profile_name="hrvpp") +BUCKET = "HRVPP" +KEY = "CLMS/Pan-European/Biophysical/VPP/v01/2023/s2/VPP_2023_S2_T40KCC-010m_V105_s2_TPROD.tif" + + +def read_metadata_from_s3(bucket, key, aws_session): + s3 = aws_session.resource("s3") + obj = s3.Object(bucket, key) + body = obj.get()["Body"].read() + with rio.open(io.BytesIO(body)) as tif: + bounds = tif.bounds + crs = tif.crs + height = tif.height + width = tif.width + return {"bounds": bounds, "crs": crs, "height": height, "width": width} + + +def read_metadata_from_url(url): + with rio.open(url) as tif: + bounds = tif.bounds + crs = tif.crs + height = tif.height + width = tif.width + return {"bounds": bounds, "crs": crs, "height": height, "width": width} From 8b6259948a021500f2a675bd8ee5ea6ffe14fcfd Mon Sep 17 00:00:00 2001 From: chorng Date: Tue, 23 Apr 2024 16:16:34 +0200 Subject: [PATCH 02/80] Update vpp scripts * add vpp providers as constants * add function to get geometry in wgs84 * add function to compile description from file name * add function to get timestamp * use pystac to compile and save item --- requirements.txt | 2 ++ scripts/stac_vpp.py | 84 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 77 insertions(+), 
9 deletions(-) diff --git a/requirements.txt b/requirements.txt index d19fa3f..a38f271 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ #Libraries that your project use boto3 +pyproj pystac pystac[validation] rasterio +shapely diff --git a/scripts/stac_vpp.py b/scripts/stac_vpp.py index 6835c7c..4c4ea97 100644 --- a/scripts/stac_vpp.py +++ b/scripts/stac_vpp.py @@ -1,14 +1,44 @@ +from __future__ import annotations + import io +import os +from datetime import datetime +from typing import Final import boto3 +import pystac import rasterio as rio +from pyproj import Transformer +from pystac.provider import ProviderRole +from rasterio.coords import BoundingBox +from rasterio.crs import CRS +from shapely.geometry import Polygon, box, mapping AWS_SESSION = boto3.Session(profile_name="hrvpp") BUCKET = "HRVPP" KEY = "CLMS/Pan-European/Biophysical/VPP/v01/2023/s2/VPP_2023_S2_T40KCC-010m_V105_s2_TPROD.tif" +VPP_HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( + name="Copernicus Land Monitoring Service", + description=( + "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land" + " use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of" + " users in Europe and across the World in the field of environmental terrestrial applications." + ), + roles=[ProviderRole.HOST, ProviderRole.LICENSOR], + url="https://land.copernicus.eu", +) +VPP_PRODUCER_AND_PROCESSOR: Final[pystac.Provider] = pystac.Provider( + name="VITO NV", + description=( + "VITO is an independent Flemish research organisation in the area of cleantech and sustainable development." 
+ ), + roles=[ProviderRole.PROCESSOR, ProviderRole.PRODUCER], + url="https://vito.be", +) + -def read_metadata_from_s3(bucket, key, aws_session): +def read_metadata_from_s3(bucket: str, key: str, aws_session: boto3.Session) -> tuple[BoundingBox, CRS, int, int]: s3 = aws_session.resource("s3") obj = s3.Object(bucket, key) body = obj.get()["Body"].read() @@ -17,13 +47,49 @@ def read_metadata_from_s3(bucket, key, aws_session): crs = tif.crs height = tif.height width = tif.width - return {"bounds": bounds, "crs": crs, "height": height, "width": width} + return (bounds, crs, height, width, obj.last_modified) -def read_metadata_from_url(url): - with rio.open(url) as tif: - bounds = tif.bounds - crs = tif.crs - height = tif.height - width = tif.width - return {"bounds": bounds, "crs": crs, "height": height, "width": width} +def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: + transformer = Transformer.from_crs(crs.to_epsg(), 4326) + miny, minx = transformer.transform(bounds.left, bounds.bottom) + maxy, maxx = transformer.transform(bounds.right, bounds.top) + bbox = (minx, miny, maxx, maxy) + return box(*bbox) + + +def get_description(product_id: str) -> str: + product, year, _, tile_res, version, season = product_id.split("_") + return ( + f"The {year} season {season[-1]} version {version[1:]} {product} product of tile {tile_res[:6]} at" + f" {tile_res[8:10]} m resolution." 
+ ) + + +def get_datetime(product_id: str) -> tuple[datetime, datetime]: + year = int(product_id.split("_")[1]) + return (datetime(year=year, month=1, day=1), datetime(year=year, month=12, day=31)) + + +if __name__ == "__main__": + head, tail = os.path.split(KEY) + product_id, asset = tail.split(".")[0].rsplit("_", 1) + bounds, crs, height, width, created = read_metadata_from_s3(BUCKET, KEY, AWS_SESSION) + geom_wgs84 = get_geom_wgs84(bounds, crs) + description = get_description(product_id) + start_datetime, end_datetime = get_datetime(product_id) + + item = pystac.Item( + id=product_id, + geometry=mapping(geom_wgs84), + bbox=geom_wgs84.bounds, + datetime=None, + start_datetime=start_datetime, + end_datetime=end_datetime, + properties={"created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, + collection="vegetation-phenology-and-productivity", + ) + + item.common_metadata.providers = [VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR] + item.set_self_href("scripts/test.json") + item.save_object() From f983231494553039c2d8d49afa93ce3b76d3b4ef Mon Sep 17 00:00:00 2001 From: chorng Date: Tue, 23 Apr 2024 16:29:22 +0200 Subject: [PATCH 03/80] Restructure /scripts directory --- scripts/{stac_vpp.py => vpp/item.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{stac_vpp.py => vpp/item.py} (100%) diff --git a/scripts/stac_vpp.py b/scripts/vpp/item.py similarity index 100% rename from scripts/stac_vpp.py rename to scripts/vpp/item.py From df01179b5c02e918468c3ec43f341ceac8867cfa Mon Sep 17 00:00:00 2001 From: chorng Date: Tue, 23 Apr 2024 16:30:27 +0200 Subject: [PATCH 04/80] Fix test item href --- scripts/vpp/item.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/vpp/item.py b/scripts/vpp/item.py index 4c4ea97..9a3695a 100644 --- a/scripts/vpp/item.py +++ b/scripts/vpp/item.py @@ -91,5 +91,5 @@ def get_datetime(product_id: str) -> tuple[datetime, datetime]: ) item.common_metadata.providers = 
[VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR] - item.set_self_href("scripts/test.json") + item.set_self_href("scripts/vpp/test_item.json") item.save_object() From 166beb15248a7f66dadf3f6f973ed99442bb3ac1 Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Wed, 24 Apr 2024 23:34:18 +0800 Subject: [PATCH 05/80] Add vabh script --- scripts/vabh/vabh_item.py | 91 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 scripts/vabh/vabh_item.py diff --git a/scripts/vabh/vabh_item.py b/scripts/vabh/vabh_item.py new file mode 100644 index 0000000..ed4a8bf --- /dev/null +++ b/scripts/vabh/vabh_item.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import os +from datetime import datetime +from typing import Final + +import pystac +import rasterio as rio +from pyproj import Transformer +from pystac.provider import ProviderRole +from rasterio.coords import BoundingBox +from rasterio.crs import CRS +from shapely.geometry import Polygon, box, mapping + +KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/data/AT001_WIEN_UA2012_DHM_V020.tif" + +VPP_HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( + name="Copernicus Land Monitoring Service", + description=( + "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land" + " use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of" + " users in Europe and across the World in the field of environmental terrestrial applications." + ), + roles=[ProviderRole.HOST, ProviderRole.LICENSOR], + url="https://land.copernicus.eu", +) + +VPP_PRODUCER_AND_PROCESSOR: Final[pystac.Provider] = pystac.Provider( + name="VITO NV", + description=( + "VITO is an independent Flemish research organisation in the area of cleantech and sustainable development." 
+ ), + roles=[ProviderRole.PROCESSOR, ProviderRole.PRODUCER], + url="https://vito.be", +) + + +def read_metadata_from_tif(key: str) -> tuple[BoundingBox, CRS, int, int]: + with rio.open(key) as tif: + bounds = tif.bounds + crs = tif.crs + height = tif.height + width = tif.width + tif.close() + return (bounds, crs, height, width) # obj.last_modified + + +def read_metadata_from_xml(key: str) -> tuple[BoundingBox, CRS, int, int]: + return (bounds, crs, height, width, created) + + +def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: + transformer = Transformer.from_crs(crs.to_epsg(), 4326) + miny, minx = transformer.transform(bounds.left, bounds.bottom) + maxy, maxx = transformer.transform(bounds.right, bounds.top) + bbox = (minx, miny, maxx, maxy) + return box(*bbox) + + +def get_description(product_id: str) -> str: + country, city, year, product, version = product_id.split("_") + return f"{year[2:]} {city.title()} building height" + + +def get_datetime(product_id: str) -> tuple[datetime, datetime]: + year = int(product_id.split("_")[1]) + return (datetime(year=year, month=1, day=1), datetime(year=year, month=12, day=31)) + + +if __name__ == "__main__": + head, tail = os.path.split(KEY) + (product_id,) = tail.split(".")[0].rsplit("_", 0) + bounds, crs, height, width = read_metadata_from_tif(KEY) + geom_wgs84 = get_geom_wgs84(bounds, crs) + description = get_description(product_id) + start_datetime, end_datetime = get_datetime(product_id) + + item = pystac.Item( + id=product_id, + geometry=mapping(geom_wgs84), + bbox=geom_wgs84.bounds, + datetime=None, + start_datetime=start_datetime, + end_datetime=end_datetime, + properties={"created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, + collection="urban-atlas-building-height", + ) + + item.common_metadata.providers = [VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR] + item.set_self_href("scripts/vpp/test_item.json") + item.save_object() From 58e3c1860ebaa4e921de29b8c8299f56aff28840 Mon 
Sep 17 00:00:00 2001 From: chorng Date: Wed, 24 Apr 2024 18:51:08 +0200 Subject: [PATCH 06/80] Update vpp item creation script * finish item creation script * fix vpp schema (wip) --- schema/products/vpp.json | 6 ++ scripts/vpp/constants.py | 35 +++++++++ scripts/vpp/item.py | 150 ++++++++++++++++++++++++++++++--------- stacs/clms_catalog.json | 2 +- 4 files changed, 158 insertions(+), 35 deletions(-) create mode 100644 scripts/vpp/constants.py diff --git a/schema/products/vpp.json b/schema/products/vpp.json index 88ee16c..27bbf24 100644 --- a/schema/products/vpp.json +++ b/schema/products/vpp.json @@ -120,6 +120,12 @@ "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": ["licensor", "host"], "url": "https://land.copernicus.eu" + }, + { + "name": "VITO NV", + "description": "VITO is an independent Flemish research organisation in the area of cleantech and sustainable development.", + "roles": ["processor", "producer"], + "url": "https://vito.be" } ] }, diff --git a/scripts/vpp/constants.py b/scripts/vpp/constants.py new file mode 100644 index 0000000..4649d55 --- /dev/null +++ b/scripts/vpp/constants.py @@ -0,0 +1,35 @@ +from pystac.provider import ProviderRole +from typing import Final +import pystac +from pystac.link import Link +import os + +WORKING_DIR = os.getcwd() +VPP_HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( + name="Copernicus Land Monitoring Service", + description=( + "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land" + " use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of" + " users in Europe and across the World in the field of environmental 
terrestrial applications." + ), + roles=[ProviderRole.LICENSOR, ProviderRole.HOST], + url="https://land.copernicus.eu", +) +VPP_PRODUCER_AND_PROCESSOR: Final[pystac.Provider] = pystac.Provider( + name="VITO NV", + description=( + "VITO is an independent Flemish research organisation in the area of cleantech and sustainable development." + ), + roles=[ProviderRole.PROCESSOR, ProviderRole.PRODUCER], + url="https://vito.be", +) +CLMS_LICENSE: Final[Link] = Link( + rel="license", + target="https://land.copernicus.eu/en/data-policy" +) +CLMS_CATALOG: Final[Link] = Link( + rel=pystac.RelType.ROOT, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) +) +PARENT: Final[Link] = Link(rel=pystac.RelType.PARENT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json"))) +COLLECTION: Final[Link] = Link(rel=pystac.RelType.COLLECTION, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json"))) diff --git a/scripts/vpp/item.py b/scripts/vpp/item.py index 9a3695a..8893a16 100644 --- a/scripts/vpp/item.py +++ b/scripts/vpp/item.py @@ -1,41 +1,64 @@ from __future__ import annotations import io +import json import os from datetime import datetime -from typing import Final import boto3 import pystac import rasterio as rio +from constants import ( + CLMS_CATALOG, + CLMS_LICENSE, + COLLECTION, + PARENT, + VPP_HOST_AND_LICENSOR, + VPP_PRODUCER_AND_PROCESSOR, + WORKING_DIR, +) +from jsonschema import Draft7Validator +from jsonschema.exceptions import best_match from pyproj import Transformer -from pystac.provider import ProviderRole +from pystac.extensions.projection import ProjectionExtension from rasterio.coords import BoundingBox from rasterio.crs import CRS +from referencing import Registry, Resource from shapely.geometry import Polygon, box, mapping AWS_SESSION = 
boto3.Session(profile_name="hrvpp") BUCKET = "HRVPP" -KEY = "CLMS/Pan-European/Biophysical/VPP/v01/2023/s2/VPP_2023_S2_T40KCC-010m_V105_s2_TPROD.tif" - -VPP_HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( - name="Copernicus Land Monitoring Service", - description=( - "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land" - " use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of" - " users in Europe and across the World in the field of environmental terrestrial applications." - ), - roles=[ProviderRole.HOST, ProviderRole.LICENSOR], - url="https://land.copernicus.eu", -) -VPP_PRODUCER_AND_PROCESSOR: Final[pystac.Provider] = pystac.Provider( - name="VITO NV", - description=( - "VITO is an independent Flemish research organisation in the area of cleantech and sustainable development." - ), - roles=[ProviderRole.PROCESSOR, ProviderRole.PRODUCER], - url="https://vito.be", -) +# KEY = "CLMS/Pan-European/Biophysical/VPP/v01/2023/s2/VPP_2023_S2_T40KCC-010m_V105_s2_TPROD.tif" +TITLE_MAP = { + "AMPL": "Season Amplitude", + "EOSD": "Day of End-of-Season", + "EOSV": "Vegetation Index Value at EOSD", + "LENGTH": "Length of Season", + "LSLOPE": "Slope of The Greening Up Period", + "MAXD": "Day of Maximum-of-Season", + "MAXV": "Vegetation Index Value at MAXD", + "MINV": "Average Vegetation Index Value of Minima on Left and Right Sides of Each Season", + "QFLAG": "Quality Flag", + "RSLOPE": "Slope of The Senescent Period", + "SOSD": "Day of Start-of-Season", + "SOSV": "Vegetation Index Value at SOSD", + "SPROD": "Seasonal Productivity", + "TPROD": "Total Productivity", +} + + +def create_product_list(start_year, end_year): + product_list = [] + for year in range(start_year, end_year + 1): + for season in ("s1", "s2"): + product_list.append(f"CLMS/Pan-European/Biophysical/VPP/v01/{year}/{season}/") + return product_list + + +def create_page_iterator(aws_session, 
bucket, prefix): + client = aws_session.client("s3") + paginator = client.get_paginator("list_objects_v2") + return paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="-") def read_metadata_from_s3(bucket: str, key: str, aws_session: boto3.Session) -> tuple[BoundingBox, CRS, int, int]: @@ -59,11 +82,8 @@ def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: def get_description(product_id: str) -> str: - product, year, _, tile_res, version, season = product_id.split("_") - return ( - f"The {year} season {season[-1]} version {version[1:]} {product} product of tile {tile_res[:6]} at" - f" {tile_res[8:10]} m resolution." - ) + product, year, _, tile_res, season = product_id.split("_") + return f"The {year} season {season[-1]} {product} product of tile {tile_res[:6]} at {tile_res[8:10]} m resolution." def get_datetime(product_id: str) -> tuple[datetime, datetime]: @@ -71,14 +91,29 @@ def get_datetime(product_id: str) -> tuple[datetime, datetime]: return (datetime(year=year, month=1, day=1), datetime(year=year, month=12, day=31)) -if __name__ == "__main__": - head, tail = os.path.split(KEY) - product_id, asset = tail.split(".")[0].rsplit("_", 1) - bounds, crs, height, width, created = read_metadata_from_s3(BUCKET, KEY, AWS_SESSION) +def create_asset(asset_key: str) -> pystac.Asset: + parameter = asset_key.split("_")[-1].split(".")[0] + version = asset_key.split("_")[-3] + return pystac.Asset( + href="s3://HRVPP/" + asset_key, + media_type=pystac.MediaType.GEOTIFF, + title=TITLE_MAP[parameter] + f" {version}", + roles=["data"], + ) + + +def create_item(aws_session, bucket, tile): + client = aws_session.client("s3") + parameters = client.list_objects(Bucket=bucket, Prefix=tile, Delimiter=".")["CommonPrefixes"] + asset_keys = [parameter["Prefix"] + "tif" for parameter in parameters] + _, tail = os.path.split(asset_keys[0]) + product_id = "_".join((tail[:23], tail[29:31])) + bounds, crs, height, width, created = read_metadata_from_s3(bucket, asset_keys[0], 
aws_session) geom_wgs84 = get_geom_wgs84(bounds, crs) description = get_description(product_id) start_datetime, end_datetime = get_datetime(product_id) + # common metadata item = pystac.Item( id=product_id, geometry=mapping(geom_wgs84), @@ -89,7 +124,54 @@ def get_datetime(product_id: str) -> tuple[datetime, datetime]: properties={"created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, collection="vegetation-phenology-and-productivity", ) - item.common_metadata.providers = [VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR] - item.set_self_href("scripts/vpp/test_item.json") - item.save_object() + + # extensions + projection = ProjectionExtension.ext(item, add_if_missing=True) + projection.epsg = crs.to_epsg() + projection.bbox = [int(bounds.left), int(bounds.bottom), int(bounds.right), int(bounds.top)] + projection.shape = [height, width] + + # links + links = [CLMS_LICENSE, CLMS_CATALOG, PARENT, COLLECTION] + for link in links: + item.links.append(link) + + # assets + assets = {os.path.split(key)[-1][:-4].lower(): create_asset(key) for key in asset_keys} + for key, asset in assets.items(): + item.add_asset(key, asset) + return item + + +def get_stac_validator(product_schema): + with open(product_schema, encoding="utf-8") as f: + schema = json.load(f) + registry = Registry().with_resources( + [("http://example.com/schema.json", Resource.from_contents(schema))], + ) + return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) + + +def main(): + product_list = create_product_list(2017, 2023) + + # Need a for loop for full implementation + page_iterator = create_page_iterator(AWS_SESSION, BUCKET, product_list[0]) + for page in page_iterator: + tiles = [prefix["Prefix"] for prefix in page["CommonPrefixes"]] + + # Need threading for full implementation + item = create_item(AWS_SESSION, BUCKET, tiles[0]) + item.set_self_href( + os.path.join(WORKING_DIR, 
f"stacs/vegetation-phenology-and-productivity/{item.id}/{item.id}.json") + ) + validator = get_stac_validator("schemas/products/vpp.json") + error = best_match(validator.iter_errors(item.to_dict())) + assert error is None, error + item.save_object() + break + + +if __name__ == "__main__": + main() diff --git a/stacs/clms_catalog.json b/stacs/clms_catalog.json index 95e157c..77e6fe5 100644 --- a/stacs/clms_catalog.json +++ b/stacs/clms_catalog.json @@ -12,7 +12,7 @@ }, { "rel": "self", - "href": "https://git.sinergise.com/sh-vas/etc-di-stac/stac/clms_catalog.json", + "href": "./clms_catalog.json", "type": "application/json" }, { From 343ef08ca14df4ec62628d9950adc376c2b35556 Mon Sep 17 00:00:00 2001 From: chorng Date: Thu, 25 Apr 2024 12:37:42 +0200 Subject: [PATCH 07/80] Finalise vpp item creation script * implement threading * fix schema * fix formatting * update vpp sample --- schema/products/vpp.json | 40 ++++++------- scripts/vpp/constants.py | 31 ++++++---- scripts/vpp/item.py | 56 +++++++++++++------ .../VPP_2022_S2_T40KCC-010m_V105_s2.json | 6 ++ 4 files changed, 86 insertions(+), 47 deletions(-) diff --git a/schema/products/vpp.json b/schema/products/vpp.json index 27bbf24..df5ee8b 100644 --- a/schema/products/vpp.json +++ b/schema/products/vpp.json @@ -45,42 +45,42 @@ }, { "items": [ - { "type": "number", "minimum": -55.23, "maximum": -50.89 }, - { "type": "number", "minimum": 1.69, "maximum": 6.34 }, - { "type": "number", "minimum": -55.23, "maximum": -50.89 }, - { "type": "number", "minimum": 1.69, "maximum": 6.34 } + { "type": "number", "minimum": -62.09, "maximum": -60.11 }, + { "type": "number", "minimum": 13.46, "maximum": 15.37 }, + { "type": "number", "minimum": -62.09, "maximum": -60.11 }, + { "type": "number", "minimum": 13.46, "maximum": 15.37 } ] }, { "items": [ - { "type": "number", "minimum": -62.09, "maximum": -60.08 }, - { "type": "number", "minimum": 15.26, "maximum": 17.19 }, - { "type": "number", "minimum": -62.09, "maximum": -60.08 }, - 
{ "type": "number", "minimum": 15.26, "maximum": 17.19 } + { "type": "number", "minimum": 44.06, "maximum": 46.03 }, + { "type": "number", "minimum": -13.67, "maximum": -11.74 }, + { "type": "number", "minimum": 44.06, "maximum": 46.03 }, + { "type": "number", "minimum": -13.67, "maximum": -11.74 } ] }, { "items": [ - { "type": "number", "minimum": -62.1, "maximum": -60.1 }, - { "type": "number", "minimum": 13.45, "maximum": 15.38 }, - { "type": "number", "minimum": -62.1, "maximum": -60.1 }, - { "type": "number", "minimum": 13.45, "maximum": 15.38 } + { "type": "number", "minimum": 55.05, "maximum": 56.15 }, + { "type": "number", "minimum": -21.79, "maximum": -19.88 }, + { "type": "number", "minimum": 55.05, "maximum": 56.15 }, + { "type": "number", "minimum": -21.79, "maximum": -19.88 } ] }, { "items": [ - { "type": "number", "minimum": 44.05, "maximum": 46.04 }, - { "type": "number", "minimum": -13.68, "maximum": -11.73 }, - { "type": "number", "minimum": 44.05, "maximum": 46.04 }, - { "type": "number", "minimum": -13.68, "maximum": -11.73 } + { "type": "number", "minimum": -62.08, "maximum": -60.07 }, + { "type": "number", "minimum": 15.27, "maximum": 17.18 }, + { "type": "number", "minimum": -62.08, "maximum": -60.07 }, + { "type": "number", "minimum": 15.27, "maximum": 17.18 } ] }, { "items": [ - { "type": "number", "minimum": 55.04, "maximum": 56.16 }, - { "type": "number", "minimum": -21.81, "maximum": -19.87 }, - { "type": "number", "minimum": 55.04, "maximum": 56.16 }, - { "type": "number", "minimum": -21.81, "maximum": -19.87 } + { "type": "number", "minimum": -55.22, "maximum": -50.9 }, + { "type": "number", "minimum": 1.7, "maximum": 6.33 }, + { "type": "number", "minimum": -55.22, "maximum": -50.9 }, + { "type": "number", "minimum": 1.7, "maximum": 6.33 } ] } ] diff --git a/scripts/vpp/constants.py b/scripts/vpp/constants.py index 4649d55..2f7c612 100644 --- a/scripts/vpp/constants.py +++ b/scripts/vpp/constants.py @@ -1,8 +1,9 @@ -from 
pystac.provider import ProviderRole +import os from typing import Final + import pystac from pystac.link import Link -import os +from pystac.provider import ProviderRole WORKING_DIR = os.getcwd() VPP_HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( @@ -23,13 +24,23 @@ roles=[ProviderRole.PROCESSOR, ProviderRole.PRODUCER], url="https://vito.be", ) -CLMS_LICENSE: Final[Link] = Link( - rel="license", - target="https://land.copernicus.eu/en/data-policy" -) +CLMS_LICENSE: Final[Link] = Link(rel="license", target="https://land.copernicus.eu/en/data-policy") CLMS_CATALOG: Final[Link] = Link( - rel=pystac.RelType.ROOT, - target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) + rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) +) +PARENT: Final[Link] = Link( + rel=pystac.RelType.PARENT, + target=pystac.STACObject.from_file( + os.path.join( + WORKING_DIR, "stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json" + ) + ), +) +COLLECTION: Final[Link] = Link( + rel=pystac.RelType.COLLECTION, + target=pystac.STACObject.from_file( + os.path.join( + WORKING_DIR, "stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json" + ) + ), ) -PARENT: Final[Link] = Link(rel=pystac.RelType.PARENT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json"))) -COLLECTION: Final[Link] = Link(rel=pystac.RelType.COLLECTION, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json"))) diff --git a/scripts/vpp/item.py b/scripts/vpp/item.py index 8893a16..537e508 100644 --- a/scripts/vpp/item.py +++ b/scripts/vpp/item.py @@ -1,8 +1,11 @@ from __future__ import annotations import io +import itertools as it import json +import logging import os +from 
concurrent.futures import ThreadPoolExecutor from datetime import datetime import boto3 @@ -25,10 +28,11 @@ from rasterio.crs import CRS from referencing import Registry, Resource from shapely.geometry import Polygon, box, mapping +from tqdm import tqdm +LOGGER = logging.getLogger(__name__) AWS_SESSION = boto3.Session(profile_name="hrvpp") BUCKET = "HRVPP" -# KEY = "CLMS/Pan-European/Biophysical/VPP/v01/2023/s2/VPP_2023_S2_T40KCC-010m_V105_s2_TPROD.tif" TITLE_MAP = { "AMPL": "Season Amplitude", "EOSD": "Day of End-of-Season", @@ -117,7 +121,7 @@ def create_item(aws_session, bucket, tile): item = pystac.Item( id=product_id, geometry=mapping(geom_wgs84), - bbox=geom_wgs84.bounds, + bbox=list(geom_wgs84.bounds), datetime=None, start_datetime=start_datetime, end_datetime=end_datetime, @@ -153,24 +157,42 @@ def get_stac_validator(product_schema): return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) +def create_vpp_item(aws_session, bucket, validator, tile): + item = create_item(aws_session, bucket, tile) + item.set_self_href( + os.path.join(WORKING_DIR, f"stacs/vegetation-phenology-and-productivity/{item.id}/{item.id}.json") + ) + error_msg = best_match(validator.iter_errors(item.to_dict())) + try: + assert error_msg is None, f"Failed to create {item.id} item. Reason: {error_msg}." 
+ item.save_object() + except AssertionError as error: + LOGGER.error(error) + + def main(): + logging.basicConfig(filename="create_vpp_stac.log") + validator = get_stac_validator("schema/products/vpp.json") + product_list = create_product_list(2017, 2023) - # Need a for loop for full implementation - page_iterator = create_page_iterator(AWS_SESSION, BUCKET, product_list[0]) - for page in page_iterator: - tiles = [prefix["Prefix"] for prefix in page["CommonPrefixes"]] - - # Need threading for full implementation - item = create_item(AWS_SESSION, BUCKET, tiles[0]) - item.set_self_href( - os.path.join(WORKING_DIR, f"stacs/vegetation-phenology-and-productivity/{item.id}/{item.id}.json") - ) - validator = get_stac_validator("schemas/products/vpp.json") - error = best_match(validator.iter_errors(item.to_dict())) - assert error is None, error - item.save_object() - break + # remove [:1] for full implementation + for product in product_list[:1]: + page_iterator = create_page_iterator(AWS_SESSION, BUCKET, product) + for page in page_iterator: + tiles = [prefix["Prefix"] for prefix in page["CommonPrefixes"]] + with ThreadPoolExecutor(max_workers=100) as executor: + list( + tqdm( + executor.map( + create_vpp_item, it.repeat(AWS_SESSION), it.repeat(BUCKET), it.repeat(validator), tiles + ), + total=len(tiles), + ) + ) + + # remove break for full implementation + break if __name__ == "__main__": diff --git a/stacs/vegetation-phenology-and-productivity/VPP_2022_S2_T40KCC-010m_V105_s2/VPP_2022_S2_T40KCC-010m_V105_s2.json b/stacs/vegetation-phenology-and-productivity/VPP_2022_S2_T40KCC-010m_V105_s2/VPP_2022_S2_T40KCC-010m_V105_s2.json index b7c28e2..5b89c1a 100644 --- a/stacs/vegetation-phenology-and-productivity/VPP_2022_S2_T40KCC-010m_V105_s2/VPP_2022_S2_T40KCC-010m_V105_s2.json +++ b/stacs/vegetation-phenology-and-productivity/VPP_2022_S2_T40KCC-010m_V105_s2/VPP_2022_S2_T40KCC-010m_V105_s2.json @@ -30,6 +30,12 @@ "description": "The Copernicus Land Monitoring Service provides 
geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": ["licensor", "host"], "url": "https://land.copernicus.eu" + }, + { + "name": "VITO NV", + "description": "VITO is an independent Flemish research organisation in the area of cleantech and sustainable development.", + "roles": ["processor", "producer"], + "url": "https://vito.be" } ], "proj:epsg": 32740, From 62f5cea6be329c19dba34baa5a7e76e625a43ae2 Mon Sep 17 00:00:00 2001 From: chorng Date: Thu, 25 Apr 2024 19:05:10 +0200 Subject: [PATCH 08/80] Add collection creation script * add collection script * update constants * update item script * remove projection extension from collection sample * update sample item update vpp schema --- schema/products/vpp.json | 13 ++- scripts/vpp/collection.py | 57 +++++++++++ scripts/vpp/constants.py | 98 +++++++++++++++---- scripts/vpp/item.py | 57 ++++------- .../VPP_2022_S2_T40KCC-010m_s2.json} | 37 ++++--- ...vegetation-phenology-and-productivity.json | 13 ++- 6 files changed, 195 insertions(+), 80 deletions(-) create mode 100644 scripts/vpp/collection.py rename stacs/vegetation-phenology-and-productivity/{VPP_2022_S2_T40KCC-010m_V105_s2/VPP_2022_S2_T40KCC-010m_V105_s2.json => VPP_2022_S2_T40KCC-010m_s2/VPP_2022_S2_T40KCC-010m_s2.json} (86%) diff --git a/schema/products/vpp.json b/schema/products/vpp.json index df5ee8b..3b4eba6 100644 --- a/schema/products/vpp.json +++ b/schema/products/vpp.json @@ -227,8 +227,7 @@ "stac_version": { "const": "1.0.0" }, "stac_extensions": { "const": [ - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/projection/v1.1.0/schema.json" + "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json" ] }, "type": { "const": "Collection" }, @@ -257,7 +256,13 @@ "name": 
"Copernicus Land Monitoring Service", "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": ["licensor", "host"], - "url": "https://land.copernicus.eu/en" + "url": "https://land.copernicus.eu" + }, + { + "name": "VITO NV", + "description": "VITO is an independent Flemish research organisation in the area of cleantech and sustainable development.", + "roles": ["processor", "producer"], + "url": "https://vito.be" } ] }, @@ -267,7 +272,7 @@ "bbox": [[-25, 26, 45, 72]] }, "temporal": { - "interval": [["2017-01-01T00:00:00.000Z", null]] + "interval": [["2017-01-01T00:00:00Z", null]] } } }, diff --git a/scripts/vpp/collection.py b/scripts/vpp/collection.py new file mode 100644 index 0000000..7d49938 --- /dev/null +++ b/scripts/vpp/collection.py @@ -0,0 +1,57 @@ +import os +import pystac +import json +from pystac.extensions.item_assets import ItemAssetsExtension, AssetDefinition +from glob import glob +from constants import COLLECTION_DESCRIPTION, COLLECTION_ID, COLLECTION_EXTENT, COLLECTION_TITLE, WORKING_DIR, COLLECTION_KEYWORDS, VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR, COLLECTION_SUMMARIES, TITLE_MAP, CLMS_LICENSE, STAC_DIR +from jsonschema import Draft7Validator +from jsonschema.exceptions import best_match +from referencing import Registry, Resource + +def get_stac_validator(product_schema: str) -> Draft7Validator: + with open(product_schema, encoding="utf-8") as f: + schema = json.load(f) + registry = Registry().with_resources( + [("http://example.com/schema.json", Resource.from_contents(schema))], + ) + return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) + + +def create_collection(): + collection = pystac.Collection( + id=COLLECTION_ID, + 
description=COLLECTION_DESCRIPTION, + extent=COLLECTION_EXTENT, + title=COLLECTION_TITLE, + keywords=COLLECTION_KEYWORDS, + providers=[VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR], + summaries=COLLECTION_SUMMARIES, + ) + + # extensions + item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True) + item_assets.item_assets = { + key: AssetDefinition({"title": TITLE_MAP[key], "media_type": pystac.MediaType.GEOTIFF, "roles": ["data"]}) + for key in TITLE_MAP + } + + # links + collection.links.append(CLMS_LICENSE) + + # add items + items = glob(f"{WORKING_DIR}/{STAC_DIR}/{COLLECTION_ID}/**/VPP*.json") + for item in items: + stac_object = pystac.read_file(item) + collection.add_item(stac_object, title=stac_object.id) + + collection.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{collection.id}/{collection.id}.json")) + catalog = pystac.read_file(f"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json") + collection.set_root(catalog) + collection.set_parent(catalog) + validator = get_stac_validator("schema/products/vpp.json") + error_msg = best_match(validator.iter_errors(collection.to_dict())) + assert error_msg is None, f"Failed to create {collection.id} collection. Reason: {error_msg}." 
+ collection.save_object() + +if __name__ == "__main__": + create_collection() diff --git a/scripts/vpp/constants.py b/scripts/vpp/constants.py index 2f7c612..7769e54 100644 --- a/scripts/vpp/constants.py +++ b/scripts/vpp/constants.py @@ -1,11 +1,83 @@ import os +from datetime import datetime from typing import Final +import boto3 import pystac from pystac.link import Link from pystac.provider import ProviderRole -WORKING_DIR = os.getcwd() +AWS_SESSION = boto3.Session(profile_name="hrvpp") +BUCKET = "HRVPP" +CLMS_LICENSE: Final[Link] = Link(rel="license", target="https://land.copernicus.eu/en/data-policy") +COLLECTION_DESCRIPTION = ( + "Vegetation Phenology and Productivity Parameters (VPP) product is part of the Copernicus Land Monitoring Service" + " (CLMS), pan-European High Resolution Vegetation Phenology and Productivity (HR-VPP) product suite. The VPP" + " product is comprised of 13 parameters that describe specific stages of the seasonal vegetation growth cycle." + " These parameters are extracted from Seasonal Trajectories of the Plant Phenology Index (PPI) derived from" + " Sentinel-2 satellite observations at 10m resolution. Since growing seasons can traverse years, VPP parameters are" + " provided for a maximum of two growing seasons per year. The parameters include (1) start of season (date, PPI" + " value and slope), (2) end of season (date, PPI value and slope), (3)length of season, (4) minimum of season, (4)" + " peak of the season (date and PPI value), (5) amplitude, (6) small integrated value and (7) large integrated" + " value." 
+) +COLLECTION_EXTENT = pystac.Extent( + spatial=pystac.SpatialExtent([[-25, 26, 45, 72]]), + temporal=pystac.TemporalExtent([[datetime(year=2017, month=1, day=1), None]]), +) +COLLECTION_ID = "vegetation-phenology-and-productivity" +COLLECTION_KEYWORDS = [ + "agriculture", + "clms", + "derived data", + "open data", + "phenology", + "plant phenology index", + "vegetation", +] +COLLECTION_SUMMARIES = pystac.Summaries( + { + "proj:epsg": [ + 32620, + 32621, + 32622, + 32625, + 32626, + 32627, + 32628, + 32629, + 32630, + 32631, + 32632, + 32633, + 32634, + 32635, + 32636, + 32637, + 32638, + 32738, + 32740, + ] + } +) +COLLECTION_TITLE = "Vegetation Phenology and Productivity Parameters" +STAC_DIR = "stac_tests" +TITLE_MAP = { + "AMPL": "Season Amplitude", + "EOSD": "Day of End-of-Season", + "EOSV": "Vegetation Index Value at EOSD", + "LENGTH": "Length of Season", + "LSLOPE": "Slope of The Greening Up Period", + "MAXD": "Day of Maximum-of-Season", + "MAXV": "Vegetation Index Value at MAXD", + "MINV": "Average Vegetation Index Value of Minima on Left and Right Sides of Each Season", + "QFLAG": "Quality Flag", + "RSLOPE": "Slope of The Senescent Period", + "SOSD": "Day of Start-of-Season", + "SOSV": "Vegetation Index Value at SOSD", + "SPROD": "Seasonal Productivity", + "TPROD": "Total Productivity", +} VPP_HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( name="Copernicus Land Monitoring Service", description=( @@ -24,23 +96,15 @@ roles=[ProviderRole.PROCESSOR, ProviderRole.PRODUCER], url="https://vito.be", ) -CLMS_LICENSE: Final[Link] = Link(rel="license", target="https://land.copernicus.eu/en/data-policy") -CLMS_CATALOG: Final[Link] = Link( +WORKING_DIR = os.getcwd() +CLMS_CATALOG_LINK: Final[Link] = Link( rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) ) -PARENT: Final[Link] = Link( - rel=pystac.RelType.PARENT, - target=pystac.STACObject.from_file( - os.path.join( - WORKING_DIR, 
"stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json" - ) - ), -) -COLLECTION: Final[Link] = Link( +COLLECTION_LINK: Final[Link] = Link( rel=pystac.RelType.COLLECTION, - target=pystac.STACObject.from_file( - os.path.join( - WORKING_DIR, "stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json" - ) - ), + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), +) +ITEM_PARENT_LINK: Final[Link] = Link( + rel=pystac.RelType.PARENT, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), ) diff --git a/scripts/vpp/item.py b/scripts/vpp/item.py index 537e508..e6d2821 100644 --- a/scripts/vpp/item.py +++ b/scripts/vpp/item.py @@ -12,10 +12,15 @@ import pystac import rasterio as rio from constants import ( - CLMS_CATALOG, + AWS_SESSION, + BUCKET, + CLMS_CATALOG_LINK, CLMS_LICENSE, - COLLECTION, - PARENT, + COLLECTION_ID, + COLLECTION_LINK, + ITEM_PARENT_LINK, + STAC_DIR, + TITLE_MAP, VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR, WORKING_DIR, @@ -31,27 +36,9 @@ from tqdm import tqdm LOGGER = logging.getLogger(__name__) -AWS_SESSION = boto3.Session(profile_name="hrvpp") -BUCKET = "HRVPP" -TITLE_MAP = { - "AMPL": "Season Amplitude", - "EOSD": "Day of End-of-Season", - "EOSV": "Vegetation Index Value at EOSD", - "LENGTH": "Length of Season", - "LSLOPE": "Slope of The Greening Up Period", - "MAXD": "Day of Maximum-of-Season", - "MAXV": "Vegetation Index Value at MAXD", - "MINV": "Average Vegetation Index Value of Minima on Left and Right Sides of Each Season", - "QFLAG": "Quality Flag", - "RSLOPE": "Slope of The Senescent Period", - "SOSD": "Day of Start-of-Season", - "SOSV": "Vegetation Index Value at SOSD", - "SPROD": "Seasonal Productivity", - "TPROD": "Total Productivity", -} - - -def create_product_list(start_year, end_year): + + +def create_product_list(start_year: int, end_year: int) -> 
list[str]: product_list = [] for year in range(start_year, end_year + 1): for season in ("s1", "s2"): @@ -59,10 +46,10 @@ def create_product_list(start_year, end_year): return product_list -def create_page_iterator(aws_session, bucket, prefix): +def create_page_iterator(aws_session: boto3.Session, bucket: str, prefix: str): client = aws_session.client("s3") paginator = client.get_paginator("list_objects_v2") - return paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="-") + return paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="-", MaxKeys=10) def read_metadata_from_s3(bucket: str, key: str, aws_session: boto3.Session) -> tuple[BoundingBox, CRS, int, int]: @@ -99,14 +86,14 @@ def create_asset(asset_key: str) -> pystac.Asset: parameter = asset_key.split("_")[-1].split(".")[0] version = asset_key.split("_")[-3] return pystac.Asset( - href="s3://HRVPP/" + asset_key, + href=f"s3://{BUCKET}/" + asset_key, media_type=pystac.MediaType.GEOTIFF, title=TITLE_MAP[parameter] + f" {version}", roles=["data"], ) -def create_item(aws_session, bucket, tile): +def create_item(aws_session: boto3.Session, bucket: str, tile: str) -> pystac.Item: client = aws_session.client("s3") parameters = client.list_objects(Bucket=bucket, Prefix=tile, Delimiter=".")["CommonPrefixes"] asset_keys = [parameter["Prefix"] + "tif" for parameter in parameters] @@ -126,7 +113,7 @@ def create_item(aws_session, bucket, tile): start_datetime=start_datetime, end_datetime=end_datetime, properties={"created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, - collection="vegetation-phenology-and-productivity", + collection=COLLECTION_ID, ) item.common_metadata.providers = [VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR] @@ -137,7 +124,7 @@ def create_item(aws_session, bucket, tile): projection.shape = [height, width] # links - links = [CLMS_LICENSE, CLMS_CATALOG, PARENT, COLLECTION] + links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] for link 
in links: item.links.append(link) @@ -148,7 +135,7 @@ def create_item(aws_session, bucket, tile): return item -def get_stac_validator(product_schema): +def get_stac_validator(product_schema: str) -> Draft7Validator: with open(product_schema, encoding="utf-8") as f: schema = json.load(f) registry = Registry().with_resources( @@ -157,11 +144,9 @@ def get_stac_validator(product_schema): return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) -def create_vpp_item(aws_session, bucket, validator, tile): +def create_vpp_item(aws_session: boto3.Session, bucket: str, validator: Draft7Validator, tile: str) -> None: item = create_item(aws_session, bucket, tile) - item.set_self_href( - os.path.join(WORKING_DIR, f"stacs/vegetation-phenology-and-productivity/{item.id}/{item.id}.json") - ) + item.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json")) error_msg = best_match(validator.iter_errors(item.to_dict())) try: assert error_msg is None, f"Failed to create {item.id} item. Reason: {error_msg}." 
@@ -181,7 +166,7 @@ def main(): page_iterator = create_page_iterator(AWS_SESSION, BUCKET, product) for page in page_iterator: tiles = [prefix["Prefix"] for prefix in page["CommonPrefixes"]] - with ThreadPoolExecutor(max_workers=100) as executor: + with ThreadPoolExecutor(max_workers=10) as executor: list( tqdm( executor.map( diff --git a/stacs/vegetation-phenology-and-productivity/VPP_2022_S2_T40KCC-010m_V105_s2/VPP_2022_S2_T40KCC-010m_V105_s2.json b/stacs/vegetation-phenology-and-productivity/VPP_2022_S2_T40KCC-010m_s2/VPP_2022_S2_T40KCC-010m_s2.json similarity index 86% rename from stacs/vegetation-phenology-and-productivity/VPP_2022_S2_T40KCC-010m_V105_s2/VPP_2022_S2_T40KCC-010m_V105_s2.json rename to stacs/vegetation-phenology-and-productivity/VPP_2022_S2_T40KCC-010m_s2/VPP_2022_S2_T40KCC-010m_s2.json index 5b89c1a..5ea6c23 100644 --- a/stacs/vegetation-phenology-and-productivity/VPP_2022_S2_T40KCC-010m_V105_s2/VPP_2022_S2_T40KCC-010m_V105_s2.json +++ b/stacs/vegetation-phenology-and-productivity/VPP_2022_S2_T40KCC-010m_s2/VPP_2022_S2_T40KCC-010m_s2.json @@ -4,7 +4,7 @@ "https://stac-extensions.github.io/projection/v1.1.0/schema.json" ], "type": "Feature", - "id": "VPP_2022_S2_T40KCC-010m_V105_s2", + "id": "VPP_2022_S2_T40KCC-010m_s2", "bbox": [55.077444, -20.877262, 56.13278, -19.8938], "geometry": { "type": "Polygon", @@ -19,7 +19,7 @@ ] }, "properties": { - "description": "2022 Season 2 Vegetation Phenology and Productivity Parameters (VPP) product of Sentinel-2 tile T40KCC.", + "description": "The 2022 season 2 VPP product of tile T40KCC at 10 m resolution.", "datetime": null, "start_datetime": "2022-01-01T00:00:00.000Z", "end_datetime": "2022-12-31T00:00:00.000Z", @@ -50,9 +50,8 @@ }, { "rel": "self", - "href": "./VPP_2022_S2_T40KCC-010m_V105_s2_TPROD/VPP_2022_S2_T40KCC-010m_V105_s2.json", - "type": "application/json", - "title": "Vegetation Phenology and Productivity Parameters 2022 Season 2 Tile T40KCC" + "href": 
"./VPP_2022_S2_T40KCC-010m_s2_TPROD/VPP_2022_S2_T40KCC-010m_s2.json", + "type": "application/json" }, { "rel": "root", @@ -77,85 +76,85 @@ "vpp_2022_s2_t40kcc-010m_v105_s2_ampl": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_AMPL.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Season Amplitude", + "title": "Season Amplitude V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_eosd": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_EOSD.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Day of End-of-Season", + "title": "Day of End-of-Season V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_eosv": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_EOSV.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Vegetation Index Value at EOSD", + "title": "Vegetation Index Value at EOSD V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_length": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_LENGTH.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Length of Season", + "title": "Length of Season V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_lslope": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_LSLOPE.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Slope of The Greening Up Period", + "title": "Slope of The Greening Up Period V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_maxd": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_MAXD.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - 
"title": "Day of Maximum-of-Season", + "title": "Day of Maximum-of-Season V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_maxv": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_MAXV.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Vegetation Index Value at MAXD", + "title": "Vegetation Index Value at MAXD V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_minv": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_MINV.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Average Vegetation Index Value of Minima on Left and Right Sides of Each Season", + "title": "Average Vegetation Index Value of Minima on Left and Right Sides of Each Season V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_qflag": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_QFLAG.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Quality Flag", + "title": "Quality Flag V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_rslope": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_RSLOPE.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Slope of The Senescent Period", + "title": "Slope of The Senescent Period V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_sosd": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_SOSD.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Day of Start-of-Season", + "title": "Day of Start-of-Season V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_sosv": { "href": 
"../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_SOSV.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Vegetation Index Value at SOSD", + "title": "Vegetation Index Value at SOSD V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_sprod": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_SPROD.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Seasonal Productivity", + "title": "Seasonal Productivity V105", "roles": ["data"] }, "vpp_2022_s2_t40kcc-010m_v105_s2_tprod": { "href": "../../../data/vegetation-phenology-and-productivity/vpp_2022/VPP_2022_S2_T40KCC-010m_V105_s2_TPROD.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Total Productivity", + "title": "Total Productivity V105", "roles": ["data"] } } diff --git a/stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json b/stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json index ee9de8d..bd79ea9 100644 --- a/stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json +++ b/stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json @@ -2,8 +2,7 @@ "type": "Collection", "stac_version": "1.0.0", "stac_extensions": [ - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/projection/v1.1.0/schema.json" + "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json" ], "id": "vegetation-phenology-and-productivity", "title": "Vegetation Phenology and Productivity Parameters", @@ -23,7 +22,13 @@ "name": "Copernicus Land Monitoring Service", "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface 
energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": ["licensor", "host"], - "url": "https://land.copernicus.eu/en" + "url": "https://land.copernicus.eu" + }, + { + "name": "VITO NV", + "description": "VITO is an independent Flemish research organisation in the area of cleantech and sustainable development.", + "roles": ["processor", "producer"], + "url": "https://vito.be" } ], "extent": { @@ -31,7 +36,7 @@ "bbox": [[-25, 26, 45, 72]] }, "temporal": { - "interval": [["2017-01-01T00:00:00.000Z", null]] + "interval": [["2017-01-01T00:00:00Z", null]] } }, "item_assets": { From b20511b944961c3868ca8e19cbbe7fca2d67e22e Mon Sep 17 00:00:00 2001 From: chorng Date: Thu, 25 Apr 2024 19:07:50 +0200 Subject: [PATCH 09/80] Fix formatting --- scripts/vpp/collection.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/scripts/vpp/collection.py b/scripts/vpp/collection.py index 7d49938..018020e 100644 --- a/scripts/vpp/collection.py +++ b/scripts/vpp/collection.py @@ -1,13 +1,28 @@ -import os -import pystac import json -from pystac.extensions.item_assets import ItemAssetsExtension, AssetDefinition +import os from glob import glob -from constants import COLLECTION_DESCRIPTION, COLLECTION_ID, COLLECTION_EXTENT, COLLECTION_TITLE, WORKING_DIR, COLLECTION_KEYWORDS, VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR, COLLECTION_SUMMARIES, TITLE_MAP, CLMS_LICENSE, STAC_DIR + +import pystac +from constants import ( + CLMS_LICENSE, + COLLECTION_DESCRIPTION, + COLLECTION_EXTENT, + COLLECTION_ID, + COLLECTION_KEYWORDS, + COLLECTION_SUMMARIES, + COLLECTION_TITLE, + STAC_DIR, + TITLE_MAP, + VPP_HOST_AND_LICENSOR, + VPP_PRODUCER_AND_PROCESSOR, + WORKING_DIR, +) from jsonschema import Draft7Validator from jsonschema.exceptions import best_match +from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension from referencing import Registry, Resource + 
def get_stac_validator(product_schema: str) -> Draft7Validator: with open(product_schema, encoding="utf-8") as f: schema = json.load(f) @@ -53,5 +68,6 @@ def create_collection(): assert error_msg is None, f"Failed to create {collection.id} collection. Reason: {error_msg}." collection.save_object() + if __name__ == "__main__": create_collection() From 8e6cb85008a1b41bbc59bff0c7f15d1807c5c8d4 Mon Sep 17 00:00:00 2001 From: chorng Date: Fri, 26 Apr 2024 09:55:16 +0200 Subject: [PATCH 10/80] Restructure vpp stac creation scripts --- create_vpp_collection.py | 9 +++++++ create_vpp_items.py | 38 +++++++++++++++++++++++++++ scripts/__init__.py | 0 scripts/vpp/__init__.py | 0 scripts/vpp/collection.py | 31 +++++++++++++--------- scripts/vpp/item.py | 55 +++++++++------------------------------ 6 files changed, 77 insertions(+), 56 deletions(-) create mode 100644 create_vpp_collection.py create mode 100644 create_vpp_items.py create mode 100644 scripts/__init__.py create mode 100644 scripts/vpp/__init__.py diff --git a/create_vpp_collection.py b/create_vpp_collection.py new file mode 100644 index 0000000..28779d3 --- /dev/null +++ b/create_vpp_collection.py @@ -0,0 +1,9 @@ +import logging + +from scripts.vpp.collection import create_collection +from scripts.vpp.constants import COLLECTION_ID, STAC_DIR, WORKING_DIR + +LOGGER = logging.getLogger(__name__) +if __name__ == "__main__": + logging.basicConfig(filename="create_vpp_collection.log") + create_collection(f"{WORKING_DIR}/{STAC_DIR}/{COLLECTION_ID}/**/VPP*.json") diff --git a/create_vpp_items.py b/create_vpp_items.py new file mode 100644 index 0000000..d459a1f --- /dev/null +++ b/create_vpp_items.py @@ -0,0 +1,38 @@ +import itertools as it +import logging +from concurrent.futures import ThreadPoolExecutor + +from tqdm import tqdm + +from scripts.vpp.constants import AWS_SESSION, BUCKET +from scripts.vpp.item import create_page_iterator, create_product_list, create_vpp_item, get_stac_validator + +LOGGER = 
logging.getLogger(__name__) + + +def main(): + logging.basicConfig(filename="create_vpp_items.log") + validator = get_stac_validator("schema/products/vpp.json") + product_list = create_product_list(2017, 2023) + + # remove [:1] for full implementation + for product in product_list[:1]: + page_iterator = create_page_iterator(AWS_SESSION, BUCKET, product) + for page in page_iterator: + tiles = [prefix["Prefix"] for prefix in page["CommonPrefixes"]] + with ThreadPoolExecutor(max_workers=10) as executor: + list( + tqdm( + executor.map( + create_vpp_item, it.repeat(AWS_SESSION), it.repeat(BUCKET), it.repeat(validator), tiles + ), + total=len(tiles), + ) + ) + + # remove break for full implementation + break + + +if __name__ == "__main__": + main() diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/vpp/__init__.py b/scripts/vpp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/vpp/collection.py b/scripts/vpp/collection.py index 018020e..104857d 100644 --- a/scripts/vpp/collection.py +++ b/scripts/vpp/collection.py @@ -1,9 +1,17 @@ +from __future__ import annotations + import json +import logging import os from glob import glob import pystac -from constants import ( +from jsonschema import Draft7Validator +from jsonschema.exceptions import best_match +from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension +from referencing import Registry, Resource + +from .constants import ( CLMS_LICENSE, COLLECTION_DESCRIPTION, COLLECTION_EXTENT, @@ -17,10 +25,8 @@ VPP_PRODUCER_AND_PROCESSOR, WORKING_DIR, ) -from jsonschema import Draft7Validator -from jsonschema.exceptions import best_match -from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension -from referencing import Registry, Resource + +LOGGER = logging.getLogger(__name__) def get_stac_validator(product_schema: str) -> Draft7Validator: @@ -32,7 +38,7 @@ def 
get_stac_validator(product_schema: str) -> Draft7Validator: return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) -def create_collection(): +def create_collection(item_list: list[str]) -> pystac.Collection: collection = pystac.Collection( id=COLLECTION_ID, description=COLLECTION_DESCRIPTION, @@ -54,7 +60,7 @@ def create_collection(): collection.links.append(CLMS_LICENSE) # add items - items = glob(f"{WORKING_DIR}/{STAC_DIR}/{COLLECTION_ID}/**/VPP*.json") + items = glob(item_list) for item in items: stac_object = pystac.read_file(item) collection.add_item(stac_object, title=stac_object.id) @@ -64,10 +70,9 @@ def create_collection(): collection.set_root(catalog) collection.set_parent(catalog) validator = get_stac_validator("schema/products/vpp.json") - error_msg = best_match(validator.iter_errors(collection.to_dict())) - assert error_msg is None, f"Failed to create {collection.id} collection. Reason: {error_msg}." + try: + error_msg = best_match(validator.iter_errors(collection.to_dict())) + assert error_msg is None, f"Failed to create {collection.id} collection. Reason: {error_msg}." 
+ except AssertionError as error: + LOGGER.error(error) collection.save_object() - - -if __name__ == "__main__": - create_collection() diff --git a/scripts/vpp/item.py b/scripts/vpp/item.py index e6d2821..9416e1f 100644 --- a/scripts/vpp/item.py +++ b/scripts/vpp/item.py @@ -1,18 +1,25 @@ from __future__ import annotations import io -import itertools as it import json import logging import os -from concurrent.futures import ThreadPoolExecutor from datetime import datetime import boto3 import pystac import rasterio as rio -from constants import ( - AWS_SESSION, +from botocore.paginate import PageIterator +from jsonschema import Draft7Validator +from jsonschema.exceptions import best_match +from pyproj import Transformer +from pystac.extensions.projection import ProjectionExtension +from rasterio.coords import BoundingBox +from rasterio.crs import CRS +from referencing import Registry, Resource +from shapely.geometry import Polygon, box, mapping + +from .constants import ( BUCKET, CLMS_CATALOG_LINK, CLMS_LICENSE, @@ -25,15 +32,6 @@ VPP_PRODUCER_AND_PROCESSOR, WORKING_DIR, ) -from jsonschema import Draft7Validator -from jsonschema.exceptions import best_match -from pyproj import Transformer -from pystac.extensions.projection import ProjectionExtension -from rasterio.coords import BoundingBox -from rasterio.crs import CRS -from referencing import Registry, Resource -from shapely.geometry import Polygon, box, mapping -from tqdm import tqdm LOGGER = logging.getLogger(__name__) @@ -46,7 +44,7 @@ def create_product_list(start_year: int, end_year: int) -> list[str]: return product_list -def create_page_iterator(aws_session: boto3.Session, bucket: str, prefix: str): +def create_page_iterator(aws_session: boto3.Session, bucket: str, prefix: str) -> PageIterator: client = aws_session.client("s3") paginator = client.get_paginator("list_objects_v2") return paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="-", MaxKeys=10) @@ -153,32 +151,3 @@ def 
create_vpp_item(aws_session: boto3.Session, bucket: str, validator: Draft7Va item.save_object() except AssertionError as error: LOGGER.error(error) - - -def main(): - logging.basicConfig(filename="create_vpp_stac.log") - validator = get_stac_validator("schema/products/vpp.json") - - product_list = create_product_list(2017, 2023) - - # remove [:1] for full implementation - for product in product_list[:1]: - page_iterator = create_page_iterator(AWS_SESSION, BUCKET, product) - for page in page_iterator: - tiles = [prefix["Prefix"] for prefix in page["CommonPrefixes"]] - with ThreadPoolExecutor(max_workers=10) as executor: - list( - tqdm( - executor.map( - create_vpp_item, it.repeat(AWS_SESSION), it.repeat(BUCKET), it.repeat(validator), tiles - ), - total=len(tiles), - ) - ) - - # remove break for full implementation - break - - -if __name__ == "__main__": - main() From 941a7f40d85197ba77628140af08a2325ee99974 Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Fri, 26 Apr 2024 19:02:36 +0800 Subject: [PATCH 11/80] update for collection --- scripts/vabh/test_item.json | 62 ++++++++++++++++++++ scripts/vabh/vabh_item.py | 114 ++++++++++++++++++++++++++---------- 2 files changed, 145 insertions(+), 31 deletions(-) create mode 100644 scripts/vabh/test_item.json diff --git a/scripts/vabh/test_item.json b/scripts/vabh/test_item.json new file mode 100644 index 0000000..dbbbacb --- /dev/null +++ b/scripts/vabh/test_item.json @@ -0,0 +1,62 @@ +{ + "type": "Collection", + "id": "urban-atlas-building-height", + "stac_version": "1.0.0", + "description": "Urban Atlas building height over capital cities.", + "links": [ + { + "rel": "root", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_item.json", + "type": "application/json", + "title": "Urban Atlas Building Height 10m" + }, + { + "rel": "self", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_item.json", + "type": "application/json" + } + ], + "title": "Urban Atlas Building 
Height 10m", + "extent": { + "spatial": { + "bbox": [ + [ + -21.210399013454868, + 62.99044383484405, + -20.96981298030872, + 63.339366607232876 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2012-01-01T00:00:00Z", + null + ] + ] + } + }, + "license": "proprietary", + "keywords": [ + "Buildings", + "Building height", + "Elevation" + ], + "providers": [ + { + "name": "Copernicus Land Monitoring Service", + "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", + "roles": [ + "host", + "licensor" + ], + "url": "https://land.copernicus.eu" + } + ], + "summaries": { + "proj:epsg": [ + 3035 + ] + } +} \ No newline at end of file diff --git a/scripts/vabh/vabh_item.py b/scripts/vabh/vabh_item.py index ed4a8bf..f963a71 100644 --- a/scripts/vabh/vabh_item.py +++ b/scripts/vabh/vabh_item.py @@ -7,6 +7,7 @@ import pystac import rasterio as rio from pyproj import Transformer +from pystac import Extent, SpatialExtent, TemporalExtent from pystac.provider import ProviderRole from rasterio.coords import BoundingBox from rasterio.crs import CRS @@ -14,7 +15,7 @@ KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/data/AT001_WIEN_UA2012_DHM_V020.tif" -VPP_HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( +HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( name="Copernicus Land Monitoring Service", description=( "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land" @@ -25,28 +26,15 @@ url="https://land.copernicus.eu", ) -VPP_PRODUCER_AND_PROCESSOR: Final[pystac.Provider] = pystac.Provider( - name="VITO NV", - description=( - "VITO is an independent Flemish research organisation in the area of cleantech 
and sustainable development." - ), - roles=[ProviderRole.PROCESSOR, ProviderRole.PRODUCER], - url="https://vito.be", -) - -def read_metadata_from_tif(key: str) -> tuple[BoundingBox, CRS, int, int]: +def get_metadata_from_tif(key: str) -> tuple[BoundingBox, CRS, int, int]: with rio.open(key) as tif: bounds = tif.bounds crs = tif.crs height = tif.height width = tif.width tif.close() - return (bounds, crs, height, width) # obj.last_modified - - -def read_metadata_from_xml(key: str) -> tuple[BoundingBox, CRS, int, int]: - return (bounds, crs, height, width, created) + return (bounds, crs, height, width) def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: @@ -63,29 +51,93 @@ def get_description(product_id: str) -> str: def get_datetime(product_id: str) -> tuple[datetime, datetime]: - year = int(product_id.split("_")[1]) + year = int(product_id.split("_")[2][2:]) return (datetime(year=year, month=1, day=1), datetime(year=year, month=12, day=31)) +def get_collection_extent(bbox, start_datetime) -> Extent: + spatial_extent = SpatialExtent(bboxes=bbox) + temporal_extent = TemporalExtent(intervals=[[start_datetime, None]]) + return Extent(spatial=spatial_extent, temporal=temporal_extent) + + +def create_asset(asset_key: str) -> pystac.Asset: + parameter = asset_key.split("_")[-1].split(".")[0] + version = asset_key.split("_")[-3] + return pystac.Asset( + href=f"s3://{BUCKET}/" + asset_key, + media_type=pystac.MediaType.GEOTIFF, + title=TITLE_MAP[parameter] + f" {version}", + roles=["data"], + ) + +def get_item_assets() + +def get_links() + + + if __name__ == "__main__": head, tail = os.path.split(KEY) (product_id,) = tail.split(".")[0].rsplit("_", 0) - bounds, crs, height, width = read_metadata_from_tif(KEY) + bounds, crs, height, width = get_metadata_from_tif(KEY) geom_wgs84 = get_geom_wgs84(bounds, crs) description = get_description(product_id) start_datetime, end_datetime = get_datetime(product_id) - - item = pystac.Item( - id=product_id, - 
geometry=mapping(geom_wgs84), - bbox=geom_wgs84.bounds, - datetime=None, - start_datetime=start_datetime, - end_datetime=end_datetime, - properties={"created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, - collection="urban-atlas-building-height", + collection_extent = get_collection_extent(list(geom_wgs84.bounds), start_datetime) + summaries = pystac.Summaries({"proj:epsg": [crs.to_epsg()]}) + + collection = pystac.Collection( + id="urban-atlas-building-height", + title="Urban Atlas Building Height 10m", + description="Urban Atlas building height over capital cities.", + keywords=["Buildings", "Building height", "Elevation"], + extent=collection_extent, + summaries=summaries, + providers=[HOST_AND_LICENSOR], ) - item.common_metadata.providers = [VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR] - item.set_self_href("scripts/vpp/test_item.json") - item.save_object() + collection.set_self_href("scripts/vabh/test_item.json") + collection.save_object() + + +# def create_item(aws_session: boto3.Session, bucket: str, tile: str) -> pystac.Item: +# client = aws_session.client("s3") +# parameters = client.list_objects(Bucket=bucket, Prefix=tile, Delimiter=".")["CommonPrefixes"] +# asset_keys = [parameter["Prefix"] + "tif" for parameter in parameters] +# _, tail = os.path.split(asset_keys[0]) +# product_id = "_".join((tail[:23], tail[29:31])) +# bounds, crs, height, width, created = read_metadata_from_s3(bucket, asset_keys[0], aws_session) +# geom_wgs84 = get_geom_wgs84(bounds, crs) +# description = get_description(product_id) +# start_datetime, end_datetime = get_datetime(product_id) + +# # common metadata +# item = pystac.Item( +# id=product_id, +# geometry=mapping(geom_wgs84), +# bbox=list(geom_wgs84.bounds), +# datetime=None, +# start_datetime=start_datetime, +# end_datetime=end_datetime, +# properties={"created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, +# collection=COLLECTION_ID, +# ) +# 
item.common_metadata.providers = [HOST_AND_LICENSOR] + +# # extensions +# projection = ProjectionExtension.ext(item, add_if_missing=True) +# projection.epsg = crs.to_epsg() +# projection.bbox = [int(bounds.left), int(bounds.bottom), int(bounds.right), int(bounds.top)] +# projection.shape = [height, width] + +# # links +# links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] +# for link in links: +# item.links.append(link) + +# # assets +# assets = {os.path.split(key)[-1][:-4].lower(): create_asset(key) for key in asset_keys} +# for key, asset in assets.items(): +# item.add_asset(key, asset) +# return item From efc83b2591cb80e47cfba60690731d041d561146 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Fri, 26 Apr 2024 15:43:52 +0200 Subject: [PATCH 12/80] adds dev notebook for CLC item createion --- clms_item_generator.ipynb | 598 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 598 insertions(+) create mode 100644 clms_item_generator.ipynb diff --git a/clms_item_generator.ipynb b/clms_item_generator.ipynb new file mode 100644 index 0000000..b700877 --- /dev/null +++ b/clms_item_generator.ipynb @@ -0,0 +1,598 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "\n", + "from pyproj import Transformer\n", + "from pystac.extensions.projection import ProjectionExtension\n", + "import pystac.item\n", + "from shapely.geometry import GeometryCollection, box, shape, mapping\n", + "from datetime import datetime, UTC\n", + "import pystac\n", + "\n", + "import rasterio as rio\n", + "import rasterio.warp\n", + "import rasterio.crs\n", + "\n", + "import xml.etree.cElementTree as ET\n", + "from xml.dom import minidom" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "img_path = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif'\n", + "\n", + "id = 
os.path.basename(img_path).split('.')[0]\n", + "_ = re.search('(?<=CLC)[0-9]{4}', id)\n", + "year = _.group(0)\n", + "\n", + "# Wrap in def?\n", + "\n", + "props = {'description': (f'Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' \n", + " f'datasets produced within the frame the Copernicus Land Monitoring Service '\n", + " f'referring to land cover / land use status of year {year}. '\n", + " f'CLC service has a long-time heritage (formerly known as \\\"CORINE Land Cover Programme\\\"), '\n", + " f'coordinated by the European Environment Agency (EEA). It provides consistent '\n", + " f'and thematically detailed information on land cover and land cover changes across Europe. '\n", + " f'CLC datasets are based on the classification of satellite images produced by the national '\n", + " f'teams of the participating countries - the EEA members and cooperating countries (EEA39). '\n", + " f'National CLC inventories are then further integrated into a seamless land cover map of Europe. '\n", + " f'The resulting European database relies on standard methodology and nomenclature with following '\n", + " f'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; '\n", + " f'minimum mapping unit (MMU) for status layers is 25 hectares; '\n", + " f'minimum width of linear elements is 100 metres. '\n", + " f'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares '\n", + " f'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. '\n", + " f'The CLC service delivers important data sets supporting the implementation of key priority '\n", + " f'areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, '\n", + " f'halting the loss of biological diversity, tracking the impacts of climate change, '\n", + " f'monitoring urban land take, assessing developments in agriculture or dealing with '\n", + " f'water resources directives. 
CLC belongs to the Pan-European component of the '\n", + " f'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the '\n", + " f'European Copernicus Programme coordinated by the European Environment Agency, '\n", + " f'providing environmental information from a combination of air- and space-based observation '\n", + " f'systems and in-situ monitoring. Additional information about CLC product description including '\n", + " f'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. '\n", + " f'CLC class descriptions can be found at '\n", + " f'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'),\n", + " 'created': None,\n", + " 'providers': [{\n", + " 'name': 'Copernicus Land Monitoring Service',\n", + " 'description': ('The Copernicus Land Monitoring Service provides '\n", + " 'geographical information on land cover and its '\n", + " 'changes, land use, ground motions, vegetation state, '\n", + " 'water cycle and Earth\\'s surface energy variables to '\n", + " 'a broad range of users in Europe and across the World '\n", + " 'in the field of environmental terrestrial applications.'),\n", + " 'roles': ['licensor', 'host'],\n", + " 'url': 'https://land.copernicus.eu'\n", + " }],\n", + " \n", + "\n", + "\n", + "}\n", + "\n", + "\n", + "\n", + "with rio.open(img_path) as img:\n", + "\n", + " # xmin, ymin, xmax, ymax = img.bounds\n", + " # transformer = Transformer.from_crs(img.crs, \"EPSG:4326\")\n", + " # xmin_, ymax_ = transformer.transform(xmin, ymax)\n", + " # xmax_, ymin_ = transformer.transform(xmax, ymin)\n", + " # box_ = box(*[xmin_, ymin_, xmin_, ymax_])\n", + " # box_ = box(*transformer.transform_bounds(*img.bounds))\n", + " # bounds_wgs84 = rasterio.warp.transform_bounds(img.crs, CRS.from_epsg(4326), *img.bounds)\n", + " bbox = rio.warp.transform_bounds(img.crs, rio.crs.CRS.from_epsg(4326), *img.bounds)\n", + " params = {\n", + " 'id': id,\n", + " 
'bbox': bbox,\n", + " 'geometry': mapping(box(*bbox)),\n", + " 'datetime': None,\n", + " 'start_datetime': datetime(int(year), 1, 1, microsecond=0, tzinfo=UTC),\n", + " 'end_datetime': datetime(int(year), 12, 31, microsecond=0, tzinfo=UTC),\n", + " 'properties': props,\n", + " }\n", + "\n", + "\n", + "\n", + "item = pystac.Item(**params)\n", + "\n", + "\n", + "item.add_asset(\n", + " key='image',\n", + " asset=pystac.Asset(href=img_path, title='Geotiff', media_type=pystac.MediaType.GEOTIFF),\n", + ")\n", + "\n", + "# item.ext.add('proj')" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "from pystac.extensions.projection import ProjectionExtension\n", + "import pystac.link\n", + "\n", + "proj_ext = ProjectionExtension.ext(item.assets['image'], add_if_missing=True)\n", + "\n", + "proj_ext.apply(epsg=rio.crs.CRS(img.crs).to_epsg(),\n", + " bbox=img.bounds,\n", + " shape=[_ for _ in img.shape],\n", + " transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0],\n", + " )\n", + "\n", + "license = pystac.link.Link(rel='LICENSE', target=\"https://land.copernicus.eu/en/data-policy\")\n", + "item.add_link(license)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "item.save_object(dest_href='testY.json')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.datetime(2012, 1, 1, 0, 0)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "datetime(int(year), 1, 1, microsecond=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Taken from https://stackoverflow.com/questions/2148119/how-to-convert-an-xml-string-to-a-dictionary\n", + "from xml.etree import cElementTree as ElementTree\n", + "\n", + "\n", + "class XmlListConfig(list):\n", + 
" def __init__(self, aList):\n", + " for element in aList:\n", + " if element:\n", + " if len(element) == 1 or element[0].tag != element[1].tag:\n", + " self.append(XmlDictConfig(element))\n", + " elif element[0].tag == element[1].tag:\n", + " self.append(XmlListConfig(element))\n", + " elif element.text:\n", + " text = element.text.strip()\n", + " if text:\n", + " self.append(text)\n", + "\n", + "\n", + "class XmlDictConfig(dict):\n", + " def __init__(self, parent_element):\n", + " if parent_element.items():\n", + " self.update(dict(parent_element.items()))\n", + " for element in parent_element:\n", + " if element:\n", + " if len(element) == 1 or element[0].tag != element[1].tag:\n", + " aDict = XmlDictConfig(element)\n", + " else:\n", + " aDict = {element[0].tag: XmlListConfig(element)}\n", + " if element.items():\n", + " aDict.update(dict(element.items()))\n", + " self.update({element.tag: aDict})\n", + " elif element.items():\n", + " self.update({element.tag: dict(element.items())})\n", + " else:\n", + " self.update({element.tag: element.text})\n", + "\n", + "stac_io = pystac.StacIO.default()\n", + "\n", + "def get_metadata(xml: str):\n", + " result = XmlDictConfig(ElementTree.XML(stac_io.read_text(xml)))\n", + " result[\n", + " \"ORIGINAL_URL\"\n", + " ] = xml # Include the original URL in the metadata for use later\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': 'http://www.isotc211.org/2005/gmd http://schemas.opengis.net/csw/2.0.2/profiles/apiso/1.0.0/apiso.xsd',\n", + " '{http://www.isotc211.org/2005/gmd}fileIdentifier': {'{http://www.isotc211.org/2005/gco}CharacterString': '7e162b2d-5196-41b2-b6dd-e889651e2f1f'},\n", + " '{http://www.isotc211.org/2005/gmd}language': {'{http://www.isotc211.org/2005/gmd}LanguageCode': {'codeList': 'http://www.loc.gov/standards/iso639-2/',\n", + " 
'codeListValue': 'eng'}},\n", + " '{http://www.isotc211.org/2005/gmd}characterSet': {'{http://www.isotc211.org/2005/gmd}MD_CharacterSetCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_CharacterSetCode',\n", + " 'codeListValue': 'utf8'}},\n", + " '{http://www.isotc211.org/2005/gmd}hierarchyLevel': {'{http://www.isotc211.org/2005/gmd}MD_ScopeCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_ScopeCode',\n", + " 'codeListValue': 'dataset'}},\n", + " '{http://www.isotc211.org/2005/gmd}contact': {'{http://www.isotc211.org/2005/gmd}CI_ResponsibleParty': {'{http://www.isotc211.org/2005/gmd}organisationName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'European Environment Agency'},\n", + " '{http://www.isotc211.org/2005/gmd}contactInfo': {'{http://www.isotc211.org/2005/gmd}CI_Contact': {'{http://www.isotc211.org/2005/gmd}address': {'{http://www.isotc211.org/2005/gmd}CI_Address': {'{http://www.isotc211.org/2005/gmd}deliveryPoint': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Kongens Nytorv 6'},\n", + " '{http://www.isotc211.org/2005/gmd}city': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Copenhagen'},\n", + " '{http://www.isotc211.org/2005/gmd}administrativeArea': {'{http://www.isotc211.org/2005/gco}CharacterString': 'K'},\n", + " '{http://www.isotc211.org/2005/gmd}postalCode': {'{http://www.isotc211.org/2005/gco}CharacterString': '1050'},\n", + " '{http://www.isotc211.org/2005/gmd}country': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Denmark'},\n", + " '{http://www.isotc211.org/2005/gmd}electronicMailAddress': {'{http://www.isotc211.org/2005/gco}CharacterString': 'sdi@eea.europa.eu'}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}role': {'{http://www.isotc211.org/2005/gmd}CI_RoleCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_RoleCode',\n", + " 'codeListValue': 'pointOfContact'}}}},\n", + " 
'{http://www.isotc211.org/2005/gmd}dateStamp': {'{http://www.isotc211.org/2005/gco}DateTime': '2019-12-18T22:18:54'},\n", + " '{http://www.isotc211.org/2005/gmd}metadataStandardName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'ISO 19115:2003/19139'},\n", + " '{http://www.isotc211.org/2005/gmd}metadataStandardVersion': {'{http://www.isotc211.org/2005/gco}CharacterString': '1.0'},\n", + " '{http://www.isotc211.org/2005/gmd}referenceSystemInfo': {'{http://www.isotc211.org/2005/gmd}MD_ReferenceSystem': {'{http://www.isotc211.org/2005/gmd}referenceSystemIdentifier': {'{http://www.isotc211.org/2005/gmd}RS_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': {'{http://www.w3.org/1999/xlink}href': 'http://www.opengis.net/def/crs/EPSG/0/3035'}}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}identificationInfo': {'{http://www.isotc211.org/2005/gmd}MD_DataIdentification': {'{http://www.isotc211.org/2005/gmd}citation': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Corine Land Cover (CLC) 2018, Version 2020 20_1'},\n", + " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2020-02-24'},\n", + " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_DateTypeCode',\n", + " 'codeListValue': 'publication'}}}},\n", + " '{http://www.isotc211.org/2005/gmd}edition': {'{http://www.isotc211.org/2005/gco}CharacterString': '20_1'},\n", + " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Corine Land Cover (CLC) 2018, Version 2020 
20_1'}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}abstract': {'{http://www.isotc211.org/2005/gco}CharacterString': 'CLC2018 is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2018. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. \\nCLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. 
Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/clc2018technicalguidelines_final.pdf. CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'},\n", + " '{http://www.isotc211.org/2005/gmd}pointOfContact': {'{http://www.isotc211.org/2005/gmd}CI_ResponsibleParty': {'{http://www.isotc211.org/2005/gmd}individualName': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}organisationName': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}positionName': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}contactInfo': {'{http://www.isotc211.org/2005/gmd}CI_Contact': {'{http://www.isotc211.org/2005/gmd}phone': {'{http://www.isotc211.org/2005/gmd}CI_Telephone': {'{http://www.isotc211.org/2005/gmd}voice': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}facsimile': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}address': {'{http://www.isotc211.org/2005/gmd}CI_Address': {'{http://www.isotc211.org/2005/gmd}deliveryPoint': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}city': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " 
'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}administrativeArea': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}postalCode': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}country': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}electronicMailAddress': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}role': {'{http://www.isotc211.org/2005/gmd}CI_RoleCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_RoleCode',\n", + " 'codeListValue': 'resourceProvider'}}}},\n", + " '{http://www.isotc211.org/2005/gmd}resourceMaintenance': {'{http://www.isotc211.org/2005/gmd}MD_MaintenanceInformation': {'{http://www.isotc211.org/2005/gmd}maintenanceAndUpdateFrequency': {'{http://www.isotc211.org/2005/gmd}MD_MaintenanceFrequencyCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_MaintenanceFrequencyCode',\n", + " 'codeListValue': ''}}}},\n", + " '{http://www.isotc211.org/2005/gmd}graphicOverview': {'{http://www.isotc211.org/2005/gmd}MD_BrowseGraphic': {'{http://www.isotc211.org/2005/gmd}fileName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'https://sdi.eea.europa.eu/public/catalogue-graphic-overview/blank.png'}}},\n", + " '{http://www.isotc211.org/2005/gmd}descriptiveKeywords': {'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Albania, Austria, Belgium, Bosnia and 
Herzegovina, Bulgaria, Croatia, Cyprus, Czech Republic, Denmark, Estonia, Finland, France, Germany, Greece, Hungary, Iceland, Ireland, Italy, Kosovo, Latvia, Liechtenstein, Lithuania, Luxembourg, Malta, Montenegro, Netherlands, North Macedonia, Norway, Poland, Portugal, Romania, Serbia, Slovakia, Slovenia, Spain, Sweden, Switzerland, Turkey, United Kingdom'},\n", + " '{http://www.isotc211.org/2005/gmd}type': {'{http://www.isotc211.org/2005/gmd}MD_KeywordTypeCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_KeywordTypeCode',\n", + " 'codeListValue': 'place'}},\n", + " '{http://www.isotc211.org/2005/gmd}thesaurusName': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Continents, countries, sea regions of the world'},\n", + " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2015-07-17'},\n", + " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_DateTypeCode',\n", + " 'codeListValue': 'publication'}}}},\n", + " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': {'{http://www.w3.org/1999/xlink}href': 'http://sdi.eea.europa.eu/editor-catalogue/srv/eng/thesaurus.download?ref=external.place.regions'}}}}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}resourceConstraints': {'{http://www.isotc211.org/2005/gmd}MD_LegalConstraints': {'{http://www.isotc211.org/2005/gmd}useConstraints': {'{http://www.isotc211.org/2005/gmd}MD_RestrictionCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_RestrictionCode',\n", + " 
'codeListValue': 'otherRestrictions'}}}},\n", + " '{http://www.isotc211.org/2005/gmd}language': {'{http://www.isotc211.org/2005/gmd}LanguageCode': {'codeList': 'http://www.loc.gov/standards/iso639-2/',\n", + " 'codeListValue': 'eng'}},\n", + " '{http://www.isotc211.org/2005/gmd}topicCategory': {'{http://www.isotc211.org/2005/gmd}MD_TopicCategoryCode': 'imageryBaseMapsEarthCover'},\n", + " '{http://www.isotc211.org/2005/gmd}extent': {'{http://www.isotc211.org/2005/gmd}EX_Extent': {'{http://www.isotc211.org/2005/gmd}temporalElement': None}}}},\n", + " '{http://www.isotc211.org/2005/gmd}distributionInfo': {'{http://www.isotc211.org/2005/gmd}MD_Distribution': {'{http://www.isotc211.org/2005/gmd}distributionFormat': {'{http://www.isotc211.org/2005/gmd}MD_Format': {'{http://www.isotc211.org/2005/gmd}name': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}version': {'{http://www.isotc211.org/2005/gco}nilReason': 'unknown',\n", + " '{http://www.isotc211.org/2005/gco}CharacterString': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}distributor': None,\n", + " '{http://www.isotc211.org/2005/gmd}transferOptions': {'{http://www.isotc211.org/2005/gmd}MD_DigitalTransferOptions': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}dataQualityInfo': {'{http://www.isotc211.org/2005/gmd}DQ_DataQuality': {'{http://www.isotc211.org/2005/gmd}scope': {'{http://www.isotc211.org/2005/gmd}DQ_Scope': {'{http://www.isotc211.org/2005/gmd}level': {'{http://www.isotc211.org/2005/gmd}MD_ScopeCode': {'codeListValue': 'dataset',\n", + " 'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_ScopeCode'}}}},\n", + " '{http://www.isotc211.org/2005/gmd}report': {'{http://www.isotc211.org/2005/gmd}DQ_DomainConsistency': {'{http://www.isotc211.org/2005/gmd}result': {'{http://www.isotc211.org/2005/gmd}DQ_ConformanceResult': 
{'{http://www.isotc211.org/2005/gmd}specification': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Commission Regulation (EU) No 1089/2010 of 23 November 2010 implementing Directive 2007/2/EC of the European Parliament and of the Council as regards interoperability of spatial data sets and services'},\n", + " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2010-12-08'},\n", + " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_DateTypeCode',\n", + " 'codeListValue': 'publication'}}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}explanation': {'{http://www.isotc211.org/2005/gco}CharacterString': 'See the referenced specification'},\n", + " '{http://www.isotc211.org/2005/gmd}pass': {'{http://www.isotc211.org/2005/gco}nilReason': 'unknown'}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}lineage': {'{http://www.isotc211.org/2005/gmd}LI_Lineage': {'{http://www.isotc211.org/2005/gmd}statement': {'{http://www.isotc211.org/2005/gco}CharacterString': 'CLC products are based in majority of EEA39 countries on the photointerpretation of satellite images by the national teams of the participating countries - the EEA member and cooperating countries. All features in original vector database are delineated and classified on satellite images according to CLC specifications i.e. with better than 100 m positional accuracy and 25 ha minimum mapping unit (5 ha MMU for change layer) into the standardized CLC nomenclature (44 CLC classes). The change layer is derived from satellite imagery by direct mapping of changes taken place between two consecutive inventories, based on image-to-image comparison. 
Some countries follow alternative approaches by utilizing semiautomatic methodology e.g. generalisation of higher resolution national datasets. Production of national CLC inventories is supported by training and is under systematic control of the CLC Technical Team, both for thematic and semantic aspects, to assure harmonized European products. The process of European data integration starts when national deliveries are accepted and the Database Acceptance Report (DBTA) issued. National CLC data are then transformed into the common European reference (ETRS89/LAEA) and pan-European seamless dataset is produced. Integration step includes also harmonization of database along country borders. Rigorous harmonization of country borders has been done only for CLC2000 and CHA9000 layers (in 2 km wide strips along borders) as part of CLC2000 update. Currently, only simplified harmonisation is applied i.e. small border polygons (area < 5 ha) are generalised according to predefined rules to largest and/or thematically most similar neighbour, sliver polygons along borders (< 0.1 ha) are eliminated. European Corine Land Cover seamless database represents the final product of European data integration. Some artificial lines (dividing polygons with the same code) can be still present in database due to technical constraints of current ArcGIS technology and complexity of dataset (adaptive tiling) but this has no impact on dataset contents and can be dissolved for smaller data extracts.\\nRevised versions\\nStarted from the publication of CLC2006 (Version 16) the previous inventory is substituted by its revised version by most of the participating countries (see CLC seamless data coverage table https://land.copernicus.eu/user-corner/technical-library/clc-country-coverage-1990-2018-v20_1). However, due to their specific methodology not all countries are producing revised version of the previous inventory. 
The revision of previous CLC layer is a “by-product” of the standard updating process, including corrections to the original data identified during the update. Revisions (correcting mistakes) are needed because of the following factors: \\n- availability of higher resolution satellite imagery;\\n- a new satellite image or time series of satellite imagery provides additional key to correctly recognise a feature;\\n- improved availability and better quality of in-situ data;\\n- improved skills of experts, i.e. better understanding and application of CLC nomenclature;\\n- decision of the national team to improve the product between two inventories.\\n\\nThese revisions are not propagated backward to older datasets (e.g. during CLC2018 revision of CLC2012 might be provided, but the older datasets were not revised). Thus, consecutive inventories might include not only real changes, but also differences due to revisions. Therefore, it is recommended that in time series analysis CLC-Change layers should be used. If status layers from past are needed, these could be derived backward from deducting CLC-Change layers from the latest (best available) status layer as it is done for EEA accounting layers (see at https://www.eea.europa.eu/data-and-maps/data/corine-land-cover-accounting-layers)\\nMore details to be available soon in upcoming \"Users\\' Manual for all Copernicus data” document. \\nVersion 20_1\\nRelease date: 24-02-2020\\nFile naming conventions simplified and better described. New file naming convention has been introduced based on user feedback on version 20. Filename is composed of combination of information about update campaign, data theme and reference year and version specification (including release year and release number). 
\\nSee https://land.copernicus.eu/user-corner/technical-library/clc-file-naming-conventions-guide-v20_1 for details.\\n\\nThe French DOMs are provided in separate databases (files both for vector and raster version of data).\\n\\nAll raster layers are back in 8 bit GeoTIFF. Modification is introduced based on the user feedback on version 20. In order to keep 8 bit resolution for raster change layers, they are divided into two files - representing consumption (from) and formation (to) part of change.\\n\\nSee https://land.copernicus.eu/user-corner/technical-library/clc-country-coverage-1990-2018-v20_1 for full information about the coverage of this version.\\nSee http://land.copernicus.eu/user-corner/technical-library/clc-and-clcc-release-lineage for full information about lineage history.'}}}}},\n", + " 'ORIGINAL_URL': '../CLC_samples/U2018_CLC2018_V2020_20u1.xml'}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xml_path = '../CLC_samples/U2018_CLC2018_V2020_20u1.xml'\n", + "\n", + "get_metadata(xml_path)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def xml_to_dict(xml, result):\n", + " for child in xml:\n", + " if len(child) == 0:\n", + " result[child.tag] = child.text\n", + " else:\n", + " if child.tag in result:\n", + " if not isinstance(result[child.tag], list):\n", + " result[child.tag] = [result[child.tag]]\n", + " result[child.tag].append(xml_to_dict(child, {}))\n", + " else:\n", + " result[child.tag] = xml_to_dict(child, {})\n", + " return result\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'{http://www.isotc211.org/2005/gmd}fileIdentifier': {'{http://www.isotc211.org/2005/gco}CharacterString': '7e162b2d-5196-41b2-b6dd-e889651e2f1f'},\n", + " '{http://www.isotc211.org/2005/gmd}language': 
{'{http://www.isotc211.org/2005/gmd}LanguageCode': None},\n", + " '{http://www.isotc211.org/2005/gmd}characterSet': {'{http://www.isotc211.org/2005/gmd}MD_CharacterSetCode': None},\n", + " '{http://www.isotc211.org/2005/gmd}hierarchyLevel': {'{http://www.isotc211.org/2005/gmd}MD_ScopeCode': None},\n", + " '{http://www.isotc211.org/2005/gmd}contact': {'{http://www.isotc211.org/2005/gmd}CI_ResponsibleParty': {'{http://www.isotc211.org/2005/gmd}organisationName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'European Environment Agency'},\n", + " '{http://www.isotc211.org/2005/gmd}contactInfo': {'{http://www.isotc211.org/2005/gmd}CI_Contact': {'{http://www.isotc211.org/2005/gmd}address': {'{http://www.isotc211.org/2005/gmd}CI_Address': {'{http://www.isotc211.org/2005/gmd}deliveryPoint': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Kongens Nytorv 6'},\n", + " '{http://www.isotc211.org/2005/gmd}city': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Copenhagen'},\n", + " '{http://www.isotc211.org/2005/gmd}administrativeArea': {'{http://www.isotc211.org/2005/gco}CharacterString': 'K'},\n", + " '{http://www.isotc211.org/2005/gmd}postalCode': {'{http://www.isotc211.org/2005/gco}CharacterString': '1050'},\n", + " '{http://www.isotc211.org/2005/gmd}country': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Denmark'},\n", + " '{http://www.isotc211.org/2005/gmd}electronicMailAddress': {'{http://www.isotc211.org/2005/gco}CharacterString': 'sdi@eea.europa.eu'}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}role': {'{http://www.isotc211.org/2005/gmd}CI_RoleCode': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}dateStamp': {'{http://www.isotc211.org/2005/gco}DateTime': '2019-12-18T22:18:54'},\n", + " '{http://www.isotc211.org/2005/gmd}metadataStandardName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'ISO 19115:2003/19139'},\n", + " '{http://www.isotc211.org/2005/gmd}metadataStandardVersion': 
{'{http://www.isotc211.org/2005/gco}CharacterString': '1.0'},\n", + " '{http://www.isotc211.org/2005/gmd}referenceSystemInfo': {'{http://www.isotc211.org/2005/gmd}MD_ReferenceSystem': {'{http://www.isotc211.org/2005/gmd}referenceSystemIdentifier': {'{http://www.isotc211.org/2005/gmd}RS_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': 'EPSG:3035'}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}identificationInfo': {'{http://www.isotc211.org/2005/gmd}MD_DataIdentification': {'{http://www.isotc211.org/2005/gmd}citation': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Corine Land Cover (CLC) 2018, Version 2020 20_1'},\n", + " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2020-02-24'},\n", + " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}edition': {'{http://www.isotc211.org/2005/gco}CharacterString': '20_1'},\n", + " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Corine Land Cover (CLC) 2018, Version 2020 20_1'}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}abstract': {'{http://www.isotc211.org/2005/gco}CharacterString': 'CLC2018 is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2018. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). 
It provides consistent and thematically detailed information on land cover and land cover changes across Europe. \\nCLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/clc2018technicalguidelines_final.pdf. 
CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'},\n", + " '{http://www.isotc211.org/2005/gmd}pointOfContact': [{'{http://www.isotc211.org/2005/gmd}CI_ResponsibleParty': {'{http://www.isotc211.org/2005/gmd}organisationName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'European Environment Agency (EEA) under the framework of the Copernicus programme'},\n", + " '{http://www.isotc211.org/2005/gmd}contactInfo': {'{http://www.isotc211.org/2005/gmd}CI_Contact': {'{http://www.isotc211.org/2005/gmd}address': {'{http://www.isotc211.org/2005/gmd}CI_Address': {'{http://www.isotc211.org/2005/gmd}deliveryPoint': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Kongens Nytorv 6'},\n", + " '{http://www.isotc211.org/2005/gmd}city': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Copenhagen'},\n", + " '{http://www.isotc211.org/2005/gmd}administrativeArea': {'{http://www.isotc211.org/2005/gco}CharacterString': 'K'},\n", + " '{http://www.isotc211.org/2005/gmd}postalCode': {'{http://www.isotc211.org/2005/gco}CharacterString': '1050'},\n", + " '{http://www.isotc211.org/2005/gmd}country': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Denmark'},\n", + " '{http://www.isotc211.org/2005/gmd}electronicMailAddress': {'{http://www.isotc211.org/2005/gco}CharacterString': 'copernicus@eea.europa.eu'}}},\n", + " '{http://www.isotc211.org/2005/gmd}onlineResource': {'{http://www.isotc211.org/2005/gmd}CI_OnlineResource': {'{http://www.isotc211.org/2005/gmd}linkage': {'{http://www.isotc211.org/2005/gmd}URL': 'http://www.eea.europa.eu'},\n", + " '{http://www.isotc211.org/2005/gmd}protocol': {'{http://www.isotc211.org/2005/gco}CharacterString': 'WWW:LINK-1.0-http--link'},\n", + " '{http://www.isotc211.org/2005/gmd}name': {'{http://www.isotc211.org/2005/gco}CharacterString': 'European Environment Agency public website'},\n", + " '{http://www.isotc211.org/2005/gmd}function': 
{'{http://www.isotc211.org/2005/gmd}CI_OnLineFunctionCode': None}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}role': {'{http://www.isotc211.org/2005/gmd}CI_RoleCode': None}}},\n", + " {'{http://www.isotc211.org/2005/gmd}CI_ResponsibleParty': {'{http://www.isotc211.org/2005/gmd}individualName': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}organisationName': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}positionName': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}contactInfo': {'{http://www.isotc211.org/2005/gmd}CI_Contact': {'{http://www.isotc211.org/2005/gmd}phone': {'{http://www.isotc211.org/2005/gmd}CI_Telephone': {'{http://www.isotc211.org/2005/gmd}voice': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}facsimile': {'{http://www.isotc211.org/2005/gco}CharacterString': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}address': {'{http://www.isotc211.org/2005/gmd}CI_Address': {'{http://www.isotc211.org/2005/gmd}deliveryPoint': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}city': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}administrativeArea': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}postalCode': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}country': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}electronicMailAddress': {'{http://www.isotc211.org/2005/gco}CharacterString': None}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}role': {'{http://www.isotc211.org/2005/gmd}CI_RoleCode': None}}}],\n", + " 
'{http://www.isotc211.org/2005/gmd}resourceMaintenance': {'{http://www.isotc211.org/2005/gmd}MD_MaintenanceInformation': {'{http://www.isotc211.org/2005/gmd}maintenanceAndUpdateFrequency': {'{http://www.isotc211.org/2005/gmd}MD_MaintenanceFrequencyCode': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}graphicOverview': {'{http://www.isotc211.org/2005/gmd}MD_BrowseGraphic': {'{http://www.isotc211.org/2005/gmd}fileName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'https://sdi.eea.europa.eu/public/catalogue-graphic-overview/blank.png'}}},\n", + " '{http://www.isotc211.org/2005/gmd}descriptiveKeywords': [{'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gmx}Anchor': None},\n", + " '{http://www.isotc211.org/2005/gmd}type': {'{http://www.isotc211.org/2005/gmd}MD_KeywordTypeCode': None},\n", + " '{http://www.isotc211.org/2005/gmd}thesaurusName': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gmx}Anchor': 'GEMET - INSPIRE themes, version 1.0'},\n", + " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2008-06-01'},\n", + " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': 'geonetwork.thesaurus.external.theme.httpinspireeceuropaeutheme-theme'}}}}}}},\n", + " {'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Copernicus Land Satellite Image Interpretation 2018 Corine Land Cover Raster CLC Polygon'},\n", + " 
'{http://www.isotc211.org/2005/gmd}thesaurusName': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'EEA keyword list'},\n", + " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2002-03-01'},\n", + " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}}}}}},\n", + " {'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gco}CharacterString': 'geospatial data'},\n", + " '{http://www.isotc211.org/2005/gmd}type': {'{http://www.isotc211.org/2005/gmd}MD_KeywordTypeCode': None},\n", + " '{http://www.isotc211.org/2005/gmd}thesaurusName': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'EEA categories'},\n", + " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2010-07-06'},\n", + " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': 'geonetwork.thesaurus.local.theme.eea-categories'}}}}}}},\n", + " {'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}type': {'{http://www.isotc211.org/2005/gmd}MD_KeywordTypeCode': None},\n", + " '{http://www.isotc211.org/2005/gmd}thesaurusName': 
{'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'GEMET'},\n", + " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2018-08-16'},\n", + " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': 'geonetwork.thesaurus.external.theme.gemet'}}}}}}},\n", + " {'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Albania, Austria, Belgium, Bosnia and Herzegovina, Bulgaria, Croatia, Cyprus, Czech Republic, Denmark, Estonia, Finland, France, Germany, Greece, Hungary, Iceland, Ireland, Italy, Kosovo, Latvia, Liechtenstein, Lithuania, Luxembourg, Malta, Montenegro, Netherlands, North Macedonia, Norway, Poland, Portugal, Romania, Serbia, Slovakia, Slovenia, Spain, Sweden, Switzerland, Turkey, United Kingdom'},\n", + " '{http://www.isotc211.org/2005/gmd}type': {'{http://www.isotc211.org/2005/gmd}MD_KeywordTypeCode': None},\n", + " '{http://www.isotc211.org/2005/gmd}thesaurusName': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Continents, countries, sea regions of the world'},\n", + " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2015-07-17'},\n", + " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}},\n", + " 
'{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': 'geonetwork.thesaurus.external.place.regions'}}}}}}}],\n", + " '{http://www.isotc211.org/2005/gmd}resourceConstraints': [{'{http://www.isotc211.org/2005/gmd}MD_LegalConstraints': {'{http://www.isotc211.org/2005/gmd}accessConstraints': {'{http://www.isotc211.org/2005/gco}CharacterString': \"Access to data is based on a principle of full, open and free access as established by the Copernicus data and information policy Regulation (EU) No 1159/2013 of 12 July 2013. This regulation establishes registration and licensing conditions for GMES/Copernicus users and can be found on http://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A32013R1159. Free, full and open access to this data set is made on the conditions that: 1. When distributing or communicating Copernicus dedicated data and Copernicus service information to the public, users shall inform the public of the source of that data and information. 2. Users shall make sure not to convey the impression to the public that the user's activities are officially endorsed by the Union. 3. Where that data or information has been adapted or modified, the user shall clearly state this. 4. The data remain the sole property of the European Union. Any information and data produced in the framework of the action shall be the sole property of the European Union. 
Any communication and publication by the beneficiary shall acknowledge that the data were produced “with funding by the European Union”.\"},\n", + " '{http://www.isotc211.org/2005/gmd}otherConstraints': {'{http://www.isotc211.org/2005/gmx}Anchor': 'no limitations to public access'}}},\n", + " {'{http://www.isotc211.org/2005/gmd}MD_LegalConstraints': {'{http://www.isotc211.org/2005/gmd}useConstraints': {'{http://www.isotc211.org/2005/gmd}MD_RestrictionCode': None}}}],\n", + " '{http://www.isotc211.org/2005/gmd}language': {'{http://www.isotc211.org/2005/gmd}LanguageCode': None},\n", + " '{http://www.isotc211.org/2005/gmd}topicCategory': [{'{http://www.isotc211.org/2005/gmd}MD_TopicCategoryCode': 'environment'},\n", + " {'{http://www.isotc211.org/2005/gmd}MD_TopicCategoryCode': 'imageryBaseMapsEarthCover'}],\n", + " '{http://www.isotc211.org/2005/gmd}extent': [{'{http://www.isotc211.org/2005/gmd}EX_Extent': {'{http://www.isotc211.org/2005/gmd}geographicElement': {'{http://www.isotc211.org/2005/gmd}EX_GeographicBoundingBox': {'{http://www.isotc211.org/2005/gmd}westBoundLongitude': {'{http://www.isotc211.org/2005/gco}Decimal': '-31.561261'},\n", + " '{http://www.isotc211.org/2005/gmd}eastBoundLongitude': {'{http://www.isotc211.org/2005/gco}Decimal': '44.820775'},\n", + " '{http://www.isotc211.org/2005/gmd}southBoundLatitude': {'{http://www.isotc211.org/2005/gco}Decimal': '27.405827'},\n", + " '{http://www.isotc211.org/2005/gmd}northBoundLatitude': {'{http://www.isotc211.org/2005/gco}Decimal': '71.409109'}}},\n", + " '{http://www.isotc211.org/2005/gmd}temporalElement': {'{http://www.isotc211.org/2005/gmd}EX_TemporalExtent': {'{http://www.isotc211.org/2005/gmd}extent': {'{http://www.opengis.net/gml/3.2}TimePeriod': {'{http://www.opengis.net/gml/3.2}beginPosition': '2017-01-01',\n", + " '{http://www.opengis.net/gml/3.2}endPosition': '2018-12-31'}}}}}},\n", + " {'{http://www.isotc211.org/2005/gmd}EX_Extent': {'{http://www.isotc211.org/2005/gmd}temporalElement': 
None}}]}},\n", + " '{http://www.isotc211.org/2005/gmd}distributionInfo': {'{http://www.isotc211.org/2005/gmd}MD_Distribution': {'{http://www.isotc211.org/2005/gmd}distributionFormat': {'{http://www.isotc211.org/2005/gmd}MD_Format': {'{http://www.isotc211.org/2005/gmd}name': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", + " '{http://www.isotc211.org/2005/gmd}version': {'{http://www.isotc211.org/2005/gco}CharacterString': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}distributor': None,\n", + " '{http://www.isotc211.org/2005/gmd}transferOptions': [{'{http://www.isotc211.org/2005/gmd}MD_DigitalTransferOptions': {'{http://www.isotc211.org/2005/gmd}onLine': {'{http://www.isotc211.org/2005/gmd}CI_OnlineResource': {'{http://www.isotc211.org/2005/gmd}linkage': {'{http://www.isotc211.org/2005/gmd}URL': 'http://land.copernicus.eu/pan-european/corine-land-cover/clc2018/view'},\n", + " '{http://www.isotc211.org/2005/gmd}protocol': {'{http://www.isotc211.org/2005/gco}CharacterString': 'WWW:LINK-1.0-http--link'},\n", + " '{http://www.isotc211.org/2005/gmd}function': {'{http://www.isotc211.org/2005/gmd}CI_OnLineFunctionCode': None}}}}},\n", + " {'{http://www.isotc211.org/2005/gmd}MD_DigitalTransferOptions': None}]}},\n", + " '{http://www.isotc211.org/2005/gmd}dataQualityInfo': {'{http://www.isotc211.org/2005/gmd}DQ_DataQuality': {'{http://www.isotc211.org/2005/gmd}scope': {'{http://www.isotc211.org/2005/gmd}DQ_Scope': {'{http://www.isotc211.org/2005/gmd}level': {'{http://www.isotc211.org/2005/gmd}MD_ScopeCode': None}}},\n", + " '{http://www.isotc211.org/2005/gmd}report': {'{http://www.isotc211.org/2005/gmd}DQ_DomainConsistency': {'{http://www.isotc211.org/2005/gmd}result': {'{http://www.isotc211.org/2005/gmd}DQ_ConformanceResult': {'{http://www.isotc211.org/2005/gmd}specification': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Commission Regulation (EU) 
No 1089/2010 of 23 November 2010 implementing Directive 2007/2/EC of the European Parliament and of the Council as regards interoperability of spatial data sets and services'},\n", + " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2010-12-08'},\n", + " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}}}},\n", + " '{http://www.isotc211.org/2005/gmd}explanation': {'{http://www.isotc211.org/2005/gco}CharacterString': 'See the referenced specification'},\n", + " '{http://www.isotc211.org/2005/gmd}pass': None}}}},\n", + " '{http://www.isotc211.org/2005/gmd}lineage': {'{http://www.isotc211.org/2005/gmd}LI_Lineage': {'{http://www.isotc211.org/2005/gmd}statement': {'{http://www.isotc211.org/2005/gco}CharacterString': 'CLC products are based in majority of EEA39 countries on the photointerpretation of satellite images by the national teams of the participating countries - the EEA member and cooperating countries. All features in original vector database are delineated and classified on satellite images according to CLC specifications i.e. with better than 100 m positional accuracy and 25 ha minimum mapping unit (5 ha MMU for change layer) into the standardized CLC nomenclature (44 CLC classes). The change layer is derived from satellite imagery by direct mapping of changes taken place between two consecutive inventories, based on image-to-image comparison. Some countries follow alternative approaches by utilizing semiautomatic methodology e.g. generalisation of higher resolution national datasets. Production of national CLC inventories is supported by training and is under systematic control of the CLC Technical Team, both for thematic and semantic aspects, to assure harmonized European products. 
The process of European data integration starts when national deliveries are accepted and the Database Acceptance Report (DBTA) issued. National CLC data are then transformed into the common European reference (ETRS89/LAEA) and pan-European seamless dataset is produced. Integration step includes also harmonization of database along country borders. Rigorous harmonization of country borders has been done only for CLC2000 and CHA9000 layers (in 2 km wide strips along borders) as part of CLC2000 update. Currently, only simplified harmonisation is applied i.e. small border polygons (area < 5 ha) are generalised according to predefined rules to largest and/or thematically most similar neighbour, sliver polygons along borders (< 0.1 ha) are eliminated. European Corine Land Cover seamless database represents the final product of European data integration. Some artificial lines (dividing polygons with the same code) can be still present in database due to technical constraints of current ArcGIS technology and complexity of dataset (adaptive tiling) but this has no impact on dataset contents and can be dissolved for smaller data extracts.\\nRevised versions\\nStarted from the publication of CLC2006 (Version 16) the previous inventory is substituted by its revised version by most of the participating countries (see CLC seamless data coverage table https://land.copernicus.eu/user-corner/technical-library/clc-country-coverage-1990-2018-v20_1). However, due to their specific methodology not all countries are producing revised version of the previous inventory. The revision of previous CLC layer is a “by-product” of the standard updating process, including corrections to the original data identified during the update. 
Revisions (correcting mistakes) are needed because of the following factors: \\n- availability of higher resolution satellite imagery;\\n- a new satellite image or time series of satellite imagery provides additional key to correctly recognise a feature;\\n- improved availability and better quality of in-situ data;\\n- improved skills of experts, i.e. better understanding and application of CLC nomenclature;\\n- decision of the national team to improve the product between two inventories.\\n\\nThese revisions are not propagated backward to older datasets (e.g. during CLC2018 revision of CLC2012 might be provided, but the older datasets were not revised). Thus, consecutive inventories might include not only real changes, but also differences due to revisions. Therefore, it is recommended that in time series analysis CLC-Change layers should be used. If status layers from past are needed, these could be derived backward from deducting CLC-Change layers from the latest (best available) status layer as it is done for EEA accounting layers (see at https://www.eea.europa.eu/data-and-maps/data/corine-land-cover-accounting-layers)\\nMore details to be available soon in upcoming \"Users\\' Manual for all Copernicus data” document. \\nVersion 20_1\\nRelease date: 24-02-2020\\nFile naming conventions simplified and better described. New file naming convention has been introduced based on user feedback on version 20. Filename is composed of combination of information about update campaign, data theme and reference year and version specification (including release year and release number). \\nSee https://land.copernicus.eu/user-corner/technical-library/clc-file-naming-conventions-guide-v20_1 for details.\\n\\nThe French DOMs are provided in separate databases (files both for vector and raster version of data).\\n\\nAll raster layers are back in 8 bit GeoTIFF. Modification is introduced based on the user feedback on version 20. 
In order to keep 8 bit resolution for raster change layers, they are divided into two files - representing consumption (from) and formation (to) part of change.\\n\\nSee https://land.copernicus.eu/user-corner/technical-library/clc-country-coverage-1990-2018-v20_1 for full information about the coverage of this version.\\nSee http://land.copernicus.eu/user-corner/technical-library/clc-and-clcc-release-lineage for full information about lineage history.'}}}}}}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xml_to_dict(tree.getroot(), {})" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BoundingBox(left=-23.825993823627275, bottom=24.28417701147754, right=104.35721125172725, top=80.00331843607006)" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# bbox coordinates become meaningless...\n", + "\n", + "crs = rio.crs.CRS(img.crs)\n", + "bounds = img.bounds\n", + "\n", + "transformer = Transformer.from_crs(crs.to_epsg(), 4326)\n", + "miny, minx = transformer.transform(bounds.left, bounds.bottom)\n", + "maxy, maxx = transformer.transform(bounds.right, bounds.top)\n", + "bbox = (minx, miny, maxx, maxy)\n", + "\n", + "rio.coords.BoundingBox(*bbox)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BoundingBox(left=24.28417701147754, bottom=-23.825993823627275, right=80.00331843607006, top=104.35721125172725)" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "crs = CRS(img.crs)\n", + "bounds = img.bounds\n", + "\n", + "transformer = Transformer.from_crs(crs.to_epsg(), 4326)\n", + "miny, minx = transformer.transform(bounds.left, bounds.bottom)\n", + "maxy, maxx = transformer.transform(bounds.right, bounds.top)\n", + "bbox 
= (miny, minx, maxy, maxx)\n", + "# bbox = (minx, miny, maxx, maxy)\n", + "rio.coords.BoundingBox(*bbox)" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BoundingBox(left=24.28417701147754, bottom=-180.0, right=90.0, top=180.0)" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# wrong order...\n", + "from pyproj import Transformer\n", + "\n", + "transformer = Transformer.from_crs(img.crs.to_epsg(), \"EPSG:4326\")\n", + "rio.coords.BoundingBox(*transformer.transform_bounds(*img.bounds))" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BoundingBox(left=-56.50514190170437, bottom=24.28417701147754, right=72.90613675900903, top=72.63376966542347)" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# correct order...\n", + "from rasterio.crs import CRS\n", + "\n", + "rio.coords.BoundingBox(*rio.warp.transform_bounds(img.crs, CRS.from_epsg(4326), *img.bounds))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 937dfbcee1fa94881951089b89d7028332f1d690 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Fri, 26 Apr 2024 15:44:34 +0200 Subject: [PATCH 13/80] appends VS code workspace --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4b381b7..50511a3 100644 --- a/.gitignore +++ b/.gitignore @@ -100,3 +100,4 @@ target/ # Pycopy __pycache__/ +clms-stac.code-workspace From 
13670a37af8d65a99905fff254ee5e45e3a58b16 Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Mon, 29 Apr 2024 13:27:06 +0800 Subject: [PATCH 14/80] update for collection --- .DS_Store | Bin 0 -> 6148 bytes scripts/.DS_Store | Bin 0 -> 8196 bytes scripts/vabh/vabh_collection.py | 106 ++++++++++++++++++++++++++++++++ scripts/vabh/vabh_item.py | 12 +++- 4 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 .DS_Store create mode 100644 scripts/.DS_Store create mode 100644 scripts/vabh/vabh_collection.py diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..f88b6e8312bcef6843e3c0294cde9c6d4ae27a06 GIT binary patch literal 6148 zcmeHK%}T>S5T3QwrWBzE1-&hJt=N_#7B8XJ7cim+mD-r1!I&*gY7eE5yS|Vw;`2DO zy8){?coMNQu=~x<&u->}><<8l-ZW?d)B(W3Mkq)r5Hc6K)@(4L&~uC-g~XpFV?QdG z=r5Y++bfX39ZVpEPv5V6^t}1wC`{63^Ifc!%Ig~yr{dI{8}C8QymXjOliqN0MWb`2 zB0txM{zW*N4eDDbDo%%CJeuf)Fc@LT^<@|bYSvTJI7oD^XBS40%_N>X)%XaRKAq;>`mfG#rNpRv+`(0 zW`G%B2AF}BV!$2)PHm;C=3=0j;_z-FB6iWPHzc9Y05i zhNQ$8e?T9M`ex9mkD`g9!9-&GN%TPzNrRE7(L|$(@xjEz@FJc&cNSZq`esNvH<|m* zJ@=e5d(M34-rgx=46Oydk+C|)m`oQ(t%|xkG(OL-+nN+eDhWaMEL(C%vX+yj@ul0e zLqV8Y3oj@)hXDz>)R&9gsjNQ zpcWY&ZER|8ipTD6*?2l08*SOx5RWx&X*zvck!$NWY~4S2+#0r>vmyor>j0Z`OHAh` z24uO#Ht|8CVpln!Yvq8H*;VSCNV+fGuMVWmoMR`=Q^ufH?NU^oG~AariWE(6&an!^ zwmYcRpe#tEDVn7eSvvK4DLG_`DR+uf5Kuhwej-J{m3 z{ggu6Gmn@?zAxkE3|lwtqX$Z+?hW-8P1np?M#|0%8%mATp!imGkwuYoA!|NTFujwq zRxgjwtI?{}dm}MLyC@In^keoqI`^piGm(2E_sQ}hE^o^l)C;k?DzZk=GIGkLL)Mi=Rqx2Re_t|CkIlIb!V8628*>(08y8#ImP*IBos6zzHQI8c^iA{JA z+pry7NFs%U=*0jG9KjGw*eKxyPGJ;h@D$GCX*`1$a2_w=6}*bq@eba_CA^2va0TDs zTYQI~@f-fc4QaNdN^_*S(n4vCv{qUtHA%ar4yjY>l9JMZWJiT-FUr+OE7c7kNB5U5(dXtKRyk)*ltOK9=lU}_~ zeZSsrZ~s#+x#rZT>GQKU{)6FU^X9B9nfZEGh_yv+Wa*#AoF zFWEKrJ^O|I!LDN_s-Pi?)mV=vBJnPCU=O;nmx!E3KMwh1K1x(RhU4&1Bs!lYLO+Vf z@Hn2plXw=-;d!6bFZ-l^4HxhR-o#tDh|Bl@AL1i?j8E_dzQT{UkhXg=Y4OuU(iW3> z+j8uqBrB7AvikHHRftgQ`1yb5)ZhQ7PeQ^-!wgKr44|?-+1^TlPqiTOvv!QGhw0*t z>x~KYO{nvau~=M_kW*_)2I6za;j!W literal 0 
HcmV?d00001 diff --git a/scripts/vabh/vabh_collection.py b/scripts/vabh/vabh_collection.py new file mode 100644 index 0000000..24ae016 --- /dev/null +++ b/scripts/vabh/vabh_collection.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +import os +from datetime import datetime +from typing import Final + +import pystac +import rasterio as rio +from pyproj import Transformer +from pystac import Extent, SpatialExtent, TemporalExtent +from pystac.provider import ProviderRole +from rasterio.coords import BoundingBox +from rasterio.crs import CRS +from shapely.geometry import Polygon, box, mapping + +# KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/data/AT001_WIEN_UA2012_DHM_V020.tif" +KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020" +head, tail = os.path.split(KEY) +(product_id,product_version) = tail.rsplit("_", 1) + +HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( + name="Copernicus Land Monitoring Service", + description=( + "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land" + " use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of" + " users in Europe and across the World in the field of environmental terrestrial applications." 
+ ), + roles=[ProviderRole.HOST, ProviderRole.LICENSOR], + url="https://land.copernicus.eu", +) + + +def get_metadata_from_tif(key: str) -> tuple[BoundingBox, CRS, int, int]: + with rio.open(key) as tif: + bounds = tif.bounds + crs = tif.crs + height = tif.height + width = tif.width + tif.close() + return (bounds, crs, height, width) + + +def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: + transformer = Transformer.from_crs(crs.to_epsg(), 4326) + miny, minx = transformer.transform(bounds.left, bounds.bottom) + maxy, maxx = transformer.transform(bounds.right, bounds.top) + bbox = (minx, miny, maxx, maxy) + return box(*bbox) + + +def get_description(product_id: str) -> str: + country, city, year, product, version = product_id.split("_") + return f"{year[2:]} {city.title()} building height" + + +def get_datetime(product_id: str) -> tuple[datetime, datetime]: + year = int(product_id.split("_")[2][2:]) + return (datetime(year=year, month=1, day=1), datetime(year=year, month=12, day=31)) + + +def get_collection_extent(bbox, start_datetime) -> Extent: + spatial_extent = SpatialExtent(bboxes=bbox) + temporal_extent = TemporalExtent(intervals=[[start_datetime, None]]) + return Extent(spatial=spatial_extent, temporal=temporal_extent) + + +def create_asset(asset_key: str) -> pystac.Asset: + parameter = asset_key.split("_")[-1].split(".")[0] + version = asset_key.split("_")[-3] + return pystac.Asset( + href=f"s3://{BUCKET}/" + asset_key, + media_type=pystac.MediaType.GEOTIFF, + title=TITLE_MAP[parameter] + f" {version}", + roles=["data"], + ) + +def get_item_assets() + +def get_links() + + + +if __name__ == "__main__": + head, tail = os.path.split(KEY) + (product_id,) = tail.split(".")[0].rsplit("_", 0) + bounds, crs, height, width = get_metadata_from_tif(KEY) + geom_wgs84 = get_geom_wgs84(bounds, crs) + description = get_description(product_id) + start_datetime, end_datetime = get_datetime(product_id) + collection_extent = 
get_collection_extent(list(geom_wgs84.bounds), start_datetime) + summaries = pystac.Summaries({"proj:epsg": [crs.to_epsg()]}) + + collection = pystac.Collection( + stac_extensions=["https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", + "https://stac-extensions.github.io/projection/v1.1.0/schema.json"], + id="urban-atlas-building-height", + title="Urban Atlas Building Height 10m", + description="Urban Atlas building height over capital cities.", + keywords=["Buildings", "Building height", "Elevation"], + extent=collection_extent, + summaries=summaries, + providers=[HOST_AND_LICENSOR], + ) + + collection.set_self_href("scripts/vabh/test_collection.json") + collection.save_object() diff --git a/scripts/vabh/vabh_item.py b/scripts/vabh/vabh_item.py index f963a71..0a5f124 100644 --- a/scripts/vabh/vabh_item.py +++ b/scripts/vabh/vabh_item.py @@ -13,7 +13,15 @@ from rasterio.crs import CRS from shapely.geometry import Polygon, box, mapping -KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/data/AT001_WIEN_UA2012_DHM_V020.tif" +# KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/data/AT001_WIEN_UA2012_DHM_V020.tif" +KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020" +head, tail = os.path.split(KEY) +(product_id,product_version) = tail.rsplit("_", 1) + +PATH_Dataset = os.path.join(KEY, "Dataset/"+tail+".tif") +PATH_QC = os.path.join(KEY, "Dataset/"+tail+".tif") +PATH_Metadata = +PATH_Doc = HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( name="Copernicus Land Monitoring Service", @@ -88,6 +96,8 @@ def get_links() summaries = pystac.Summaries({"proj:epsg": [crs.to_epsg()]}) collection = pystac.Collection( + stac_extensions=["https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", + "https://stac-extensions.github.io/projection/v1.1.0/schema.json"], id="urban-atlas-building-height", title="Urban Atlas Building Height 10m", description="Urban 
Atlas building height over capital cities.", From cc0c8600bc693d27b6bc34ca46dd6f090581613f Mon Sep 17 00:00:00 2001 From: chorng Date: Mon, 29 Apr 2024 12:03:49 +0200 Subject: [PATCH 15/80] Put projection extension back --- schema/products/vpp.json | 1 + scripts/vpp/collection.py | 1 + 2 files changed, 2 insertions(+) diff --git a/schema/products/vpp.json b/schema/products/vpp.json index 3b4eba6..dbccdbb 100644 --- a/schema/products/vpp.json +++ b/schema/products/vpp.json @@ -227,6 +227,7 @@ "stac_version": { "const": "1.0.0" }, "stac_extensions": { "const": [ + "https://stac-extensions.github.io/projection/v1.1.0/schema.json", "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json" ] }, diff --git a/scripts/vpp/collection.py b/scripts/vpp/collection.py index 104857d..0d3c81c 100644 --- a/scripts/vpp/collection.py +++ b/scripts/vpp/collection.py @@ -44,6 +44,7 @@ def create_collection(item_list: list[str]) -> pystac.Collection: description=COLLECTION_DESCRIPTION, extent=COLLECTION_EXTENT, title=COLLECTION_TITLE, + stac_extensions=["https://stac-extensions.github.io/projection/v1.1.0/schema.json"], keywords=COLLECTION_KEYWORDS, providers=[VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR], summaries=COLLECTION_SUMMARIES, From 0eb46adf8fc3cfbc4cd17200c53be09822216998 Mon Sep 17 00:00:00 2001 From: chorng Date: Mon, 29 Apr 2024 12:29:34 +0200 Subject: [PATCH 16/80] Implement projection extension summaries --- scripts/vpp/collection.py | 32 ++++++++++++++++++++++++++++---- scripts/vpp/constants.py | 25 ------------------------- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/scripts/vpp/collection.py b/scripts/vpp/collection.py index 0d3c81c..80640e0 100644 --- a/scripts/vpp/collection.py +++ b/scripts/vpp/collection.py @@ -6,9 +6,12 @@ from glob import glob import pystac +import pystac.extensions +import pystac.extensions.projection from jsonschema import Draft7Validator from jsonschema.exceptions import best_match from 
pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension +from pystac.extensions.projection import ProjectionExtension from referencing import Registry, Resource from .constants import ( @@ -17,7 +20,6 @@ COLLECTION_EXTENT, COLLECTION_ID, COLLECTION_KEYWORDS, - COLLECTION_SUMMARIES, COLLECTION_TITLE, STAC_DIR, TITLE_MAP, @@ -44,12 +46,34 @@ def create_collection(item_list: list[str]) -> pystac.Collection: description=COLLECTION_DESCRIPTION, extent=COLLECTION_EXTENT, title=COLLECTION_TITLE, - stac_extensions=["https://stac-extensions.github.io/projection/v1.1.0/schema.json"], keywords=COLLECTION_KEYWORDS, providers=[VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR], - summaries=COLLECTION_SUMMARIES, ) + # summaries + summaries = ProjectionExtension.summaries(collection, add_if_missing=True) + summaries.epsg = [ + 32620, + 32621, + 32622, + 32625, + 32626, + 32627, + 32628, + 32629, + 32630, + 32631, + 32632, + 32633, + 32634, + 32635, + 32636, + 32637, + 32638, + 32738, + 32740, + ] + # extensions item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True) item_assets.item_assets = { @@ -74,6 +98,6 @@ def create_collection(item_list: list[str]) -> pystac.Collection: try: error_msg = best_match(validator.iter_errors(collection.to_dict())) assert error_msg is None, f"Failed to create {collection.id} collection. Reason: {error_msg}." 
+ collection.save_object() except AssertionError as error: LOGGER.error(error) - collection.save_object() diff --git a/scripts/vpp/constants.py b/scripts/vpp/constants.py index 7769e54..6b3ef1d 100644 --- a/scripts/vpp/constants.py +++ b/scripts/vpp/constants.py @@ -35,31 +35,6 @@ "plant phenology index", "vegetation", ] -COLLECTION_SUMMARIES = pystac.Summaries( - { - "proj:epsg": [ - 32620, - 32621, - 32622, - 32625, - 32626, - 32627, - 32628, - 32629, - 32630, - 32631, - 32632, - 32633, - 32634, - 32635, - 32636, - 32637, - 32638, - 32738, - 32740, - ] - } -) COLLECTION_TITLE = "Vegetation Phenology and Productivity Parameters" STAC_DIR = "stac_tests" TITLE_MAP = { From 8269e29551d188325c66fd5a716f5fee8a6191e6 Mon Sep 17 00:00:00 2001 From: chorng Date: Mon, 29 Apr 2024 12:31:33 +0200 Subject: [PATCH 17/80] Update vpp sample --- .../vegetation-phenology-and-productivity.json | 1 + 1 file changed, 1 insertion(+) diff --git a/stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json b/stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json index bd79ea9..1b1b8a5 100644 --- a/stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json +++ b/stacs/vegetation-phenology-and-productivity/vegetation-phenology-and-productivity.json @@ -2,6 +2,7 @@ "type": "Collection", "stac_version": "1.0.0", "stac_extensions": [ + "https://stac-extensions.github.io/projection/v1.1.0/schema.json", "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json" ], "id": "vegetation-phenology-and-productivity", From 25fa4c3d03b6d85abc458d7cf9c747362a2a1e4c Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 29 Apr 2024 17:34:50 +0200 Subject: [PATCH 18/80] introduces funtions to gather assets --- clms_item_generator.ipynb | 552 ++++++++++++++++++-------------------- 1 file changed, 268 insertions(+), 284 deletions(-) diff --git a/clms_item_generator.ipynb b/clms_item_generator.ipynb index 
b700877..b1911d5 100644 --- a/clms_item_generator.ipynb +++ b/clms_item_generator.ipynb @@ -2,19 +2,21 @@ "cells": [ { "cell_type": "code", - "execution_count": 40, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import re\n", "\n", - "from pyproj import Transformer\n", + "import pystac\n", "from pystac.extensions.projection import ProjectionExtension\n", "import pystac.item\n", + "from pystac.provider import ProviderRole\n", + "\n", + "from pyproj import Transformer\n", "from shapely.geometry import GeometryCollection, box, shape, mapping\n", "from datetime import datetime, UTC\n", - "import pystac\n", "\n", "import rasterio as rio\n", "import rasterio.warp\n", @@ -26,17 +28,272 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "img_path = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif'\n", + "# os.path.split(img_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "def deconstruct_clc_name(filename: str):\n", + " id = os.path.basename(filename).split('.')[0]\n", + " p = re.compile((\"U(?P[0-9]{4})_\"\n", + " \"(?PCLC|CHA)(?P[0-9]{4})_\"\n", + " \"V(?P[0-9]{4})_(?P[0-9a-z]*)\"\n", + " \"_?(?P[A-Z]*)?\"\n", + " \"_?(?P[A-Z]*)?\"))\n", + " m = p.search(id)\n", + "\n", + " return(m.groupdict())\n", + "\n", + "DOM_DICT = {\n", + " 'GLP': 'Guadeloupe',\n", + " 'GUF': 'French Guyana',\n", + " 'MTQ': 'Martinique',\n", + " 'MYT': 'Mayotte',\n", + " 'REU': 'Réunion',\n", + " '': 'Europe',\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# label = DOM_DICT.get('')\n", + "\n", + "def create_asset(filename: str, label: str):\n", + "\n", + " MEDIA_TYPE_DICT = {\n", + " 'tif': pystac.MediaType.COG,\n", + " 'tif_xml': pystac.MediaType.XML,\n", + " 'tif_ovr': 'image/tiff; application=geotiff; 
profile=pyramid',\n", + " 'vat_cpg': pystac.MediaType.TEXT,\n", + " 'vat_dbf': 'application/dbf',\n", + " 'txt': pystac.MediaType.TEXT,\n", + " 'lyr': 'image/tiff; application=geotiff; profile=layer',\n", + " }\n", "\n", - "id = os.path.basename(img_path).split('.')[0]\n", - "_ = re.search('(?<=CLC)[0-9]{4}', id)\n", - "year = _.group(0)\n", "\n", - "# Wrap in def?\n", + " TITLE_DICT = {\n", + " 'tif': f'Single Band Land Classification {label}',\n", + " 'tif_xml': f'TIFF Metadata {label}',\n", + " 'tif_ovr': f'Pyramid {label}',\n", + " 'vat_cpg': f'Encoding {label}',\n", + " 'vat_dbf': f'Database {label}',\n", + " 'txt': f'Legends {label}',\n", + " 'lyr': f'LEgend Layer {label}',\n", + " }\n", + "\n", + " ROLES_DICT = {\n", + " 'tif': ['data', 'visual'],\n", + " 'tif_xml': ['metadata'],\n", + " 'tif_ovr': ['metadata'],\n", + " 'vat_cpg': ['metadata'],\n", + " 'vat_dbf': ['metadata'],\n", + " 'txt': ['metadata'],\n", + " 'lyr': ['metadata'],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Single Band Land Classification Europe'" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2012'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "file_metadata = deconstruct_clc_name(img_path)\n", + "\n", + "year = file_metadata.get('reference_year')\n", + "year" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "def get_tif_files(path: str): \n", + " tif_files=[]\n", + " for root, dirs, files in os.walk(path):\n", + " if root.endswith(('DATA', 'French_DOMs')):\n", + " for file in files:\n", + " if file.endswith('.tif'):\n", + " tif_files.append(os.path.join(root, file))\n", + "\n", 
+ " return(tif_files)\n", + "\n", + "\n", + "def extract_clc_name(path: str):\n", + " clc_name = os.path.basename(path).split('.')[0]\n", + " return(clc_name)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['U2018_CLC2012_V2020_20u1',\n", + " 'U2018_CLC2012_V2020_20u1_FR_GLP',\n", + " 'U2018_CLC2012_V2020_20u1_FR_GUF',\n", + " 'U2018_CLC2012_V2020_20u1_FR_MTQ',\n", + " 'U2018_CLC2012_V2020_20u1_FR_MYT',\n", + " 'U2018_CLC2012_V2020_20u1_FR_REU']" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "root = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m'\n", + "\n", + "tif_files = get_tif_files(path=root)\n", + "clc_names = [extract_clc_name(f) for f in tif_files]\n", + "clc_names" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "def get_asset_files(path, clc_name):\n", + "\n", + " clc_name_elements = deconstruct_clc_name(clc_name)\n", + "\n", + " # clc_pattern = ('U{update_campaign}_{theme}{reference_year}_V{release_year}_'.format(**clc_name_elements),\n", + " # clc_name_elements['release_number'][:2] + '[0-9a-z]{0,2}',\n", + " # '_?({})'\n", + "\n", + " if clc_name_elements['DOM_code']:\n", + " allowed_dirs = ['DATA', 'French_DOMs', 'Metadata']\n", + " else:\n", + " allowed_dirs = ['DATA', 'Legend', 'Metadata']\n", + " \n", + " print(allowed_dirs)\n", + "\n", + " asset_files = []\n", + " \n", + " for root, dirs, files in os.walk(path, topdown=True):\n", + " [dirs.remove(d) for d in list(dirs) if d not in allowed_dirs]\n", + " for file in files:\n", + " if file.startswith(clc_name + '.') or file.endswith(('.lyr', 'QGIS.txt',)):\n", + " asset_files.append(os.path.join(root, file))\n", + "\n", + " return(asset_files)\n", + " \n", + "\n", + " with os.scandir(path) as it:\n", + " for entry in it:\n", + " if entry.is_file() and entry.name.startswith(clc_name) and not 
entry.name.endswith('.tif'):\n", + " asset_files.append(entry.name)\n", + " \n", + " tif_file_assets.append(asset_files)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['DATA', 'Legend', 'Metadata']\n" + ] + }, + { + "data": { + "text/plain": [ + "['X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tfw',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.aux.xml',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.ovr',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.cpg',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.dbf',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.xml',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20.tif.lyr',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20_QGIS.txt',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Metadata\\\\U2018_CLC2012_V2020_20u1.xml']" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 'bo' in ['bi', 'ba', 'bo']\n", + "# tif_file_assets[3]\n", + "get_asset_files(root, clc_name=clc_names[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "CLC_PROVIDER = pystac.Provider(\n", + " name='Copernicus Land Monitoring Service',\n", + " description=('The Copernicus Land Monitoring Service provides '\n", + " 'geographical information on land cover and its '\n", + " 'changes, land use, ground motions, vegetation state, '\n", + " 'water cycle and Earth\\'s 
surface energy variables to '\n", + " 'a broad range of users in Europe and across the World '\n", + " 'in the field of environmental terrestrial applications.'),\n", + " roles=[ProviderRole.LICENSOR, ProviderRole.HOST],\n", + " url= 'https://land.copernicus.eu'\n", + ")\n", "\n", "props = {'description': (f'Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' \n", " f'datasets produced within the frame the Copernicus Land Monitoring Service '\n", @@ -66,33 +323,13 @@ " f'CLC class descriptions can be found at '\n", " f'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'),\n", " 'created': None,\n", - " 'providers': [{\n", - " 'name': 'Copernicus Land Monitoring Service',\n", - " 'description': ('The Copernicus Land Monitoring Service provides '\n", - " 'geographical information on land cover and its '\n", - " 'changes, land use, ground motions, vegetation state, '\n", - " 'water cycle and Earth\\'s surface energy variables to '\n", - " 'a broad range of users in Europe and across the World '\n", - " 'in the field of environmental terrestrial applications.'),\n", - " 'roles': ['licensor', 'host'],\n", - " 'url': 'https://land.copernicus.eu'\n", - " }],\n", - " \n", - "\n", - "\n", + " 'providers': CLC_PROVIDER\n", "}\n", "\n", "\n", "\n", "with rio.open(img_path) as img:\n", "\n", - " # xmin, ymin, xmax, ymax = img.bounds\n", - " # transformer = Transformer.from_crs(img.crs, \"EPSG:4326\")\n", - " # xmin_, ymax_ = transformer.transform(xmin, ymax)\n", - " # xmax_, ymin_ = transformer.transform(xmax, ymin)\n", - " # box_ = box(*[xmin_, ymin_, xmin_, ymax_])\n", - " # box_ = box(*transformer.transform_bounds(*img.bounds))\n", - " # bounds_wgs84 = rasterio.warp.transform_bounds(img.crs, CRS.from_epsg(4326), *img.bounds)\n", " bbox = rio.warp.transform_bounds(img.crs, rio.crs.CRS.from_epsg(4326), *img.bounds)\n", " params = {\n", " 'id': id,\n", @@ -104,17 +341,12 @@ " 'properties': 
props,\n", " }\n", "\n", - "\n", - "\n", "item = pystac.Item(**params)\n", "\n", - "\n", "item.add_asset(\n", " key='image',\n", " asset=pystac.Asset(href=img_path, title='Geotiff', media_type=pystac.MediaType.GEOTIFF),\n", - ")\n", - "\n", - "# item.ext.add('proj')" + ")\n" ] }, { @@ -135,8 +367,7 @@ " )\n", "\n", "license = pystac.link.Link(rel='LICENSE', target=\"https://land.copernicus.eu/en/data-policy\")\n", - "item.add_link(license)\n", - "\n" + "item.add_link(license)\n" ] }, { @@ -325,253 +556,6 @@ "\n", "get_metadata(xml_path)\n" ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def xml_to_dict(xml, result):\n", - " for child in xml:\n", - " if len(child) == 0:\n", - " result[child.tag] = child.text\n", - " else:\n", - " if child.tag in result:\n", - " if not isinstance(result[child.tag], list):\n", - " result[child.tag] = [result[child.tag]]\n", - " result[child.tag].append(xml_to_dict(child, {}))\n", - " else:\n", - " result[child.tag] = xml_to_dict(child, {})\n", - " return result\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'{http://www.isotc211.org/2005/gmd}fileIdentifier': {'{http://www.isotc211.org/2005/gco}CharacterString': '7e162b2d-5196-41b2-b6dd-e889651e2f1f'},\n", - " '{http://www.isotc211.org/2005/gmd}language': {'{http://www.isotc211.org/2005/gmd}LanguageCode': None},\n", - " '{http://www.isotc211.org/2005/gmd}characterSet': {'{http://www.isotc211.org/2005/gmd}MD_CharacterSetCode': None},\n", - " '{http://www.isotc211.org/2005/gmd}hierarchyLevel': {'{http://www.isotc211.org/2005/gmd}MD_ScopeCode': None},\n", - " '{http://www.isotc211.org/2005/gmd}contact': {'{http://www.isotc211.org/2005/gmd}CI_ResponsibleParty': {'{http://www.isotc211.org/2005/gmd}organisationName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'European Environment Agency'},\n", - " 
'{http://www.isotc211.org/2005/gmd}contactInfo': {'{http://www.isotc211.org/2005/gmd}CI_Contact': {'{http://www.isotc211.org/2005/gmd}address': {'{http://www.isotc211.org/2005/gmd}CI_Address': {'{http://www.isotc211.org/2005/gmd}deliveryPoint': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Kongens Nytorv 6'},\n", - " '{http://www.isotc211.org/2005/gmd}city': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Copenhagen'},\n", - " '{http://www.isotc211.org/2005/gmd}administrativeArea': {'{http://www.isotc211.org/2005/gco}CharacterString': 'K'},\n", - " '{http://www.isotc211.org/2005/gmd}postalCode': {'{http://www.isotc211.org/2005/gco}CharacterString': '1050'},\n", - " '{http://www.isotc211.org/2005/gmd}country': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Denmark'},\n", - " '{http://www.isotc211.org/2005/gmd}electronicMailAddress': {'{http://www.isotc211.org/2005/gco}CharacterString': 'sdi@eea.europa.eu'}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}role': {'{http://www.isotc211.org/2005/gmd}CI_RoleCode': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}dateStamp': {'{http://www.isotc211.org/2005/gco}DateTime': '2019-12-18T22:18:54'},\n", - " '{http://www.isotc211.org/2005/gmd}metadataStandardName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'ISO 19115:2003/19139'},\n", - " '{http://www.isotc211.org/2005/gmd}metadataStandardVersion': {'{http://www.isotc211.org/2005/gco}CharacterString': '1.0'},\n", - " '{http://www.isotc211.org/2005/gmd}referenceSystemInfo': {'{http://www.isotc211.org/2005/gmd}MD_ReferenceSystem': {'{http://www.isotc211.org/2005/gmd}referenceSystemIdentifier': {'{http://www.isotc211.org/2005/gmd}RS_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': 'EPSG:3035'}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}identificationInfo': {'{http://www.isotc211.org/2005/gmd}MD_DataIdentification': {'{http://www.isotc211.org/2005/gmd}citation': 
{'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Corine Land Cover (CLC) 2018, Version 2020 20_1'},\n", - " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2020-02-24'},\n", - " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}edition': {'{http://www.isotc211.org/2005/gco}CharacterString': '20_1'},\n", - " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Corine Land Cover (CLC) 2018, Version 2020 20_1'}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}abstract': {'{http://www.isotc211.org/2005/gco}CharacterString': 'CLC2018 is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2018. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. \\nCLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. 
Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/clc2018technicalguidelines_final.pdf. 
CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'},\n", - " '{http://www.isotc211.org/2005/gmd}pointOfContact': [{'{http://www.isotc211.org/2005/gmd}CI_ResponsibleParty': {'{http://www.isotc211.org/2005/gmd}organisationName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'European Environment Agency (EEA) under the framework of the Copernicus programme'},\n", - " '{http://www.isotc211.org/2005/gmd}contactInfo': {'{http://www.isotc211.org/2005/gmd}CI_Contact': {'{http://www.isotc211.org/2005/gmd}address': {'{http://www.isotc211.org/2005/gmd}CI_Address': {'{http://www.isotc211.org/2005/gmd}deliveryPoint': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Kongens Nytorv 6'},\n", - " '{http://www.isotc211.org/2005/gmd}city': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Copenhagen'},\n", - " '{http://www.isotc211.org/2005/gmd}administrativeArea': {'{http://www.isotc211.org/2005/gco}CharacterString': 'K'},\n", - " '{http://www.isotc211.org/2005/gmd}postalCode': {'{http://www.isotc211.org/2005/gco}CharacterString': '1050'},\n", - " '{http://www.isotc211.org/2005/gmd}country': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Denmark'},\n", - " '{http://www.isotc211.org/2005/gmd}electronicMailAddress': {'{http://www.isotc211.org/2005/gco}CharacterString': 'copernicus@eea.europa.eu'}}},\n", - " '{http://www.isotc211.org/2005/gmd}onlineResource': {'{http://www.isotc211.org/2005/gmd}CI_OnlineResource': {'{http://www.isotc211.org/2005/gmd}linkage': {'{http://www.isotc211.org/2005/gmd}URL': 'http://www.eea.europa.eu'},\n", - " '{http://www.isotc211.org/2005/gmd}protocol': {'{http://www.isotc211.org/2005/gco}CharacterString': 'WWW:LINK-1.0-http--link'},\n", - " '{http://www.isotc211.org/2005/gmd}name': {'{http://www.isotc211.org/2005/gco}CharacterString': 'European Environment Agency public website'},\n", - " '{http://www.isotc211.org/2005/gmd}function': 
{'{http://www.isotc211.org/2005/gmd}CI_OnLineFunctionCode': None}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}role': {'{http://www.isotc211.org/2005/gmd}CI_RoleCode': None}}},\n", - " {'{http://www.isotc211.org/2005/gmd}CI_ResponsibleParty': {'{http://www.isotc211.org/2005/gmd}individualName': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}organisationName': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}positionName': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}contactInfo': {'{http://www.isotc211.org/2005/gmd}CI_Contact': {'{http://www.isotc211.org/2005/gmd}phone': {'{http://www.isotc211.org/2005/gmd}CI_Telephone': {'{http://www.isotc211.org/2005/gmd}voice': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}facsimile': {'{http://www.isotc211.org/2005/gco}CharacterString': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}address': {'{http://www.isotc211.org/2005/gmd}CI_Address': {'{http://www.isotc211.org/2005/gmd}deliveryPoint': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}city': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}administrativeArea': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}postalCode': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}country': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}electronicMailAddress': {'{http://www.isotc211.org/2005/gco}CharacterString': None}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}role': {'{http://www.isotc211.org/2005/gmd}CI_RoleCode': None}}}],\n", - " 
'{http://www.isotc211.org/2005/gmd}resourceMaintenance': {'{http://www.isotc211.org/2005/gmd}MD_MaintenanceInformation': {'{http://www.isotc211.org/2005/gmd}maintenanceAndUpdateFrequency': {'{http://www.isotc211.org/2005/gmd}MD_MaintenanceFrequencyCode': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}graphicOverview': {'{http://www.isotc211.org/2005/gmd}MD_BrowseGraphic': {'{http://www.isotc211.org/2005/gmd}fileName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'https://sdi.eea.europa.eu/public/catalogue-graphic-overview/blank.png'}}},\n", - " '{http://www.isotc211.org/2005/gmd}descriptiveKeywords': [{'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gmx}Anchor': None},\n", - " '{http://www.isotc211.org/2005/gmd}type': {'{http://www.isotc211.org/2005/gmd}MD_KeywordTypeCode': None},\n", - " '{http://www.isotc211.org/2005/gmd}thesaurusName': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gmx}Anchor': 'GEMET - INSPIRE themes, version 1.0'},\n", - " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2008-06-01'},\n", - " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': 'geonetwork.thesaurus.external.theme.httpinspireeceuropaeutheme-theme'}}}}}}},\n", - " {'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Copernicus Land Satellite Image Interpretation 2018 Corine Land Cover Raster CLC Polygon'},\n", - " 
'{http://www.isotc211.org/2005/gmd}thesaurusName': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'EEA keyword list'},\n", - " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2002-03-01'},\n", - " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}}}}}},\n", - " {'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gco}CharacterString': 'geospatial data'},\n", - " '{http://www.isotc211.org/2005/gmd}type': {'{http://www.isotc211.org/2005/gmd}MD_KeywordTypeCode': None},\n", - " '{http://www.isotc211.org/2005/gmd}thesaurusName': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'EEA categories'},\n", - " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2010-07-06'},\n", - " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': 'geonetwork.thesaurus.local.theme.eea-categories'}}}}}}},\n", - " {'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}type': {'{http://www.isotc211.org/2005/gmd}MD_KeywordTypeCode': None},\n", - " '{http://www.isotc211.org/2005/gmd}thesaurusName': 
{'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'GEMET'},\n", - " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2018-08-16'},\n", - " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': 'geonetwork.thesaurus.external.theme.gemet'}}}}}}},\n", - " {'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Albania, Austria, Belgium, Bosnia and Herzegovina, Bulgaria, Croatia, Cyprus, Czech Republic, Denmark, Estonia, Finland, France, Germany, Greece, Hungary, Iceland, Ireland, Italy, Kosovo, Latvia, Liechtenstein, Lithuania, Luxembourg, Malta, Montenegro, Netherlands, North Macedonia, Norway, Poland, Portugal, Romania, Serbia, Slovakia, Slovenia, Spain, Sweden, Switzerland, Turkey, United Kingdom'},\n", - " '{http://www.isotc211.org/2005/gmd}type': {'{http://www.isotc211.org/2005/gmd}MD_KeywordTypeCode': None},\n", - " '{http://www.isotc211.org/2005/gmd}thesaurusName': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Continents, countries, sea regions of the world'},\n", - " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2015-07-17'},\n", - " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}},\n", - " 
'{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': 'geonetwork.thesaurus.external.place.regions'}}}}}}}],\n", - " '{http://www.isotc211.org/2005/gmd}resourceConstraints': [{'{http://www.isotc211.org/2005/gmd}MD_LegalConstraints': {'{http://www.isotc211.org/2005/gmd}accessConstraints': {'{http://www.isotc211.org/2005/gco}CharacterString': \"Access to data is based on a principle of full, open and free access as established by the Copernicus data and information policy Regulation (EU) No 1159/2013 of 12 July 2013. This regulation establishes registration and licensing conditions for GMES/Copernicus users and can be found on http://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A32013R1159. Free, full and open access to this data set is made on the conditions that: 1. When distributing or communicating Copernicus dedicated data and Copernicus service information to the public, users shall inform the public of the source of that data and information. 2. Users shall make sure not to convey the impression to the public that the user's activities are officially endorsed by the Union. 3. Where that data or information has been adapted or modified, the user shall clearly state this. 4. The data remain the sole property of the European Union. Any information and data produced in the framework of the action shall be the sole property of the European Union. 
Any communication and publication by the beneficiary shall acknowledge that the data were produced “with funding by the European Union”.\"},\n", - " '{http://www.isotc211.org/2005/gmd}otherConstraints': {'{http://www.isotc211.org/2005/gmx}Anchor': 'no limitations to public access'}}},\n", - " {'{http://www.isotc211.org/2005/gmd}MD_LegalConstraints': {'{http://www.isotc211.org/2005/gmd}useConstraints': {'{http://www.isotc211.org/2005/gmd}MD_RestrictionCode': None}}}],\n", - " '{http://www.isotc211.org/2005/gmd}language': {'{http://www.isotc211.org/2005/gmd}LanguageCode': None},\n", - " '{http://www.isotc211.org/2005/gmd}topicCategory': [{'{http://www.isotc211.org/2005/gmd}MD_TopicCategoryCode': 'environment'},\n", - " {'{http://www.isotc211.org/2005/gmd}MD_TopicCategoryCode': 'imageryBaseMapsEarthCover'}],\n", - " '{http://www.isotc211.org/2005/gmd}extent': [{'{http://www.isotc211.org/2005/gmd}EX_Extent': {'{http://www.isotc211.org/2005/gmd}geographicElement': {'{http://www.isotc211.org/2005/gmd}EX_GeographicBoundingBox': {'{http://www.isotc211.org/2005/gmd}westBoundLongitude': {'{http://www.isotc211.org/2005/gco}Decimal': '-31.561261'},\n", - " '{http://www.isotc211.org/2005/gmd}eastBoundLongitude': {'{http://www.isotc211.org/2005/gco}Decimal': '44.820775'},\n", - " '{http://www.isotc211.org/2005/gmd}southBoundLatitude': {'{http://www.isotc211.org/2005/gco}Decimal': '27.405827'},\n", - " '{http://www.isotc211.org/2005/gmd}northBoundLatitude': {'{http://www.isotc211.org/2005/gco}Decimal': '71.409109'}}},\n", - " '{http://www.isotc211.org/2005/gmd}temporalElement': {'{http://www.isotc211.org/2005/gmd}EX_TemporalExtent': {'{http://www.isotc211.org/2005/gmd}extent': {'{http://www.opengis.net/gml/3.2}TimePeriod': {'{http://www.opengis.net/gml/3.2}beginPosition': '2017-01-01',\n", - " '{http://www.opengis.net/gml/3.2}endPosition': '2018-12-31'}}}}}},\n", - " {'{http://www.isotc211.org/2005/gmd}EX_Extent': {'{http://www.isotc211.org/2005/gmd}temporalElement': 
None}}]}},\n", - " '{http://www.isotc211.org/2005/gmd}distributionInfo': {'{http://www.isotc211.org/2005/gmd}MD_Distribution': {'{http://www.isotc211.org/2005/gmd}distributionFormat': {'{http://www.isotc211.org/2005/gmd}MD_Format': {'{http://www.isotc211.org/2005/gmd}name': {'{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}version': {'{http://www.isotc211.org/2005/gco}CharacterString': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}distributor': None,\n", - " '{http://www.isotc211.org/2005/gmd}transferOptions': [{'{http://www.isotc211.org/2005/gmd}MD_DigitalTransferOptions': {'{http://www.isotc211.org/2005/gmd}onLine': {'{http://www.isotc211.org/2005/gmd}CI_OnlineResource': {'{http://www.isotc211.org/2005/gmd}linkage': {'{http://www.isotc211.org/2005/gmd}URL': 'http://land.copernicus.eu/pan-european/corine-land-cover/clc2018/view'},\n", - " '{http://www.isotc211.org/2005/gmd}protocol': {'{http://www.isotc211.org/2005/gco}CharacterString': 'WWW:LINK-1.0-http--link'},\n", - " '{http://www.isotc211.org/2005/gmd}function': {'{http://www.isotc211.org/2005/gmd}CI_OnLineFunctionCode': None}}}}},\n", - " {'{http://www.isotc211.org/2005/gmd}MD_DigitalTransferOptions': None}]}},\n", - " '{http://www.isotc211.org/2005/gmd}dataQualityInfo': {'{http://www.isotc211.org/2005/gmd}DQ_DataQuality': {'{http://www.isotc211.org/2005/gmd}scope': {'{http://www.isotc211.org/2005/gmd}DQ_Scope': {'{http://www.isotc211.org/2005/gmd}level': {'{http://www.isotc211.org/2005/gmd}MD_ScopeCode': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}report': {'{http://www.isotc211.org/2005/gmd}DQ_DomainConsistency': {'{http://www.isotc211.org/2005/gmd}result': {'{http://www.isotc211.org/2005/gmd}DQ_ConformanceResult': {'{http://www.isotc211.org/2005/gmd}specification': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Commission Regulation (EU) 
No 1089/2010 of 23 November 2010 implementing Directive 2007/2/EC of the European Parliament and of the Council as regards interoperability of spatial data sets and services'},\n", - " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2010-12-08'},\n", - " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': None}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}explanation': {'{http://www.isotc211.org/2005/gco}CharacterString': 'See the referenced specification'},\n", - " '{http://www.isotc211.org/2005/gmd}pass': None}}}},\n", - " '{http://www.isotc211.org/2005/gmd}lineage': {'{http://www.isotc211.org/2005/gmd}LI_Lineage': {'{http://www.isotc211.org/2005/gmd}statement': {'{http://www.isotc211.org/2005/gco}CharacterString': 'CLC products are based in majority of EEA39 countries on the photointerpretation of satellite images by the national teams of the participating countries - the EEA member and cooperating countries. All features in original vector database are delineated and classified on satellite images according to CLC specifications i.e. with better than 100 m positional accuracy and 25 ha minimum mapping unit (5 ha MMU for change layer) into the standardized CLC nomenclature (44 CLC classes). The change layer is derived from satellite imagery by direct mapping of changes taken place between two consecutive inventories, based on image-to-image comparison. Some countries follow alternative approaches by utilizing semiautomatic methodology e.g. generalisation of higher resolution national datasets. Production of national CLC inventories is supported by training and is under systematic control of the CLC Technical Team, both for thematic and semantic aspects, to assure harmonized European products. 
The process of European data integration starts when national deliveries are accepted and the Database Acceptance Report (DBTA) issued. National CLC data are then transformed into the common European reference (ETRS89/LAEA) and pan-European seamless dataset is produced. Integration step includes also harmonization of database along country borders. Rigorous harmonization of country borders has been done only for CLC2000 and CHA9000 layers (in 2 km wide strips along borders) as part of CLC2000 update. Currently, only simplified harmonisation is applied i.e. small border polygons (area < 5 ha) are generalised according to predefined rules to largest and/or thematically most similar neighbour, sliver polygons along borders (< 0.1 ha) are eliminated. European Corine Land Cover seamless database represents the final product of European data integration. Some artificial lines (dividing polygons with the same code) can be still present in database due to technical constraints of current ArcGIS technology and complexity of dataset (adaptive tiling) but this has no impact on dataset contents and can be dissolved for smaller data extracts.\\nRevised versions\\nStarted from the publication of CLC2006 (Version 16) the previous inventory is substituted by its revised version by most of the participating countries (see CLC seamless data coverage table https://land.copernicus.eu/user-corner/technical-library/clc-country-coverage-1990-2018-v20_1). However, due to their specific methodology not all countries are producing revised version of the previous inventory. The revision of previous CLC layer is a “by-product” of the standard updating process, including corrections to the original data identified during the update. 
Revisions (correcting mistakes) are needed because of the following factors: \\n- availability of higher resolution satellite imagery;\\n- a new satellite image or time series of satellite imagery provides additional key to correctly recognise a feature;\\n- improved availability and better quality of in-situ data;\\n- improved skills of experts, i.e. better understanding and application of CLC nomenclature;\\n- decision of the national team to improve the product between two inventories.\\n\\nThese revisions are not propagated backward to older datasets (e.g. during CLC2018 revision of CLC2012 might be provided, but the older datasets were not revised). Thus, consecutive inventories might include not only real changes, but also differences due to revisions. Therefore, it is recommended that in time series analysis CLC-Change layers should be used. If status layers from past are needed, these could be derived backward from deducting CLC-Change layers from the latest (best available) status layer as it is done for EEA accounting layers (see at https://www.eea.europa.eu/data-and-maps/data/corine-land-cover-accounting-layers)\\nMore details to be available soon in upcoming \"Users\\' Manual for all Copernicus data” document. \\nVersion 20_1\\nRelease date: 24-02-2020\\nFile naming conventions simplified and better described. New file naming convention has been introduced based on user feedback on version 20. Filename is composed of combination of information about update campaign, data theme and reference year and version specification (including release year and release number). \\nSee https://land.copernicus.eu/user-corner/technical-library/clc-file-naming-conventions-guide-v20_1 for details.\\n\\nThe French DOMs are provided in separate databases (files both for vector and raster version of data).\\n\\nAll raster layers are back in 8 bit GeoTIFF. Modification is introduced based on the user feedback on version 20. 
In order to keep 8 bit resolution for raster change layers, they are divided into two files - representing consumption (from) and formation (to) part of change.\\n\\nSee https://land.copernicus.eu/user-corner/technical-library/clc-country-coverage-1990-2018-v20_1 for full information about the coverage of this version.\\nSee http://land.copernicus.eu/user-corner/technical-library/clc-and-clcc-release-lineage for full information about lineage history.'}}}}}}" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "xml_to_dict(tree.getroot(), {})" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BoundingBox(left=-23.825993823627275, bottom=24.28417701147754, right=104.35721125172725, top=80.00331843607006)" - ] - }, - "execution_count": 118, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# bbox coordinates become meaningless...\n", - "\n", - "crs = rio.crs.CRS(img.crs)\n", - "bounds = img.bounds\n", - "\n", - "transformer = Transformer.from_crs(crs.to_epsg(), 4326)\n", - "miny, minx = transformer.transform(bounds.left, bounds.bottom)\n", - "maxy, maxx = transformer.transform(bounds.right, bounds.top)\n", - "bbox = (minx, miny, maxx, maxy)\n", - "\n", - "rio.coords.BoundingBox(*bbox)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BoundingBox(left=24.28417701147754, bottom=-23.825993823627275, right=80.00331843607006, top=104.35721125172725)" - ] - }, - "execution_count": 105, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "crs = CRS(img.crs)\n", - "bounds = img.bounds\n", - "\n", - "transformer = Transformer.from_crs(crs.to_epsg(), 4326)\n", - "miny, minx = transformer.transform(bounds.left, bounds.bottom)\n", - "maxy, maxx = transformer.transform(bounds.right, bounds.top)\n", - "bbox 
= (miny, minx, maxy, maxx)\n", - "# bbox = (minx, miny, maxx, maxy)\n", - "rio.coords.BoundingBox(*bbox)" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BoundingBox(left=24.28417701147754, bottom=-180.0, right=90.0, top=180.0)" - ] - }, - "execution_count": 98, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# wrong order...\n", - "from pyproj import Transformer\n", - "\n", - "transformer = Transformer.from_crs(img.crs.to_epsg(), \"EPSG:4326\")\n", - "rio.coords.BoundingBox(*transformer.transform_bounds(*img.bounds))" - ] - }, - { - "cell_type": "code", - "execution_count": 119, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BoundingBox(left=-56.50514190170437, bottom=24.28417701147754, right=72.90613675900903, top=72.63376966542347)" - ] - }, - "execution_count": 119, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# correct order...\n", - "from rasterio.crs import CRS\n", - "\n", - "rio.coords.BoundingBox(*rio.warp.transform_bounds(img.crs, CRS.from_epsg(4326), *img.bounds))" - ] } ], "metadata": { From ab25f13941670d02b2faead59d94f3acce8d4ec5 Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Tue, 30 Apr 2024 01:58:51 +0800 Subject: [PATCH 19/80] Update item, collection --- .DS_Store | Bin 6148 -> 6148 bytes scripts/.DS_Store | Bin 8196 -> 8196 bytes scripts/vabh/test_collection.json | 66 ++++++++++++++++ scripts/vabh/test_item.json | 117 +++++++++++++++++----------- scripts/vabh/vabh_collection.py | 54 ++++++------- scripts/vabh/vabh_item.py | 123 ++++++++++++++++++------------ 6 files changed, 235 insertions(+), 125 deletions(-) create mode 100644 scripts/vabh/test_collection.json diff --git a/.DS_Store b/.DS_Store index f88b6e8312bcef6843e3c0294cde9c6d4ae27a06..d2c92cbf939cb9f45f03101e9b39f43feaf7f0a3 100644 GIT binary patch delta 97 
zcmZoMXffEJ$;h~CvKC{Zr&M*dk+Hdfj)JbSnMtjVLbau_v5tb7u|aJuCx@uAzI9N1 hc1~_yeh&i}Ffu}D23{x)qk1<#W>jX|%+B$b9{@&K6J-DZ delta 96 zcmZoMXffEJ$;h~KvKC{Zhh%lNp^1fwj)JbGQLTfDN+;2pk9jx|1XjaSa-KdoeUOI4mG7F*h-jp%5DZ ZjFY_(9|4WC-4H1Uvj-UV1hf1U2Lt1w6;c2I delta 92 zcmV-i0HgneK!iY$V*$Cbah3;>fDN+;2pk9jxsxOiaVQykdoeUPG%O%3IWm1LAbUA7 yFf1T6HZ*;G2?!M tuple[BoundingBox, CRS, int, int]: @@ -48,11 +53,6 @@ def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: return box(*bbox) -def get_description(product_id: str) -> str: - country, city, year, product, version = product_id.split("_") - return f"{year[2:]} {city.title()} building height" - - def get_datetime(product_id: str) -> tuple[datetime, datetime]: year = int(product_id.split("_")[2][2:]) return (datetime(year=year, month=1, day=1), datetime(year=year, month=12, day=31)) @@ -64,41 +64,31 @@ def get_collection_extent(bbox, start_datetime) -> Extent: return Extent(spatial=spatial_extent, temporal=temporal_extent) -def create_asset(asset_key: str) -> pystac.Asset: - parameter = asset_key.split("_")[-1].split(".")[0] - version = asset_key.split("_")[-3] - return pystac.Asset( - href=f"s3://{BUCKET}/" + asset_key, - media_type=pystac.MediaType.GEOTIFF, - title=TITLE_MAP[parameter] + f" {version}", - roles=["data"], - ) - -def get_item_assets() - -def get_links() +# def get_item_assets() +# def get_links() if __name__ == "__main__": head, tail = os.path.split(KEY) (product_id,) = tail.split(".")[0].rsplit("_", 0) - bounds, crs, height, width = get_metadata_from_tif(KEY) + bounds, crs, height, width = get_metadata_from_tif(PATH_Dataset) geom_wgs84 = get_geom_wgs84(bounds, crs) - description = get_description(product_id) start_datetime, end_datetime = get_datetime(product_id) - collection_extent = get_collection_extent(list(geom_wgs84.bounds), start_datetime) - summaries = pystac.Summaries({"proj:epsg": [crs.to_epsg()]}) + COLLECTION_extent = get_collection_extent(list(geom_wgs84.bounds), start_datetime) + COLLECTION_summaries = 
pystac.Summaries({"proj:epsg": [crs.to_epsg()]}) collection = pystac.Collection( - stac_extensions=["https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/projection/v1.1.0/schema.json"], - id="urban-atlas-building-height", - title="Urban Atlas Building Height 10m", - description="Urban Atlas building height over capital cities.", - keywords=["Buildings", "Building height", "Elevation"], - extent=collection_extent, - summaries=summaries, + stac_extensions=[ + "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", + "https://stac-extensions.github.io/projection/v1.1.0/schema.json", + ], + id=COLLECTION_id, + title=COLLECTION_title, + description=COLLECTION_description, + keywords=COLLECTION_keywords, + extent=COLLECTION_extent, + summaries=COLLECTION_summaries, providers=[HOST_AND_LICENSOR], ) diff --git a/scripts/vabh/vabh_item.py b/scripts/vabh/vabh_item.py index 0a5f124..019bacc 100644 --- a/scripts/vabh/vabh_item.py +++ b/scripts/vabh/vabh_item.py @@ -1,13 +1,15 @@ from __future__ import annotations import os +import xml.etree.ElementTree as ET from datetime import datetime from typing import Final import pystac import rasterio as rio from pyproj import Transformer -from pystac import Extent, SpatialExtent, TemporalExtent +from pystac.extensions.projection import ProjectionExtension +from pystac.link import Link from pystac.provider import ProviderRole from rasterio.coords import BoundingBox from rasterio.crs import CRS @@ -16,12 +18,12 @@ # KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/data/AT001_WIEN_UA2012_DHM_V020.tif" KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020" head, tail = os.path.split(KEY) -(product_id,product_version) = tail.rsplit("_", 1) +(product_id, product_version) = tail.rsplit("_", 1) -PATH_Dataset = os.path.join(KEY, "Dataset/"+tail+".tif") -PATH_QC = os.path.join(KEY, "Dataset/"+tail+".tif") -PATH_Metadata = 
-PATH_Doc = +PATH_Dataset = os.path.join(KEY, "Dataset/" + tail + ".tif") +PATH_Doc = os.path.join(KEY, "Doc/" + product_id + "_QC_Report" + product_version + ".pdf") +PATH_Metadata = os.path.join(KEY, "Metadata/" + product_id + "_metadata_" + product_version + ".xml") +PATH_Zip = os.path.join(head, tail + ".zip") HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( name="Copernicus Land Monitoring Service", @@ -34,6 +36,23 @@ url="https://land.copernicus.eu", ) +COLLECTION_id = "urban-atlas-building-height" + +CLMS_LICENSE: Final[Link] = Link(rel="license", target="https://land.copernicus.eu/en/data-policy") + +WORKING_DIR = os.getcwd() +CLMS_CATALOG_LINK: Final[Link] = Link( + rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) +) +COLLECTION_LINK: Final[Link] = Link( + rel=pystac.RelType.COLLECTION, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_id}/{COLLECTION_id}.json")), +) +ITEM_PARENT_LINK: Final[Link] = Link( + rel=pystac.RelType.PARENT, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_id}/{COLLECTION_id}.json")), +) + def get_metadata_from_tif(key: str) -> tuple[BoundingBox, CRS, int, int]: with rio.open(key) as tif: @@ -45,6 +64,23 @@ def get_metadata_from_tif(key: str) -> tuple[BoundingBox, CRS, int, int]: return (bounds, crs, height, width) +def str_to_datetime(datetime_str: str): + year, month, day = datetime_str.split("-") + return datetime(year=int(year), month=int(month), day=int(day)) # .strftime("%Y-%m-%dT%H:%M:%SZ") + + +def get_metadata_from_xml(xml: str) -> tuple[datetime, datetime, datetime]: + tree = ET.parse(xml) + for t in tree.iter("{http://www.opengis.net/gml}beginPosition"): + start_datetime = t.text + for t in tree.iter("{http://www.opengis.net/gml}endPosition"): + end_datetime = t.text + for t in tree.iter("{http://www.isotc211.org/2005/gmd}dateStamp"): + created = 
t.find("{http://www.isotc211.org/2005/gco}Date").text + + return (str_to_datetime(start_datetime), str_to_datetime(end_datetime), str_to_datetime(created)) + + def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: transformer = Transformer.from_crs(crs.to_epsg(), 4326) miny, minx = transformer.transform(bounds.left, bounds.bottom) @@ -58,57 +94,50 @@ def get_description(product_id: str) -> str: return f"{year[2:]} {city.title()} building height" -def get_datetime(product_id: str) -> tuple[datetime, datetime]: - year = int(product_id.split("_")[2][2:]) - return (datetime(year=year, month=1, day=1), datetime(year=year, month=12, day=31)) - - -def get_collection_extent(bbox, start_datetime) -> Extent: - spatial_extent = SpatialExtent(bboxes=bbox) - temporal_extent = TemporalExtent(intervals=[[start_datetime, None]]) - return Extent(spatial=spatial_extent, temporal=temporal_extent) - - -def create_asset(asset_key: str) -> pystac.Asset: - parameter = asset_key.split("_")[-1].split(".")[0] - version = asset_key.split("_")[-3] - return pystac.Asset( - href=f"s3://{BUCKET}/" + asset_key, - media_type=pystac.MediaType.GEOTIFF, - title=TITLE_MAP[parameter] + f" {version}", - roles=["data"], - ) - -def get_item_assets() - -def get_links() +# def get_item_assets() +# def get_links() if __name__ == "__main__": head, tail = os.path.split(KEY) (product_id,) = tail.split(".")[0].rsplit("_", 0) - bounds, crs, height, width = get_metadata_from_tif(KEY) + bounds, crs, height, width = get_metadata_from_tif(PATH_Dataset) + start_datetime, end_datetime, created = get_metadata_from_xml(PATH_Metadata) geom_wgs84 = get_geom_wgs84(bounds, crs) description = get_description(product_id) - start_datetime, end_datetime = get_datetime(product_id) - collection_extent = get_collection_extent(list(geom_wgs84.bounds), start_datetime) - summaries = pystac.Summaries({"proj:epsg": [crs.to_epsg()]}) - - collection = pystac.Collection( - 
stac_extensions=["https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/projection/v1.1.0/schema.json"], - id="urban-atlas-building-height", - title="Urban Atlas Building Height 10m", - description="Urban Atlas building height over capital cities.", - keywords=["Buildings", "Building height", "Elevation"], - extent=collection_extent, - summaries=summaries, - providers=[HOST_AND_LICENSOR], + + item = pystac.Item( + stac_extensions=["https://stac-extensions.github.io/projection/v1.1.0/schema.json"], + id=tail, + geometry=mapping(geom_wgs84), + bbox=list(geom_wgs84.bounds), + datetime=None, + start_datetime=start_datetime, + end_datetime=end_datetime, + properties={"created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, + collection=COLLECTION_id, ) - collection.set_self_href("scripts/vabh/test_item.json") - collection.save_object() + # extensions + projection = ProjectionExtension.ext(item, add_if_missing=True) + projection.epsg = crs.to_epsg() + projection.bbox = [int(bounds.left), int(bounds.bottom), int(bounds.right), int(bounds.top)] + projection.shape = [height, width] + + # links + links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] + for link in links: + item.links.append(link) + + # # assets + # assets = {os.path.split(key)[-1][:-4].lower(): create_asset(key) for key in asset_keys} + # for key, asset in assets.items(): + # item.add_asset(key, asset) + + # item.set_self_href(os.path.join(KEY, f"{tail}.json")) + item.set_self_href("scripts/vabh/test_item.json") + item.save_object() # def create_item(aws_session: boto3.Session, bucket: str, tile: str) -> pystac.Item: From 77a0975f37e2fbcff557a88e0a51b5c17b3c16ce Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 29 Apr 2024 21:26:45 +0200 Subject: [PATCH 20/80] extra steps added to pick the right legend files, as they deviate from the naming convention --- clms_item_generator.ipynb | 114 
++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 66 deletions(-) diff --git a/clms_item_generator.ipynb b/clms_item_generator.ipynb index b1911d5..4e5b6ce 100644 --- a/clms_item_generator.ipynb +++ b/clms_item_generator.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -107,25 +107,7 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Single Band Land Classification Europe'" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -134,7 +116,7 @@ "'2012'" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -148,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -170,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -184,7 +166,7 @@ " 'U2018_CLC2012_V2020_20u1_FR_REU']" ] }, - "execution_count": 97, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -199,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -207,48 +189,29 @@ "\n", " clc_name_elements = deconstruct_clc_name(clc_name)\n", "\n", - " # clc_pattern = ('U{update_campaign}_{theme}{reference_year}_V{release_year}_'.format(**clc_name_elements),\n", - " # 
clc_name_elements['release_number'][:2] + '[0-9a-z]{0,2}',\n", - " # '_?({})'\n", - "\n", - " if clc_name_elements['DOM_code']:\n", - " allowed_dirs = ['DATA', 'French_DOMs', 'Metadata']\n", - " else:\n", - " allowed_dirs = ['DATA', 'Legend', 'Metadata']\n", - " \n", - " print(allowed_dirs)\n", - "\n", " asset_files = []\n", " \n", - " for root, dirs, files in os.walk(path, topdown=True):\n", - " [dirs.remove(d) for d in list(dirs) if d not in allowed_dirs]\n", + " for root, dirs, files in os.walk(path):\n", + " if not clc_name_elements['DOM_code'] and 'French_DOMs' in root:\n", + " continue\n", + " \n", + " if clc_name_elements['DOM_code'] and ('Legend' in root and not 'French_DOMs' in root):\n", + " continue\n", + " \n", " for file in files:\n", - " if file.startswith(clc_name + '.') or file.endswith(('.lyr', 'QGIS.txt',)):\n", + " if file.startswith(clc_name + '.') or file.endswith((f'{clc_name_elements[\"DOM_code\"]}.tif.lyr', 'QGIS.txt',)):\n", " asset_files.append(os.path.join(root, file))\n", "\n", " return(asset_files)\n", " \n", - "\n", - " with os.scandir(path) as it:\n", - " for entry in it:\n", - " if entry.is_file() and entry.name.startswith(clc_name) and not entry.name.endswith('.tif'):\n", - " asset_files.append(entry.name)\n", - " \n", - " tif_file_assets.append(asset_files)" + "\n" ] }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 58, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['DATA', 'Legend', 'Metadata']\n" - ] - }, { "data": { "text/plain": [ @@ -264,20 +227,18 @@ " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Metadata\\\\U2018_CLC2012_V2020_20u1.xml']" ] }, - "execution_count": 109, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# 'bo' in ['bi', 'ba', 'bo']\n", - "# tif_file_assets[3]\n", "get_asset_files(root, clc_name=clc_names[0])" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "metadata": 
{}, "outputs": [], "source": [ @@ -351,7 +312,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -372,9 +333,30 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "TypeError", + "evalue": "Object of type builtin_function_or_method is not JSON serializable", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[12], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mitem\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_href\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtestY.json\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_object.py:477\u001b[0m, in \u001b[0;36mSTACObject.save_object\u001b[1;34m(self, include_self_link, dest_href, stac_io)\u001b[0m\n\u001b[0;32m 472\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m STACError(\n\u001b[0;32m 473\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSelf HREF must be set before saving without an explicit dest_href.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 474\u001b[0m )\n\u001b[0;32m 475\u001b[0m dest_href \u001b[38;5;241m=\u001b[39m self_href\n\u001b[1;32m--> 477\u001b[0m \u001b[43mstac_io\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_json\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_href\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43minclude_self_link\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude_self_link\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:259\u001b[0m, in \u001b[0;36mStacIO.save_json\u001b[1;34m(self, dest, json_dict, *args, **kwargs)\u001b[0m\n\u001b[0;32m 239\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msave_json\u001b[39m(\n\u001b[0;32m 240\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 241\u001b[0m dest: HREF,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[0;32m 245\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 246\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Write a dict to the given URI as JSON.\u001b[39;00m\n\u001b[0;32m 247\u001b[0m \n\u001b[0;32m 248\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.write_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 257\u001b[0m \u001b[38;5;124;03m :meth:`StacIO.json_dumps`.\u001b[39;00m\n\u001b[0;32m 258\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 259\u001b[0m txt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_dumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjson_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 260\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwrite_text(dest, txt)\n", + "File 
\u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:131\u001b[0m, in \u001b[0;36mStacIO.json_dumps\u001b[1;34m(self, json_dict, *args, **kwargs)\u001b[0m\n\u001b[0;32m 127\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m orjson\u001b[38;5;241m.\u001b[39mdumps(json_dict, option\u001b[38;5;241m=\u001b[39morjson\u001b[38;5;241m.\u001b[39mOPT_INDENT_2, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\u001b[38;5;241m.\u001b[39mdecode(\n\u001b[0;32m 128\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 129\u001b[0m )\n\u001b[0;32m 130\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjson_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\__init__.py:238\u001b[0m, in \u001b[0;36mdumps\u001b[1;34m(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 233\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONEncoder\n\u001b[0;32m 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m 235\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mskipkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskipkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mensure_ascii\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_ascii\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_circular\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_circular\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_nan\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_nan\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mseparators\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseparators\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefault\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msort_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m--> 238\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\encoder.py:202\u001b[0m, in \u001b[0;36mJSONEncoder.encode\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 200\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miterencode(o, _one_shot\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 201\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(chunks, (\u001b[38;5;28mlist\u001b[39m, 
\u001b[38;5;28mtuple\u001b[39m)):\n\u001b[1;32m--> 202\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mchunks\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 203\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(chunks)\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\encoder.py:432\u001b[0m, in \u001b[0;36m_make_iterencode.._iterencode\u001b[1;34m(o, _current_indent_level)\u001b[0m\n\u001b[0;32m 430\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m _iterencode_list(o, _current_indent_level)\n\u001b[0;32m 431\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(o, \u001b[38;5;28mdict\u001b[39m):\n\u001b[1;32m--> 432\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m _iterencode_dict(o, _current_indent_level)\n\u001b[0;32m 433\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 434\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m markers \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\encoder.py:406\u001b[0m, in \u001b[0;36m_make_iterencode.._iterencode_dict\u001b[1;34m(dct, _current_indent_level)\u001b[0m\n\u001b[0;32m 404\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 405\u001b[0m chunks \u001b[38;5;241m=\u001b[39m _iterencode(value, _current_indent_level)\n\u001b[1;32m--> 406\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m chunks\n\u001b[0;32m 407\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m newline_indent \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 408\u001b[0m _current_indent_level \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n", + "File 
\u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\encoder.py:439\u001b[0m, in \u001b[0;36m_make_iterencode.._iterencode\u001b[1;34m(o, _current_indent_level)\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCircular reference detected\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 438\u001b[0m markers[markerid] \u001b[38;5;241m=\u001b[39m o\n\u001b[1;32m--> 439\u001b[0m o \u001b[38;5;241m=\u001b[39m \u001b[43m_default\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 440\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m _iterencode(o, _current_indent_level)\n\u001b[0;32m 441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m markers \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\encoder.py:180\u001b[0m, in \u001b[0;36mJSONEncoder.default\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdefault\u001b[39m(\u001b[38;5;28mself\u001b[39m, o):\n\u001b[0;32m 162\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Implement this method in a subclass such that it returns\u001b[39;00m\n\u001b[0;32m 163\u001b[0m \u001b[38;5;124;03m a serializable object for ``o``, or calls the base implementation\u001b[39;00m\n\u001b[0;32m 164\u001b[0m \u001b[38;5;124;03m (to raise a ``TypeError``).\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 178\u001b[0m \n\u001b[0;32m 179\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 180\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mObject of type 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mo\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 181\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mis not JSON serializable\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "\u001b[1;31mTypeError\u001b[0m: Object of type builtin_function_or_method is not JSON serializable" + ] + } + ], "source": [ "item.save_object(dest_href='testY.json')" ] @@ -560,9 +542,9 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "stacdev", "language": "python", - "name": "python3" + "name": "stacdev" }, "language_info": { "codemirror_mode": { @@ -574,7 +556,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.12.3" } }, "nbformat": 4, From 30c43f66c42c53198e38a8d055d1ec7edd703a30 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 29 Apr 2024 22:37:33 +0200 Subject: [PATCH 21/80] adds routine to create and add assets --- clms_item_generator.ipynb | 292 +++++++++++++++++++++++++++----------- 1 file changed, 211 insertions(+), 81 deletions(-) diff --git a/clms_item_generator.ipynb b/clms_item_generator.ipynb index 4e5b6ce..8c49d15 100644 --- a/clms_item_generator.ipynb +++ b/clms_item_generator.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 102, "metadata": {}, "outputs": [], "source": [ @@ -10,9 +10,10 @@ "import re\n", "\n", "import pystac\n", - "from pystac.extensions.projection import ProjectionExtension\n", "import pystac.item\n", + "import pystac.link\n", "from pystac.provider import ProviderRole\n", + "from pystac.extensions.projection import ProjectionExtension\n", "\n", "from pyproj import Transformer\n", "from shapely.geometry import GeometryCollection, box, shape, mapping\n", @@ 
-28,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 103, "metadata": {}, "outputs": [], "source": [ @@ -38,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 104, "metadata": {}, "outputs": [], "source": [ @@ -53,26 +54,38 @@ "\n", " return(m.groupdict())\n", "\n", - "DOM_DICT = {\n", - " 'GLP': 'Guadeloupe',\n", - " 'GUF': 'French Guyana',\n", - " 'MTQ': 'Martinique',\n", - " 'MYT': 'Mayotte',\n", - " 'REU': 'Réunion',\n", - " '': 'Europe',\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# label = DOM_DICT.get('')\n", "\n", - "def create_asset(filename: str, label: str):\n", + "def deconstruct_clc_name(filename: str):\n", + " p = re.compile('^(?P[A-Z0-9a-z_]*).(?P.*)$')\n", + " m = p.search(os.path.basename(filename))\n", + "\n", + " filename_split = m.groupdict()\n", + "\n", + " p = re.compile((\"U(?P[0-9]{4})_\"\n", + " \"(?PCLC|CHA)(?P[0-9]{4})_\"\n", + " \"V(?P[0-9]{4})_(?P[0-9a-z]*)\"\n", + " \"_?(?P[A-Z]*)?\"\n", + " \"_?(?P[A-Z]*)?\"))\n", + " m = p.search(filename_split['id'])\n", + " \n", + " return(m.groupdict() | filename_split)\n", + "\n", "\n", + "def create_asset(filename: str): \n", + " filename_elements = deconstruct_clc_name(filename)\n", + " suffix = filename_elements['suffix'].replace('.', '_')\n", + "\n", + " DOM_DICT = {\n", + " 'GLP': 'Guadeloupe',\n", + " 'GUF': 'French Guyana',\n", + " 'MTQ': 'Martinique',\n", + " 'MYT': 'Mayotte',\n", + " 'REU': 'Réunion',\n", + " '': 'Europe',\n", + " }\n", + " \n", + " label = DOM_DICT.get(filename_elements['DOM_code'])\n", + " \n", " MEDIA_TYPE_DICT = {\n", " 'tif': pystac.MediaType.COG,\n", " 'tif_xml': pystac.MediaType.XML,\n", @@ -81,9 +94,9 @@ " 'vat_dbf': 'application/dbf',\n", " 'txt': pystac.MediaType.TEXT,\n", " 'lyr': 'image/tiff; application=geotiff; profile=layer',\n", + " 'tfw': pystac.MediaType.TEXT,\n", " }\n", "\n", - "\n", " TITLE_DICT = {\n", " 
'tif': f'Single Band Land Classification {label}',\n", " 'tif_xml': f'TIFF Metadata {label}',\n", @@ -91,7 +104,8 @@ " 'vat_cpg': f'Encoding {label}',\n", " 'vat_dbf': f'Database {label}',\n", " 'txt': f'Legends {label}',\n", - " 'lyr': f'LEgend Layer {label}',\n", + " 'lyr': f'Legend Layer {label}',\n", + " 'tfw': f'World File {label}',\n", " }\n", "\n", " ROLES_DICT = {\n", @@ -102,28 +116,162 @@ " 'vat_dbf': ['metadata'],\n", " 'txt': ['metadata'],\n", " 'lyr': ['metadata'],\n", - " }" + " 'tfw': ['metadata'],\n", + " }\n", + "\n", + " asset = pystac.Asset(href=filename, title=TITLE_DICT[suffix], media_type=MEDIA_TYPE_DICT[suffix], roles=ROLES_DICT[suffix])\n", + " return(f\"{filename_elements['id']}_{suffix}\", asset)\n", + "\n", + "def get_tif_files(path: str): \n", + " tif_files=[]\n", + " for root, dirs, files in os.walk(path):\n", + " if root.endswith(('DATA', 'French_DOMs')):\n", + " for file in files:\n", + " if file.endswith('.tif'):\n", + " tif_files.append(os.path.join(root, file))\n", + "\n", + " return(tif_files)\n", + "\n", + "def extract_clc_name(path: str):\n", + " clc_name = os.path.basename(path).split('.')[0]\n", + " return(clc_name)\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 101, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
    \n", + " \n", + " \n", + " \n", + "
  • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_GLP.tfw\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " type\n", + " \"text/plain\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " title\n", + " \"World File Guadeloupe\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " roles[] 1 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + " \"metadata\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + "
\n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "key, asset = create_asset(asset_files[0])\n", + "asset" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'update_campaign': '2018', 'theme': 'CLC', 'reference_year': '2012', 'release_year': '2020', 'release_number': '20u1', 'country_code': '', 'DOM_code': '', 'id': 'U2018_CLC2012_V2020_20u1', 'suffix': 'tif'}\n" + ] + }, { "data": { "text/plain": [ "'2012'" ] }, - "execution_count": 5, + "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "file_metadata = deconstruct_clc_name(img_path)\n", - "\n", + "print(file_metadata)\n", "year = file_metadata.get('reference_year')\n", "year" ] @@ -134,20 +282,7 @@ "metadata": {}, "outputs": [], "source": [ - "def get_tif_files(path: str): \n", - " tif_files=[]\n", - " for root, dirs, files in os.walk(path):\n", - " if root.endswith(('DATA', 'French_DOMs')):\n", - " for file in files:\n", - " if file.endswith('.tif'):\n", - " tif_files.append(os.path.join(root, file))\n", - "\n", - " return(tif_files)\n", - "\n", - "\n", - "def extract_clc_name(path: str):\n", - " clc_name = os.path.basename(path).split('.')[0]\n", - " return(clc_name)\n" + "\n" ] }, { @@ -203,37 +338,16 @@ " asset_files.append(os.path.join(root, file))\n", "\n", " return(asset_files)\n", - " \n", - "\n" + " " ] }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 66, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tfw',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.aux.xml',\n", - " 
'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.ovr',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.cpg',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.dbf',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.xml',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20.tif.lyr',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20_QGIS.txt',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Metadata\\\\U2018_CLC2012_V2020_20u1.xml']" - ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "get_asset_files(root, clc_name=clc_names[0])" + "asset_files = get_asset_files(root, clc_name=clc_names[1])" ] }, { @@ -242,8 +356,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "\n", "CLC_PROVIDER = pystac.Provider(\n", " name='Copernicus Land Monitoring Service',\n", " description=('The Copernicus Land Monitoring Service provides '\n", @@ -304,20 +416,23 @@ "\n", "item = pystac.Item(**params)\n", "\n", - "item.add_asset(\n", - " key='image',\n", - " asset=pystac.Asset(href=img_path, title='Geotiff', media_type=pystac.MediaType.GEOTIFF),\n", - ")\n" + "for asset_file in asset_files:\n", + " key, asset = create_asset(asset_file)\n", + " item.add_asset(\n", + " key=key,\n", + " asset=asset,\n", + " )\n", + "\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ - "from pystac.extensions.projection import ProjectionExtension\n", - "import pystac.link\n", + "\n", "\n", "proj_ext = ProjectionExtension.ext(item.assets['image'], add_if_missing=True)\n", "\n", @@ -333,7 +448,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 61, "metadata": {}, "outputs": [ { @@ 
-343,7 +458,7 @@ "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[12], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mitem\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_href\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtestY.json\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[1;32mIn[61], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mitem\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_href\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtestY.json\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_object.py:477\u001b[0m, in \u001b[0;36mSTACObject.save_object\u001b[1;34m(self, include_self_link, dest_href, stac_io)\u001b[0m\n\u001b[0;32m 472\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m STACError(\n\u001b[0;32m 473\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSelf HREF must be set before saving without an explicit dest_href.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 474\u001b[0m )\n\u001b[0;32m 475\u001b[0m dest_href \u001b[38;5;241m=\u001b[39m self_href\n\u001b[1;32m--> 477\u001b[0m \u001b[43mstac_io\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_json\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_href\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43minclude_self_link\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude_self_link\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:259\u001b[0m, in \u001b[0;36mStacIO.save_json\u001b[1;34m(self, dest, json_dict, *args, **kwargs)\u001b[0m\n\u001b[0;32m 239\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msave_json\u001b[39m(\n\u001b[0;32m 240\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 241\u001b[0m dest: HREF,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[0;32m 245\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 246\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Write a dict to the given URI as JSON.\u001b[39;00m\n\u001b[0;32m 247\u001b[0m \n\u001b[0;32m 248\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.write_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 257\u001b[0m \u001b[38;5;124;03m :meth:`StacIO.json_dumps`.\u001b[39;00m\n\u001b[0;32m 258\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 259\u001b[0m txt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_dumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjson_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 260\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwrite_text(dest, txt)\n", "File 
\u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:131\u001b[0m, in \u001b[0;36mStacIO.json_dumps\u001b[1;34m(self, json_dict, *args, **kwargs)\u001b[0m\n\u001b[0;32m 127\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m orjson\u001b[38;5;241m.\u001b[39mdumps(json_dict, option\u001b[38;5;241m=\u001b[39morjson\u001b[38;5;241m.\u001b[39mOPT_INDENT_2, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\u001b[38;5;241m.\u001b[39mdecode(\n\u001b[0;32m 128\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 129\u001b[0m )\n\u001b[0;32m 130\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjson_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", @@ -383,7 +498,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 62, "metadata": {}, "outputs": [], "source": [ @@ -435,9 +550,17 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 63, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\boeck\\AppData\\Local\\Temp\\ipykernel_9832\\556696271.py:24: DeprecationWarning: Testing an element's truth value will raise an exception in future versions. 
Use specific 'len(elem)' or 'elem is not None' test instead.\n", + " if element:\n" + ] + }, { "data": { "text/plain": [ @@ -528,7 +651,7 @@ " 'ORIGINAL_URL': '../CLC_samples/U2018_CLC2018_V2020_20u1.xml'}" ] }, - "execution_count": 18, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } @@ -538,6 +661,13 @@ "\n", "get_metadata(xml_path)\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From bc74ab1be4ac14a14c5b0f5deeecf85d783460a6 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 29 Apr 2024 23:36:05 +0200 Subject: [PATCH 22/80] various fixes in create_assets --- clms_item_generator.ipynb | 1805 ++++++++++++++++++++++++++++++++----- 1 file changed, 1568 insertions(+), 237 deletions(-) diff --git a/clms_item_generator.ipynb b/clms_item_generator.ipynb index 8c49d15..f0a09e4 100644 --- a/clms_item_generator.ipynb +++ b/clms_item_generator.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 102, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -29,17 +29,17 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "img_path = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif'\n", + "# img_path = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif'\n", "# os.path.split(img_path)" ] }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -68,13 +68,16 @@ " \"_?(?P[A-Z]*)?\"))\n", " m = p.search(filename_split['id'])\n", " \n", - " return(m.groupdict() | filename_split)\n", + " if m:\n", + " return(m.groupdict() | filename_split)\n", + " else:\n", + " return(filename_split)\n", "\n", "\n", - "def create_asset(filename: str): \n", + "def create_asset(filename: str, DOM_code: str):\n", " filename_elements = 
deconstruct_clc_name(filename)\n", " suffix = filename_elements['suffix'].replace('.', '_')\n", - "\n", + " \n", " DOM_DICT = {\n", " 'GLP': 'Guadeloupe',\n", " 'GUF': 'French Guyana',\n", @@ -84,171 +87,136 @@ " '': 'Europe',\n", " }\n", " \n", - " label = DOM_DICT.get(filename_elements['DOM_code'])\n", - " \n", + "\n", " MEDIA_TYPE_DICT = {\n", " 'tif': pystac.MediaType.COG,\n", " 'tif_xml': pystac.MediaType.XML,\n", + " 'tif_aux_xml': pystac.MediaType.XML,\n", " 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid',\n", - " 'vat_cpg': pystac.MediaType.TEXT,\n", - " 'vat_dbf': 'application/dbf',\n", + " 'tif_vat_cpg': pystac.MediaType.TEXT,\n", + " 'tif_vat_dbf': 'application/dbf',\n", " 'txt': pystac.MediaType.TEXT,\n", - " 'lyr': 'image/tiff; application=geotiff; profile=layer',\n", + " 'tif_lyr': 'image/tiff; application=geotiff; profile=layer',\n", " 'tfw': pystac.MediaType.TEXT,\n", + " 'xml': pystac.MediaType.XML,\n", " }\n", - "\n", + " \n", + " label = DOM_DICT[DOM_code]\n", + " \n", " TITLE_DICT = {\n", " 'tif': f'Single Band Land Classification {label}',\n", " 'tif_xml': f'TIFF Metadata {label}',\n", + " 'tif_aux_xml': f'TIFF Statistics {label}',\n", " 'tif_ovr': f'Pyramid {label}',\n", - " 'vat_cpg': f'Encoding {label}',\n", - " 'vat_dbf': f'Database {label}',\n", + " 'tif_vat_cpg': f'Encoding {label}',\n", + " 'tif_vat_dbf': f'Database {label}',\n", " 'txt': f'Legends {label}',\n", - " 'lyr': f'Legend Layer {label}',\n", + " 'tif_lyr': f'Legend Layer {label}',\n", " 'tfw': f'World File {label}',\n", + " 'xml': f'Single Band Land Classification Metadata {label}',\n", " }\n", "\n", " ROLES_DICT = {\n", " 'tif': ['data', 'visual'],\n", " 'tif_xml': ['metadata'],\n", + " 'tif_aux_xml': ['metadata'],\n", " 'tif_ovr': ['metadata'],\n", - " 'vat_cpg': ['metadata'],\n", - " 'vat_dbf': ['metadata'],\n", + " 'tif_vat_cpg': ['metadata'],\n", + " 'tif_vat_dbf': ['metadata'],\n", " 'txt': ['metadata'],\n", - " 'lyr': ['metadata'],\n", + " 'tif_lyr': 
['metadata'],\n", " 'tfw': ['metadata'],\n", + " 'xml': ['metadata'],\n", " }\n", "\n", " asset = pystac.Asset(href=filename, title=TITLE_DICT[suffix], media_type=MEDIA_TYPE_DICT[suffix], roles=ROLES_DICT[suffix])\n", " return(f\"{filename_elements['id']}_{suffix}\", asset)\n", "\n", - "def get_tif_files(path: str): \n", - " tif_files=[]\n", + "def get_img_paths(path: str): \n", + " img_paths=[]\n", " for root, dirs, files in os.walk(path):\n", " if root.endswith(('DATA', 'French_DOMs')):\n", " for file in files:\n", " if file.endswith('.tif'):\n", - " tif_files.append(os.path.join(root, file))\n", + " img_paths.append(os.path.join(root, file))\n", + "\n", + " return(img_paths)\n", + "\n", + "# Not needed anymore, part of deconstruct_clc_name...\n", + "# def extract_clc_name(path: str):\n", + "# clc_name = os.path.basename(path).split('.')[0]\n", + "# return(clc_name)\n", + "\n", + "def get_asset_files(path, clc_name):\n", + "\n", + " clc_name_elements = deconstruct_clc_name(clc_name)\n", + "\n", + " asset_files = []\n", + " \n", + " for root, dirs, files in os.walk(path):\n", + " if not clc_name_elements['DOM_code'] and 'French_DOMs' in root:\n", + " continue\n", + " \n", + " if clc_name_elements['DOM_code'] and ('Legend' in root and not 'French_DOMs' in root):\n", + " continue\n", + " \n", + " for file in files:\n", + " if file.startswith(clc_name + '.') or file.endswith((f'{clc_name_elements[\"DOM_code\"]}.tif.lyr', 'QGIS.txt',)):\n", + " asset_files.append(os.path.join(root, file))\n", "\n", - " return(tif_files)\n", + " return(asset_files)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "root = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m'\n", "\n", - "def extract_clc_name(path: str):\n", - " clc_name = os.path.basename(path).split('.')[0]\n", - " return(clc_name)\n" + "img_paths = get_img_paths(path=root)\n", + "img_path = img_paths[0]" ] }, { "cell_type": "code", - "execution_count": 
101, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - "
    \n", - " \n", - " \n", - " \n", - "
  • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_GLP.tfw\"\n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " type\n", - " \"text/plain\"\n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " title\n", - " \"World File Guadeloupe\"\n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " roles[] 1 items\n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " 0\n", - " \"metadata\"\n", - "
    • \n", - " \n", - " \n", - " \n", - "
    \n", - " \n", - "
  • \n", - " \n", - " \n", - "
\n", - "
\n", - "
" - ], "text/plain": [ - "" + "['X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tfw',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.aux.xml',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.ovr',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.cpg',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.dbf',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.xml',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20.tif.lyr',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20_QGIS.txt',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Metadata\\\\U2018_CLC2012_V2020_20u1.xml']" ] }, - "execution_count": 101, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "key, asset = create_asset(asset_files[0])\n", - "asset" + "clc_name_elements = deconstruct_clc_name(img_path)\n", + "\n", + "asset_files = get_asset_files(root, clc_name=clc_name_elements['id'])\n", + "\n", + "asset_files = [f for f in asset_files if not f.endswith('aux')]\n", + "asset_files" ] }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -257,106 +225,38 @@ "text": [ "{'update_campaign': '2018', 'theme': 'CLC', 'reference_year': '2012', 'release_year': '2020', 'release_number': '20u1', 'country_code': '', 'DOM_code': '', 'id': 'U2018_CLC2012_V2020_20u1', 'suffix': 'tif'}\n" ] - }, - { - "data": { - "text/plain": [ - "'2012'" - ] - }, - "execution_count": 84, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "file_metadata = deconstruct_clc_name(img_path)\n", - 
"print(file_metadata)\n", - "year = file_metadata.get('reference_year')\n", - "year" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "\n" + "print(clc_name_elements)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 28, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "['U2018_CLC2012_V2020_20u1',\n", - " 'U2018_CLC2012_V2020_20u1_FR_GLP',\n", - " 'U2018_CLC2012_V2020_20u1_FR_GUF',\n", - " 'U2018_CLC2012_V2020_20u1_FR_MTQ',\n", - " 'U2018_CLC2012_V2020_20u1_FR_MYT',\n", - " 'U2018_CLC2012_V2020_20u1_FR_REU']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tfw\n", + "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif\n", + "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif.aux.xml\n", + "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif.ovr\n", + "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif.vat.cpg\n", + "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif.vat.dbf\n", + "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif.xml\n", + "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\Legend\\CLC2018_CLC2012_V2018_20.tif.lyr\n", + "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\Legend\\CLC2018_CLC2012_V2018_20_QGIS.txt\n", + "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\Metadata\\U2018_CLC2012_V2020_20u1.xml\n" + ] } ], "source": [ - "root = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m'\n", - "\n", - "tif_files = get_tif_files(path=root)\n", - "clc_names = [extract_clc_name(f) for f in tif_files]\n", - "clc_names" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "def 
get_asset_files(path, clc_name):\n", - "\n", - " clc_name_elements = deconstruct_clc_name(clc_name)\n", - "\n", - " asset_files = []\n", - " \n", - " for root, dirs, files in os.walk(path):\n", - " if not clc_name_elements['DOM_code'] and 'French_DOMs' in root:\n", - " continue\n", - " \n", - " if clc_name_elements['DOM_code'] and ('Legend' in root and not 'French_DOMs' in root):\n", - " continue\n", - " \n", - " for file in files:\n", - " if file.startswith(clc_name + '.') or file.endswith((f'{clc_name_elements[\"DOM_code\"]}.tif.lyr', 'QGIS.txt',)):\n", - " asset_files.append(os.path.join(root, file))\n", + "year = clc_name_elements.get('reference_year')\n", "\n", - " return(asset_files)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ - "asset_files = get_asset_files(root, clc_name=clc_names[1])" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "CLC_PROVIDER = pystac.Provider(\n", + "CLC_PROVIDER = pystac.provider.Provider(\n", " name='Copernicus Land Monitoring Service',\n", " description=('The Copernicus Land Monitoring Service provides '\n", " 'geographical information on land cover and its '\n", @@ -365,7 +265,7 @@ " 'a broad range of users in Europe and across the World '\n", " 'in the field of environmental terrestrial applications.'),\n", " roles=[ProviderRole.LICENSOR, ProviderRole.HOST],\n", - " url= 'https://land.copernicus.eu'\n", + " url='https://land.copernicus.eu'\n", ")\n", "\n", "props = {'description': (f'Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' \n", @@ -396,16 +296,15 @@ " f'CLC class descriptions can be found at '\n", " f'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'),\n", " 'created': None,\n", - " 'providers': CLC_PROVIDER\n", + " 'providers': CLC_PROVIDER.to_dict(),\n", "}\n", "\n", "\n", - "\n", "with 
rio.open(img_path) as img:\n", "\n", " bbox = rio.warp.transform_bounds(img.crs, rio.crs.CRS.from_epsg(4326), *img.bounds)\n", " params = {\n", - " 'id': id,\n", + " 'id': clc_name_elements.get('id'),\n", " 'bbox': bbox,\n", " 'geometry': mapping(box(*bbox)),\n", " 'datetime': None,\n", @@ -416,25 +315,53 @@ "\n", "item = pystac.Item(**params)\n", "\n", + "\n", + "\n", "for asset_file in asset_files:\n", - " key, asset = create_asset(asset_file)\n", + " print(asset_file)\n", + " key, asset = create_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code'))\n", " item.add_asset(\n", " key=key,\n", " asset=asset,\n", " )\n", - "\n", "\n" ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'description': 'Corine Land Cover 2012 (CLC2012) is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2012. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. CLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. 
The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.',\n", + " 'created': None,\n", + " 'providers': {'name': 'Copernicus Land Monitoring Service',\n", + " 'description': \"The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.\",\n", + " 'roles': [licensor, host],\n", + " 'url': 'https://land.copernicus.eu'},\n", + " 'start_datetime': '2012-01-01T00:00:00Z',\n", + " 'end_datetime': '2012-12-31T00:00:00Z'}" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "asset_file\n", + "props" + ] + }, + { + "cell_type": "code", + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ - "\n", - "\n", - "proj_ext = ProjectionExtension.ext(item.assets['image'], add_if_missing=True)\n", + "proj_ext = 
ProjectionExtension.ext(item.assets[os.path.basename(img_path).replace('.', '_')], add_if_missing=True)\n", "\n", "proj_ext.apply(epsg=rio.crs.CRS(img.crs).to_epsg(),\n", " bbox=img.bounds,\n", @@ -448,30 +375,1434 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 31, "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "Object of type builtin_function_or_method is not JSON serializable", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[61], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mitem\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_href\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtestY.json\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_object.py:477\u001b[0m, in \u001b[0;36mSTACObject.save_object\u001b[1;34m(self, include_self_link, dest_href, stac_io)\u001b[0m\n\u001b[0;32m 472\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m STACError(\n\u001b[0;32m 473\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSelf HREF must be set before saving without an explicit dest_href.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 474\u001b[0m )\n\u001b[0;32m 475\u001b[0m dest_href \u001b[38;5;241m=\u001b[39m self_href\n\u001b[1;32m--> 477\u001b[0m \u001b[43mstac_io\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_json\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_href\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43minclude_self_link\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude_self_link\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:259\u001b[0m, in \u001b[0;36mStacIO.save_json\u001b[1;34m(self, dest, json_dict, *args, **kwargs)\u001b[0m\n\u001b[0;32m 239\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msave_json\u001b[39m(\n\u001b[0;32m 240\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 241\u001b[0m dest: HREF,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[0;32m 245\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 246\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Write a dict to the given URI as JSON.\u001b[39;00m\n\u001b[0;32m 247\u001b[0m \n\u001b[0;32m 248\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.write_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 257\u001b[0m \u001b[38;5;124;03m :meth:`StacIO.json_dumps`.\u001b[39;00m\n\u001b[0;32m 258\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 259\u001b[0m txt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_dumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjson_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 260\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwrite_text(dest, txt)\n", - "File 
\u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:131\u001b[0m, in \u001b[0;36mStacIO.json_dumps\u001b[1;34m(self, json_dict, *args, **kwargs)\u001b[0m\n\u001b[0;32m 127\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m orjson\u001b[38;5;241m.\u001b[39mdumps(json_dict, option\u001b[38;5;241m=\u001b[39morjson\u001b[38;5;241m.\u001b[39mOPT_INDENT_2, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\u001b[38;5;241m.\u001b[39mdecode(\n\u001b[0;32m 128\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 129\u001b[0m )\n\u001b[0;32m 130\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 131\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjson_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\__init__.py:238\u001b[0m, in \u001b[0;36mdumps\u001b[1;34m(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 233\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONEncoder\n\u001b[0;32m 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m 235\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mskipkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskipkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mensure_ascii\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_ascii\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_circular\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_circular\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_nan\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_nan\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mseparators\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseparators\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefault\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msort_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m--> 238\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\encoder.py:202\u001b[0m, in \u001b[0;36mJSONEncoder.encode\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 200\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miterencode(o, _one_shot\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 201\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(chunks, (\u001b[38;5;28mlist\u001b[39m, 
\u001b[38;5;28mtuple\u001b[39m)):\n\u001b[1;32m--> 202\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mchunks\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 203\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(chunks)\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\encoder.py:432\u001b[0m, in \u001b[0;36m_make_iterencode.._iterencode\u001b[1;34m(o, _current_indent_level)\u001b[0m\n\u001b[0;32m 430\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m _iterencode_list(o, _current_indent_level)\n\u001b[0;32m 431\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(o, \u001b[38;5;28mdict\u001b[39m):\n\u001b[1;32m--> 432\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m _iterencode_dict(o, _current_indent_level)\n\u001b[0;32m 433\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 434\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m markers \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\encoder.py:406\u001b[0m, in \u001b[0;36m_make_iterencode.._iterencode_dict\u001b[1;34m(dct, _current_indent_level)\u001b[0m\n\u001b[0;32m 404\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 405\u001b[0m chunks \u001b[38;5;241m=\u001b[39m _iterencode(value, _current_indent_level)\n\u001b[1;32m--> 406\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m chunks\n\u001b[0;32m 407\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m newline_indent \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 408\u001b[0m _current_indent_level \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n", - "File 
\u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\encoder.py:439\u001b[0m, in \u001b[0;36m_make_iterencode.._iterencode\u001b[1;34m(o, _current_indent_level)\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCircular reference detected\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 438\u001b[0m markers[markerid] \u001b[38;5;241m=\u001b[39m o\n\u001b[1;32m--> 439\u001b[0m o \u001b[38;5;241m=\u001b[39m \u001b[43m_default\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 440\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m _iterencode(o, _current_indent_level)\n\u001b[0;32m 441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m markers \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\json\\encoder.py:180\u001b[0m, in \u001b[0;36mJSONEncoder.default\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdefault\u001b[39m(\u001b[38;5;28mself\u001b[39m, o):\n\u001b[0;32m 162\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Implement this method in a subclass such that it returns\u001b[39;00m\n\u001b[0;32m 163\u001b[0m \u001b[38;5;124;03m a serializable object for ``o``, or calls the base implementation\u001b[39;00m\n\u001b[0;32m 164\u001b[0m \u001b[38;5;124;03m (to raise a ``TypeError``).\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 178\u001b[0m \n\u001b[0;32m 179\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 180\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mObject of type 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mo\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 181\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mis not JSON serializable\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "\u001b[1;31mTypeError\u001b[0m: Object of type builtin_function_or_method is not JSON serializable" - ] - } - ], + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
    \n", + " \n", + " \n", + " \n", + "
  • \n", + " type\n", + " \"Feature\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " stac_version\n", + " \"1.0.0\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " id\n", + " \"U2018_CLC2012_V2020_20u1\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " properties\n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " description\n", + " \"Corine Land Cover 2012 (CLC2012) is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2012. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. CLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. 
Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.\"\n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " created\n", + " None\n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " providers\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " name\n", + " \"Copernicus Land Monitoring Service\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " description\n", + " \"The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 2 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"licensor\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " \"host\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " url\n", + " \"https://land.copernicus.eu\"\n", + "
      • \n", + " \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " start_datetime\n", + " \"2012-01-01T00:00:00Z\"\n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " end_datetime\n", + " \"2012-12-31T00:00:00Z\"\n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " datetime\n", + " None\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " geometry\n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " type\n", + " \"Polygon\"\n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " coordinates[] 1 items\n", + " \n", + "
        \n", + " \n", + " \n", + "
      • \n", + " 0[] 5 items\n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 0[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " 72.90613675900903\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " 24.28417701147754\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 1[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " 72.90613675900903\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " 72.63376966542347\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 2[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " -56.50514190170437\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " 72.63376966542347\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 3[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " -56.50514190170437\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " 24.28417701147754\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 4[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " 72.90613675900903\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " 24.28417701147754\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + " \n", + "
    • \n", + " \n", + " \n", + "
    \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " links[] 1 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " rel\n", + " \"LICENSE\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"https://land.copernicus.eu/en/data-policy\"\n", + "
      • \n", + " \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " assets\n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_tfw\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tfw\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"text/plain\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"World File Europe\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_tif\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"image/tiff; application=geotiff; profile=cloud-optimized\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Single Band Land Classification Europe\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " proj:epsg\n", + " 3035\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " proj:bbox[] 4 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " 900000.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " 900000.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 2\n", + " 7400000.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 3\n", + " 5500000.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + " \n", + "
      • \n", + " proj:shape[] 2 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " 46000\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " 65000\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + " \n", + "
      • \n", + " proj:transform[] 12 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " 100.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 2\n", + " 900000.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 3\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 4\n", + " -100.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 5\n", + " 5500000.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 6\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 7\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 8\n", + " 1.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 9\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 10\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 11\n", + " 1.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 2 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"data\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " \"visual\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_tif_aux_xml\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.aux.xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"application/xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"TIFF Statistics Europe\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_tif_ovr\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.ovr\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"image/tiff; application=geotiff; profile=pyramid\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Pyramid Europe\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_tif_vat_cpg\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.cpg\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"text/plain\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Encoding Europe\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_tif_vat_dbf\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.dbf\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"application/dbf\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Database Europe\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_tif_xml\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"application/xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"TIFF Metadata Europe\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " CLC2018_CLC2012_V2018_20_tif_lyr\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20.tif.lyr\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"image/tiff; application=geotiff; profile=layer\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Legend Layer Europe\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " CLC2018_CLC2012_V2018_20_QGIS_txt\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20_QGIS.txt\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"text/plain\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Legends Europe\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_xml\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/Metadata/U2018_CLC2012_V2020_20u1.xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"application/xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Single Band Land Classification Metadata Europe\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " bbox[] 4 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + " -56.50514190170437\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 1\n", + " 24.28417701147754\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 2\n", + " 72.90613675900903\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 3\n", + " 72.63376966542347\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + " \n", + "
  • \n", + " stac_extensions[] 1 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + " \"https://stac-extensions.github.io/projection/v1.1.0/schema.json\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + "
\n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "item" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], "source": [ "item.save_object(dest_href='testY.json')" ] From 4a3ddb7cb903b6b0914d43c0c3dfa1568ac1c5cc Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Tue, 30 Apr 2024 13:06:48 +0800 Subject: [PATCH 23/80] update item, collection --- .DS_Store | Bin 6148 -> 6148 bytes scripts/.DS_Store | Bin 8196 -> 8196 bytes scripts/vabh/test_collection.json | 87 ++++++++++++++++++++++++++++++ scripts/vabh/test_item.json | 35 +++++++++++- scripts/vabh/vabh_collection.py | 64 ++++++++++++++++++++++ scripts/vabh/vabh_item.py | 86 ++++++++++++----------------- 6 files changed, 219 insertions(+), 53 deletions(-) diff --git a/.DS_Store b/.DS_Store index d2c92cbf939cb9f45f03101e9b39f43feaf7f0a3..3d5f93ace29183dca7189ecec81e2814e5802e8f 100644 GIT binary patch delta 96 zcmZoMXffEJ$;h~KvKC{Zhh%lNp^1rwj)I|~S*?yjwWYDKj)Ix7L2WH3hp4i?bx?eE gPHtX)HvpGynhq delta 97 zcmZoMXffEJ$;h~CvKC{Zr&M*dk+Hdfj)JbSnMtjVLbau_v5tb7u|aJuCx@uAzI9N1 hc1~_yeh&i}Ffu}D23{x)qk1<#W>jX|%+B$b9{@&K6J-DZ diff --git a/scripts/.DS_Store b/scripts/.DS_Store index 9da4b8e40e276beedbe4ce56e6bfb5960e3d3dba..518d29115e615d4116dfa7bf2f3fbef96920e02a 100644 GIT binary patch delta 92 zcmV-i0HgneK!iY$V*$Cbah3;>fDN+;2pk9jxsxOiaVQykdonXPHY^}5IWc`KAbUA8 yGb|uBGcbL92?!Mzkn delta 89 zcmV-f0H*(hK!iY$V*$Fcah3;>fDN+;2pk9jx|1XjaVZ*mdoeUOI4mG7F*h-NEFgP1 vGB7M4G&VGSeF+E^BQHBrUu$e_Zf}f}y$~M(jkDblDF?F$81@9S{1gWR%Jdsn diff --git a/scripts/vabh/test_collection.json b/scripts/vabh/test_collection.json index c7557d1..43d6271 100644 --- a/scripts/vabh/test_collection.json +++ b/scripts/vabh/test_collection.json @@ -10,6 +10,21 @@ "type": "application/json", "title": "Urban Atlas Building Height 10m" }, + { + "rel": "license", + "href": "https://land.copernicus.eu/en/data-policy", + "title": "Legal notice on 
the use of CLMS data" + }, + { + "rel": "root", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", + "title": "CLMS Catalog" + }, + { + "rel": "parent", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", + "title": "CLMS Catalog" + }, { "rel": "self", "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_collection.json", @@ -20,6 +35,78 @@ "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", "https://stac-extensions.github.io/projection/v1.1.0/schema.json" ], + "item_assets": { + "dataset": { + "title": "Building height raster", + "media_type": "image/tiff; application=geotiff", + "roles": [ + "data" + ] + }, + "quality_check_report": { + "title": "Quality check report", + "media_type": "application/pdf", + "roles": [ + "metadata" + ] + }, + "metadata": { + "title": "Metadata", + "media_type": "application/xml", + "roles": [ + "metadata" + ] + }, + "quality_control_report": { + "title": "Quality control report", + "media_type": "application/pdf", + "roles": [ + "metadata" + ] + }, + "pixel_based_info_shp": { + "title": "Pixel based info shape format", + "media_type": "application/octet-stream", + "roles": [ + "metadata" + ] + }, + "pixel_based_info_shx": { + "title": "Pixel based info shape index", + "media_type": "application/octet-stream", + "roles": [ + "metadata" + ] + }, + "pixel_based_info_dbf": { + "title": "Pixel based info attribute", + "media_type": "application/x-dbf", + "roles": [ + "metadata" + ] + }, + "pixel_based_info_prj": { + "title": "Pixel based info projection description", + "media_type": "text/plain", + "roles": [ + "metadata" + ] + }, + "pixel_based_info_cpg": { + "title": "Pixel based info character encoding", + "media_type": "text/plain", + "roles": [ + "metadata" + ] + }, + "compressed_dataset": { + "title": "Compressed building height raster", + "media_type": "application/zip", + "roles": [ + "data" + ] + } + }, "title": "Urban Atlas 
Building Height 10m", "extent": { "spatial": { diff --git a/scripts/vabh/test_item.json b/scripts/vabh/test_item.json index 1652c9b..5e98f12 100644 --- a/scripts/vabh/test_item.json +++ b/scripts/vabh/test_item.json @@ -73,7 +73,40 @@ "type": "application/json" } ], - "assets": {}, + "assets": { + "dataset": { + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Dataset/AT001_WIEN_UA2012_DHM_v020.tif", + "type": "image/tiff; application=geotiff", + "title": "Building Height Dataset", + "roles": [ + "data" + ] + }, + "compressed_dataset": { + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020.zip", + "type": "application/zip", + "title": "Compressed Building Height Metadata", + "roles": [ + "data" + ] + }, + "quality_check_report": { + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Doc/AT001_WIEN_UA2012_DHM_QC_Reportv020.pdf", + "type": "application/pdf", + "title": "Quality Check Report", + "roles": [ + "metadata" + ] + }, + "metadata": { + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Metadata/AT001_WIEN_UA2012_DHM_metadata_v020.xml", + "type": "application/xml", + "title": "Building Height Dataset Metadata", + "roles": [ + "metadata" + ] + } + }, "bbox": [ -21.210399013454868, 62.99044383484405, diff --git a/scripts/vabh/vabh_collection.py b/scripts/vabh/vabh_collection.py index c499e7a..4fd2ca2 100644 --- a/scripts/vabh/vabh_collection.py +++ b/scripts/vabh/vabh_collection.py @@ -8,6 +8,8 @@ import rasterio as rio from pyproj import Transformer from pystac import Extent, SpatialExtent, TemporalExtent +from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension +from pystac.link import Link from pystac.provider import ProviderRole from rasterio.coords import BoundingBox from rasterio.crs import CRS @@ -34,6 +36,26 @@ COLLECTION_description = "Urban Atlas building height over capital cities." 
COLLECTION_keywords = ["Buildings", "Building height", "Elevation"] +# links +CLMS_LICENSE: Final[Link] = Link( + rel="license", + target="https://land.copernicus.eu/en/data-policy", + title="Legal notice on the use of CLMS data", +) + +WORKING_DIR = os.getcwd() +CLMS_CATALOG_LINK: Final[Link] = Link( + rel=pystac.RelType.ROOT, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")), + title="CLMS Catalog", +) + +CLMS_PARENT_LINK: Final[Link] = Link( + rel=pystac.RelType.PARENT, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")), + title="CLMS Catalog", +) + def get_metadata_from_tif(key: str) -> tuple[BoundingBox, CRS, int, int]: with rio.open(key) as tif: @@ -92,5 +114,47 @@ def get_collection_extent(bbox, start_datetime) -> Extent: providers=[HOST_AND_LICENSOR], ) + # add item assets + add_item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True) + add_item_assets.item_assets = { + "dataset": AssetDefinition( + {"title": "Building height raster", "media_type": pystac.MediaType.GEOTIFF, "roles": ["data"]} + ), + "quality_check_report": AssetDefinition( + {"title": "Quality check report", "media_type": pystac.MediaType.PDF, "roles": ["metadata"]} + ), + "metadata": AssetDefinition({"title": "Metadata", "media_type": pystac.MediaType.XML, "roles": ["metadata"]}), + "quality_control_report": AssetDefinition( + {"title": "Quality control report", "media_type": pystac.MediaType.PDF, "roles": ["metadata"]} + ), + "pixel_based_info_shp": AssetDefinition( + {"title": "Pixel based info shape format", "media_type": "application/octet-stream", "roles": ["metadata"]} + ), + "pixel_based_info_shx": AssetDefinition( + {"title": "Pixel based info shape index", "media_type": "application/octet-stream", "roles": ["metadata"]} + ), + "pixel_based_info_dbf": AssetDefinition( + {"title": "Pixel based info attribute", "media_type": "application/x-dbf", "roles": ["metadata"]} + ), + 
"pixel_based_info_prj": AssetDefinition( + { + "title": "Pixel based info projection description", + "media_type": pystac.MediaType.TEXT, + "roles": ["metadata"], + } + ), + "pixel_based_info_cpg": AssetDefinition( + {"title": "Pixel based info character encoding", "media_type": pystac.MediaType.TEXT, "roles": ["metadata"]} + ), + "compressed_dataset": AssetDefinition( + {"title": "Compressed building height raster", "media_type": "application/zip", "roles": ["data"]} + ), + } + + # add links + collection.links.append(CLMS_LICENSE) + collection.links.append(CLMS_CATALOG_LINK) + collection.links.append(CLMS_PARENT_LINK) + collection.set_self_href("scripts/vabh/test_collection.json") collection.save_object() diff --git a/scripts/vabh/vabh_item.py b/scripts/vabh/vabh_item.py index 019bacc..f017173 100644 --- a/scripts/vabh/vabh_item.py +++ b/scripts/vabh/vabh_item.py @@ -25,6 +25,34 @@ PATH_Metadata = os.path.join(KEY, "Metadata/" + product_id + "_metadata_" + product_version + ".xml") PATH_Zip = os.path.join(head, tail + ".zip") +ASSET_dataset = pystac.Asset( + href=PATH_Dataset, + media_type=pystac.MediaType.GEOTIFF, + title="Building Height Dataset", + roles=["data"], +) + +ASSET_quality_check_report = pystac.Asset( + href=PATH_Doc, + media_type=pystac.MediaType.PDF, + title="Quality Check Report", + roles=["metadata"], +) + +ASSET_metadata = pystac.Asset( + href=PATH_Metadata, + media_type=pystac.MediaType.XML, + title="Building Height Dataset Metadata", + roles=["metadata"], +) + +ASSET_compressed_dataset = pystac.Asset( + href=PATH_Zip, + media_type="application/zip", + title="Compressed Building Height Metadata", + roles=["data"], +) + HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( name="Copernicus Land Monitoring Service", description=( @@ -66,7 +94,7 @@ def get_metadata_from_tif(key: str) -> tuple[BoundingBox, CRS, int, int]: def str_to_datetime(datetime_str: str): year, month, day = datetime_str.split("-") - return datetime(year=int(year), 
month=int(month), day=int(day)) # .strftime("%Y-%m-%dT%H:%M:%SZ") + return datetime(year=int(year), month=int(month), day=int(day)) def get_metadata_from_xml(xml: str) -> tuple[datetime, datetime, datetime]: @@ -94,11 +122,6 @@ def get_description(product_id: str) -> str: return f"{year[2:]} {city.title()} building height" -# def get_item_assets() - -# def get_links() - - if __name__ == "__main__": head, tail = os.path.split(KEY) (product_id,) = tail.split(".")[0].rsplit("_", 0) @@ -130,53 +153,12 @@ def get_description(product_id: str) -> str: for link in links: item.links.append(link) - # # assets - # assets = {os.path.split(key)[-1][:-4].lower(): create_asset(key) for key in asset_keys} - # for key, asset in assets.items(): - # item.add_asset(key, asset) + # assets + item.add_asset("dataset", ASSET_dataset) + item.add_asset("quality_check_report", ASSET_quality_check_report) + item.add_asset("metadata", ASSET_metadata) + item.add_asset("compressed_dataset", ASSET_compressed_dataset) # item.set_self_href(os.path.join(KEY, f"{tail}.json")) item.set_self_href("scripts/vabh/test_item.json") item.save_object() - - -# def create_item(aws_session: boto3.Session, bucket: str, tile: str) -> pystac.Item: -# client = aws_session.client("s3") -# parameters = client.list_objects(Bucket=bucket, Prefix=tile, Delimiter=".")["CommonPrefixes"] -# asset_keys = [parameter["Prefix"] + "tif" for parameter in parameters] -# _, tail = os.path.split(asset_keys[0]) -# product_id = "_".join((tail[:23], tail[29:31])) -# bounds, crs, height, width, created = read_metadata_from_s3(bucket, asset_keys[0], aws_session) -# geom_wgs84 = get_geom_wgs84(bounds, crs) -# description = get_description(product_id) -# start_datetime, end_datetime = get_datetime(product_id) - -# # common metadata -# item = pystac.Item( -# id=product_id, -# geometry=mapping(geom_wgs84), -# bbox=list(geom_wgs84.bounds), -# datetime=None, -# start_datetime=start_datetime, -# end_datetime=end_datetime, -# 
properties={"created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, -# collection=COLLECTION_ID, -# ) -# item.common_metadata.providers = [HOST_AND_LICENSOR] - -# # extensions -# projection = ProjectionExtension.ext(item, add_if_missing=True) -# projection.epsg = crs.to_epsg() -# projection.bbox = [int(bounds.left), int(bounds.bottom), int(bounds.right), int(bounds.top)] -# projection.shape = [height, width] - -# # links -# links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] -# for link in links: -# item.links.append(link) - -# # assets -# assets = {os.path.split(key)[-1][:-4].lower(): create_asset(key) for key in asset_keys} -# for key, asset in assets.items(): -# item.add_asset(key, asset) -# return item From 93e86bcd3fe83f95092e3cbedb0baa6728ebbe11 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 30 Apr 2024 16:08:40 +0200 Subject: [PATCH 24/80] finally, item creation as a function --- clms_item_generator.ipynb | 1966 ++++--------------------------------- 1 file changed, 216 insertions(+), 1750 deletions(-) diff --git a/clms_item_generator.ipynb b/clms_item_generator.ipynb index f0a09e4..266c0c5 100644 --- a/clms_item_generator.ipynb +++ b/clms_item_generator.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -14,6 +14,7 @@ "import pystac.link\n", "from pystac.provider import ProviderRole\n", "from pystac.extensions.projection import ProjectionExtension\n", + "from pystac.extensions.item_assets import AssetDefinition\n", "\n", "from pyproj import Transformer\n", "from shapely.geometry import GeometryCollection, box, shape, mapping\n", @@ -39,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -86,7 +87,6 @@ " 'REU': 'Réunion',\n", " '': 'Europe',\n", " }\n", - " \n", "\n", " MEDIA_TYPE_DICT = {\n", " 'tif': 
pystac.MediaType.COG,\n", @@ -165,58 +165,48 @@ " asset_files.append(os.path.join(root, file))\n", "\n", " return(asset_files)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "root = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m'\n", - "\n", - "img_paths = get_img_paths(path=root)\n", - "img_path = img_paths[0]" + " \n", + "def project_bbox(img, target_epsg=4326):\n", + " target_crs = rio.crs.CRS.from_epsg(target_epsg)\n", + " bbox_warped = rio.warp.transform_bounds(img.crs, target_crs, *img.bounds)\n", + " return(bbox_warped)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tfw',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.aux.xml',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.ovr',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.cpg',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.dbf',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.xml',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20.tif.lyr',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20_QGIS.txt',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\Metadata\\\\U2018_CLC2012_V2020_20u1.xml']" + "['X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GLP.tif',\n", + " 
'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GUF.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MYT.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_REU.tif']" ] }, - "execution_count": 11, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "clc_name_elements = deconstruct_clc_name(img_path)\n", + "root = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m'\n", + "\n", + "img_paths = get_img_paths(path=root)\n", "\n", - "asset_files = get_asset_files(root, clc_name=clc_name_elements['id'])\n", + "# for img_path in img_paths:\n", "\n", - "asset_files = [f for f in asset_files if not f.endswith('aux')]\n", - "asset_files" + "img_path = img_paths[0]\n", + "img_paths" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -233,7 +223,102 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "def create_item(img_path):\n", + "\n", + " clc_name_elements = deconstruct_clc_name(img_path)\n", + "\n", + " asset_files = get_asset_files(root, clc_name=clc_name_elements['id'])\n", + " asset_files = [f for f in asset_files if not f.endswith('aux')]\n", + " asset_files\n", + "\n", + " year = clc_name_elements.get('reference_year')\n", + "\n", + " CLC_PROVIDER = pystac.provider.Provider(\n", + " name='Copernicus Land Monitoring Service',\n", + " description=('The Copernicus Land Monitoring Service provides '\n", + " 'geographical information on land cover and its '\n", + " 'changes, land use, ground motions, vegetation state, '\n", + " 'water cycle and Earth\\'s surface energy variables to '\n", + " 'a broad range of users in 
Europe and across the World '\n", + " 'in the field of environmental terrestrial applications.'),\n", + " roles=[ProviderRole.LICENSOR, ProviderRole.HOST],\n", + " url='https://land.copernicus.eu'\n", + " )\n", + "\n", + " props = {'description': (f'Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' \n", + " f'datasets produced within the frame the Copernicus Land Monitoring Service '\n", + " f'referring to land cover / land use status of year {year}. '\n", + " f'CLC service has a long-time heritage (formerly known as \\\"CORINE Land Cover Programme\\\"), '\n", + " f'coordinated by the European Environment Agency (EEA). It provides consistent '\n", + " f'and thematically detailed information on land cover and land cover changes across Europe. '\n", + " f'CLC datasets are based on the classification of satellite images produced by the national '\n", + " f'teams of the participating countries - the EEA members and cooperating countries (EEA39). '\n", + " f'National CLC inventories are then further integrated into a seamless land cover map of Europe. '\n", + " f'The resulting European database relies on standard methodology and nomenclature with following '\n", + " f'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; '\n", + " f'minimum mapping unit (MMU) for status layers is 25 hectares; '\n", + " f'minimum width of linear elements is 100 metres. '\n", + " f'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares '\n", + " f'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. '\n", + " f'The CLC service delivers important data sets supporting the implementation of key priority '\n", + " f'areas of the Environment Action Programmes of the European Union as e.g. 
protecting ecosystems, '\n", + " f'halting the loss of biological diversity, tracking the impacts of climate change, '\n", + " f'monitoring urban land take, assessing developments in agriculture or dealing with '\n", + " f'water resources directives. CLC belongs to the Pan-European component of the '\n", + " f'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the '\n", + " f'European Copernicus Programme coordinated by the European Environment Agency, '\n", + " f'providing environmental information from a combination of air- and space-based observation '\n", + " f'systems and in-situ monitoring. Additional information about CLC product description including '\n", + " f'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. '\n", + " f'CLC class descriptions can be found at '\n", + " f'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'),\n", + " 'created': None,\n", + " 'providers': CLC_PROVIDER.to_dict(),\n", + " }\n", + "\n", + " with rio.open(img_path) as img:\n", + "\n", + " bbox = project_bbox(img)\n", + " params = {\n", + " 'id': clc_name_elements.get('id'),\n", + " 'bbox': bbox,\n", + " 'geometry': mapping(box(*bbox)),\n", + " 'datetime': None,\n", + " 'start_datetime': datetime(int(year), 1, 1, microsecond=0, tzinfo=UTC),\n", + " 'end_datetime': datetime(int(year), 12, 31, microsecond=0, tzinfo=UTC),\n", + " 'properties': props,\n", + " }\n", + "\n", + " item = pystac.Item(**params)\n", + "\n", + " for asset_file in asset_files:\n", + " # print(asset_file)\n", + " key, asset = create_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code'))\n", + " item.add_asset(\n", + " key=key,\n", + " asset=asset,\n", + " )\n", + "\n", + " proj_ext = ProjectionExtension.ext(item.assets[os.path.basename(img_path).replace('.', '_')], add_if_missing=True)\n", + " proj_ext.apply(epsg=rio.crs.CRS(img.crs).to_epsg(),\n", + " bbox=img.bounds,\n", + " shape=[_ 
for _ in img.shape],\n", + " transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0],\n", + " )\n", + "\n", + " license = pystac.link.Link(rel='LICENSE', target=\"https://land.copernicus.eu/en/data-policy\")\n", + " item.add_link(license)\n", + "\n", + " return(item)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -254,1751 +339,132 @@ } ], "source": [ - "year = clc_name_elements.get('reference_year')\n", - "\n", - "CLC_PROVIDER = pystac.provider.Provider(\n", - " name='Copernicus Land Monitoring Service',\n", - " description=('The Copernicus Land Monitoring Service provides '\n", - " 'geographical information on land cover and its '\n", - " 'changes, land use, ground motions, vegetation state, '\n", - " 'water cycle and Earth\\'s surface energy variables to '\n", - " 'a broad range of users in Europe and across the World '\n", - " 'in the field of environmental terrestrial applications.'),\n", - " roles=[ProviderRole.LICENSOR, ProviderRole.HOST],\n", - " url='https://land.copernicus.eu'\n", - ")\n", - "\n", - "props = {'description': (f'Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' \n", - " f'datasets produced within the frame the Copernicus Land Monitoring Service '\n", - " f'referring to land cover / land use status of year {year}. '\n", - " f'CLC service has a long-time heritage (formerly known as \\\"CORINE Land Cover Programme\\\"), '\n", - " f'coordinated by the European Environment Agency (EEA). It provides consistent '\n", - " f'and thematically detailed information on land cover and land cover changes across Europe. '\n", - " f'CLC datasets are based on the classification of satellite images produced by the national '\n", - " f'teams of the participating countries - the EEA members and cooperating countries (EEA39). '\n", - " f'National CLC inventories are then further integrated into a seamless land cover map of Europe. 
'\n", - " f'The resulting European database relies on standard methodology and nomenclature with following '\n", - " f'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; '\n", - " f'minimum mapping unit (MMU) for status layers is 25 hectares; '\n", - " f'minimum width of linear elements is 100 metres. '\n", - " f'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares '\n", - " f'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. '\n", - " f'The CLC service delivers important data sets supporting the implementation of key priority '\n", - " f'areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, '\n", - " f'halting the loss of biological diversity, tracking the impacts of climate change, '\n", - " f'monitoring urban land take, assessing developments in agriculture or dealing with '\n", - " f'water resources directives. CLC belongs to the Pan-European component of the '\n", - " f'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the '\n", - " f'European Copernicus Programme coordinated by the European Environment Agency, '\n", - " f'providing environmental information from a combination of air- and space-based observation '\n", - " f'systems and in-situ monitoring. Additional information about CLC product description including '\n", - " f'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. 
'\n", - " f'CLC class descriptions can be found at '\n", - " f'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'),\n", - " 'created': None,\n", - " 'providers': CLC_PROVIDER.to_dict(),\n", - "}\n", - "\n", - "\n", - "with rio.open(img_path) as img:\n", - "\n", - " bbox = rio.warp.transform_bounds(img.crs, rio.crs.CRS.from_epsg(4326), *img.bounds)\n", - " params = {\n", - " 'id': clc_name_elements.get('id'),\n", - " 'bbox': bbox,\n", - " 'geometry': mapping(box(*bbox)),\n", - " 'datetime': None,\n", - " 'start_datetime': datetime(int(year), 1, 1, microsecond=0, tzinfo=UTC),\n", - " 'end_datetime': datetime(int(year), 12, 31, microsecond=0, tzinfo=UTC),\n", - " 'properties': props,\n", - " }\n", - "\n", - "item = pystac.Item(**params)\n", - "\n", - "\n", - "\n", - "for asset_file in asset_files:\n", - " print(asset_file)\n", - " key, asset = create_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code'))\n", - " item.add_asset(\n", - " key=key,\n", - " asset=asset,\n", - " )\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'description': 'Corine Land Cover 2012 (CLC2012) is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2012. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. CLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. 
The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. 
CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.',\n", - " 'created': None,\n", - " 'providers': {'name': 'Copernicus Land Monitoring Service',\n", - " 'description': \"The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.\",\n", - " 'roles': [licensor, host],\n", - " 'url': 'https://land.copernicus.eu'},\n", - " 'start_datetime': '2012-01-01T00:00:00Z',\n", - " 'end_datetime': '2012-12-31T00:00:00Z'}" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "asset_file\n", - "props" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "proj_ext = ProjectionExtension.ext(item.assets[os.path.basename(img_path).replace('.', '_')], add_if_missing=True)\n", - "\n", - "proj_ext.apply(epsg=rio.crs.CRS(img.crs).to_epsg(),\n", - " bbox=img.bounds,\n", - " shape=[_ for _ in img.shape],\n", - " transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0],\n", - " )\n", - "\n", - "license = pystac.link.Link(rel='LICENSE', target=\"https://land.copernicus.eu/en/data-policy\")\n", - "item.add_link(license)\n" + "create_item(img_path)" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 18, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - "
    \n", - " \n", - " \n", - " \n", - "
  • \n", - " type\n", - " \"Feature\"\n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " stac_version\n", - " \"1.0.0\"\n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " id\n", - " \"U2018_CLC2012_V2020_20u1\"\n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " properties\n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " description\n", - " \"Corine Land Cover 2012 (CLC2012) is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2012. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. CLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. 
Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.\"\n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " created\n", - " None\n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " providers\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " name\n", - " \"Copernicus Land Monitoring Service\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " description\n", - " \"The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 2 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"licensor\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 1\n", - " \"host\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " url\n", - " \"https://land.copernicus.eu\"\n", - "
      • \n", - " \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " start_datetime\n", - " \"2012-01-01T00:00:00Z\"\n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " end_datetime\n", - " \"2012-12-31T00:00:00Z\"\n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " datetime\n", - " None\n", - "
    • \n", - " \n", - " \n", - " \n", - "
    \n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " geometry\n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " type\n", - " \"Polygon\"\n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " coordinates[] 1 items\n", - " \n", - "
        \n", - " \n", - " \n", - "
      • \n", - " 0[] 5 items\n", - " \n", - "
          \n", - " \n", - " \n", - "
        • \n", - " 0[] 2 items\n", - " \n", - "
            \n", - " \n", - " \n", - " \n", - "
          • \n", - " 0\n", - " 72.90613675900903\n", - "
          • \n", - " \n", - " \n", - " \n", - "
          \n", - " \n", - "
            \n", - " \n", - " \n", - " \n", - "
          • \n", - " 1\n", - " 24.28417701147754\n", - "
          • \n", - " \n", - " \n", - " \n", - "
          \n", - " \n", - "
        • \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - "
        • \n", - " 1[] 2 items\n", - " \n", - "
            \n", - " \n", - " \n", - " \n", - "
          • \n", - " 0\n", - " 72.90613675900903\n", - "
          • \n", - " \n", - " \n", - " \n", - "
          \n", - " \n", - "
            \n", - " \n", - " \n", - " \n", - "
          • \n", - " 1\n", - " 72.63376966542347\n", - "
          • \n", - " \n", - " \n", - " \n", - "
          \n", - " \n", - "
        • \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - "
        • \n", - " 2[] 2 items\n", - " \n", - "
            \n", - " \n", - " \n", - " \n", - "
          • \n", - " 0\n", - " -56.50514190170437\n", - "
          • \n", - " \n", - " \n", - " \n", - "
          \n", - " \n", - "
            \n", - " \n", - " \n", - " \n", - "
          • \n", - " 1\n", - " 72.63376966542347\n", - "
          • \n", - " \n", - " \n", - " \n", - "
          \n", - " \n", - "
        • \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - "
        • \n", - " 3[] 2 items\n", - " \n", - "
            \n", - " \n", - " \n", - " \n", - "
          • \n", - " 0\n", - " -56.50514190170437\n", - "
          • \n", - " \n", - " \n", - " \n", - "
          \n", - " \n", - "
            \n", - " \n", - " \n", - " \n", - "
          • \n", - " 1\n", - " 24.28417701147754\n", - "
          • \n", - " \n", - " \n", - " \n", - "
          \n", - " \n", - "
        • \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - "
        • \n", - " 4[] 2 items\n", - " \n", - "
            \n", - " \n", - " \n", - " \n", - "
          • \n", - " 0\n", - " 72.90613675900903\n", - "
          • \n", - " \n", - " \n", - " \n", - "
          \n", - " \n", - "
            \n", - " \n", - " \n", - " \n", - "
          • \n", - " 1\n", - " 24.28417701147754\n", - "
          • \n", - " \n", - " \n", - " \n", - "
          \n", - " \n", - "
        • \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - " \n", - "
    • \n", - " \n", - " \n", - "
    \n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " links[] 1 items\n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " 0\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " rel\n", - " \"LICENSE\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"https://land.copernicus.eu/en/data-policy\"\n", - "
      • \n", - " \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - "
    \n", - " \n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " assets\n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " U2018_CLC2012_V2020_20u1_tfw\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tfw\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " type\n", - " \"text/plain\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " title\n", - " \"World File Europe\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 1 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"metadata\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " U2018_CLC2012_V2020_20u1_tif\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " type\n", - " \"image/tiff; application=geotiff; profile=cloud-optimized\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " title\n", - " \"Single Band Land Classification Europe\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " proj:epsg\n", - " 3035\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " proj:bbox[] 4 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " 900000.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 1\n", - " 900000.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 2\n", - " 7400000.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 3\n", - " 5500000.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - " \n", - "
      • \n", - " proj:shape[] 2 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " 46000\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 1\n", - " 65000\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - " \n", - "
      • \n", - " proj:transform[] 12 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " 100.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 1\n", - " 0.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 2\n", - " 900000.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 3\n", - " 0.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 4\n", - " -100.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 5\n", - " 5500000.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 6\n", - " 0.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 7\n", - " 0.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 8\n", - " 1.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 9\n", - " 0.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 10\n", - " 0.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 11\n", - " 1.0\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 2 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"data\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 1\n", - " \"visual\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " U2018_CLC2012_V2020_20u1_tif_aux_xml\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.aux.xml\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " type\n", - " \"application/xml\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " title\n", - " \"TIFF Statistics Europe\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 1 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"metadata\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " U2018_CLC2012_V2020_20u1_tif_ovr\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.ovr\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " type\n", - " \"image/tiff; application=geotiff; profile=pyramid\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " title\n", - " \"Pyramid Europe\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 1 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"metadata\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " U2018_CLC2012_V2020_20u1_tif_vat_cpg\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.cpg\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " type\n", - " \"text/plain\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " title\n", - " \"Encoding Europe\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 1 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"metadata\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " U2018_CLC2012_V2020_20u1_tif_vat_dbf\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.dbf\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " type\n", - " \"application/dbf\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " title\n", - " \"Database Europe\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 1 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"metadata\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " U2018_CLC2012_V2020_20u1_tif_xml\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.xml\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " type\n", - " \"application/xml\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " title\n", - " \"TIFF Metadata Europe\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 1 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"metadata\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " CLC2018_CLC2012_V2018_20_tif_lyr\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20.tif.lyr\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " type\n", - " \"image/tiff; application=geotiff; profile=layer\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " title\n", - " \"Legend Layer Europe\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 1 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"metadata\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " CLC2018_CLC2012_V2018_20_QGIS_txt\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20_QGIS.txt\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " type\n", - " \"text/plain\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " title\n", - " \"Legends Europe\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 1 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"metadata\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " U2018_CLC2012_V2020_20u1_xml\n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " href\n", - " \"X:/EO/u2018_clc2012_v2020_20u1_raster100m/Metadata/U2018_CLC2012_V2020_20u1.xml\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " type\n", - " \"application/xml\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " title\n", - " \"Single Band Land Classification Metadata Europe\"\n", - "
      • \n", - " \n", - " \n", - " \n", - " \n", - "
      • \n", - " roles[] 1 items\n", - " \n", - "
          \n", - " \n", - " \n", - " \n", - "
        • \n", - " 0\n", - " \"metadata\"\n", - "
        • \n", - " \n", - " \n", - " \n", - "
        \n", - " \n", - "
      • \n", - " \n", - " \n", - "
      \n", - "
    • \n", - " \n", - " \n", - " \n", - "
    \n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " bbox[] 4 items\n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " 0\n", - " -56.50514190170437\n", - "
    • \n", - " \n", - " \n", - " \n", - "
    \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " 1\n", - " 24.28417701147754\n", - "
    • \n", - " \n", - " \n", - " \n", - "
    \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " 2\n", - " 72.90613675900903\n", - "
    • \n", - " \n", - " \n", - " \n", - "
    \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " 3\n", - " 72.63376966542347\n", - "
    • \n", - " \n", - " \n", - " \n", - "
    \n", - " \n", - "
  • \n", - " \n", - " \n", - " \n", - "
  • \n", - " stac_extensions[] 1 items\n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " 0\n", - " \"https://stac-extensions.github.io/projection/v1.1.0/schema.json\"\n", - "
    • \n", - " \n", - " \n", - " \n", - "
    \n", - " \n", - "
  • \n", - " \n", - " \n", - "
\n", - "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" + "ename": "TypeError", + "evalue": "AssetDefinition.__init__() missing 1 required positional argument: 'properties'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[18], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m asset_def \u001b[38;5;241m=\u001b[39m \u001b[43mAssetDefinition\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m asset_def\u001b[38;5;241m.\u001b[39mitem_assets(item)\n", + "\u001b[1;31mTypeError\u001b[0m: AssetDefinition.__init__() missing 1 required positional argument: 'properties'" + ] } ], "source": [ - "item" + "asset_def = AssetDefinition()\n", + "asset_def.item_assets(item)\n", + "item.make_asset_hrefs_relative()" ] }, { "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "item.save_object(dest_href='testY.json')" - ] - }, - { - "cell_type": "code", - "execution_count": 28, + "execution_count": 19, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "datetime.datetime(2012, 1, 1, 0, 0)" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\stacs/corine-land-cover-raster/corine-land-cover-raster.json'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[19], line 10\u001b[0m\n\u001b[0;32m 3\u001b[0m COLLECTION_ID \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorine-land-cover-raster\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 5\u001b[0m CLMS_CATALOG_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 6\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mROOT, target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstacs/clms_catalog.json\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[0;32m 7\u001b[0m )\n\u001b[0;32m 8\u001b[0m COLLECTION_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 9\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mCOLLECTION,\n\u001b[1;32m---> 10\u001b[0m target\u001b[38;5;241m=\u001b[39m\u001b[43mpystac\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSTACObject\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mWORKING_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstacs/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mCOLLECTION_ID\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mCOLLECTION_ID\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[0;32m 11\u001b[0m )\n\u001b[0;32m 12\u001b[0m ITEM_PARENT_LINK 
\u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 13\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mPARENT,\n\u001b[0;32m 14\u001b[0m target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstacs/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[0;32m 15\u001b[0m )\n\u001b[0;32m 17\u001b[0m item\u001b[38;5;241m.\u001b[39mset_self_href(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mSTAC_DIR\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_object.py:619\u001b[0m, in \u001b[0;36mSTACObject.from_file\u001b[1;34m(cls, href, stac_io)\u001b[0m\n\u001b[0;32m 607\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Reads a STACObject implementation from a file.\u001b[39;00m\n\u001b[0;32m 608\u001b[0m \n\u001b[0;32m 609\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 
616\u001b[0m \u001b[38;5;124;03m by the JSON read from the file located at HREF.\u001b[39;00m\n\u001b[0;32m 617\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 618\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m==\u001b[39m STACObject:\n\u001b[1;32m--> 619\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(S, \u001b[43mpystac\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 621\u001b[0m href \u001b[38;5;241m=\u001b[39m make_posix_style(href)\n\u001b[0;32m 623\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stac_io \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\__init__.py:165\u001b[0m, in \u001b[0;36mread_file\u001b[1;34m(href, stac_io)\u001b[0m\n\u001b[0;32m 163\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stac_io \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 164\u001b[0m stac_io \u001b[38;5;241m=\u001b[39m StacIO\u001b[38;5;241m.\u001b[39mdefault()\n\u001b[1;32m--> 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mstac_io\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_stac_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:234\u001b[0m, in \u001b[0;36mStacIO.read_stac_object\u001b[1;34m(self, source, root, *args, **kwargs)\u001b[0m\n\u001b[0;32m 208\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_stac_object\u001b[39m(\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 210\u001b[0m source: HREF,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: 
Any,\n\u001b[0;32m 214\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m STACObject:\n\u001b[0;32m 215\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Read a STACObject from a JSON file at the given source.\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \n\u001b[0;32m 217\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.read_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[38;5;124;03m contained in the file at the given uri.\u001b[39;00m\n\u001b[0;32m 233\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 234\u001b[0m d \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_json\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 235\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstac_object_from_dict(\n\u001b[0;32m 236\u001b[0m d, href\u001b[38;5;241m=\u001b[39msource, root\u001b[38;5;241m=\u001b[39mroot, preserve_dict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 237\u001b[0m )\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:205\u001b[0m, in \u001b[0;36mStacIO.read_json\u001b[1;34m(self, source, *args, **kwargs)\u001b[0m\n\u001b[0;32m 188\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_json\u001b[39m(\u001b[38;5;28mself\u001b[39m, source: HREF, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Any]:\n\u001b[0;32m 
189\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Read a dict from the given source.\u001b[39;00m\n\u001b[0;32m 190\u001b[0m \n\u001b[0;32m 191\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.read_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[38;5;124;03m given source.\u001b[39;00m\n\u001b[0;32m 204\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 205\u001b[0m txt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_text\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mjson_loads(txt)\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:282\u001b[0m, in \u001b[0;36mDefaultStacIO.read_text\u001b[1;34m(self, source, *_, **__)\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A concrete implementation of :meth:`StacIO.read_text\u001b[39;00m\n\u001b[0;32m 278\u001b[0m \u001b[38;5;124;03m`. 
Converts the ``source`` argument to a string (if it\u001b[39;00m\n\u001b[0;32m 279\u001b[0m \u001b[38;5;124;03mis not already) and delegates to :meth:`DefaultStacIO.read_text_from_href` for\u001b[39;00m\n\u001b[0;32m 280\u001b[0m \u001b[38;5;124;03mopening and reading the file.\"\"\"\u001b[39;00m\n\u001b[0;32m 281\u001b[0m href \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(os\u001b[38;5;241m.\u001b[39mfspath(source))\n\u001b[1;32m--> 282\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_text_from_href\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:305\u001b[0m, in \u001b[0;36mDefaultStacIO.read_text_from_href\u001b[1;34m(self, href)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not read uri \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhref\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 304\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 305\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mutf-8\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m 306\u001b[0m href_contents \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread()\n\u001b[0;32m 307\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m href_contents\n", + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file 
or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\stacs/corine-land-cover-raster/corine-land-cover-raster.json'" + ] } ], "source": [ - "datetime(int(year), 1, 1, microsecond=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [], - "source": [ - "# Taken from https://stackoverflow.com/questions/2148119/how-to-convert-an-xml-string-to-a-dictionary\n", - "from xml.etree import cElementTree as ElementTree\n", - "\n", - "\n", - "class XmlListConfig(list):\n", - " def __init__(self, aList):\n", - " for element in aList:\n", - " if element:\n", - " if len(element) == 1 or element[0].tag != element[1].tag:\n", - " self.append(XmlDictConfig(element))\n", - " elif element[0].tag == element[1].tag:\n", - " self.append(XmlListConfig(element))\n", - " elif element.text:\n", - " text = element.text.strip()\n", - " if text:\n", - " self.append(text)\n", + "WORKING_DIR = os.getcwd()\n", + "STAC_DIR = 'stac_tests'\n", + "COLLECTION_ID = 'corine-land-cover-raster'\n", "\n", + "CLMS_CATALOG_LINK = pystac.link.Link(\n", + " rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, \"stacs/clms_catalog.json\"))\n", + ")\n", + "COLLECTION_LINK = pystac.link.Link(\n", + " rel=pystac.RelType.COLLECTION,\n", + " target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f\"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json\")),\n", + ")\n", + "ITEM_PARENT_LINK = pystac.link.Link(\n", + " rel=pystac.RelType.PARENT,\n", + " target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f\"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json\")),\n", + ")\n", "\n", - "class XmlDictConfig(dict):\n", - " def __init__(self, parent_element):\n", - " if parent_element.items():\n", - " self.update(dict(parent_element.items()))\n", - " for element in parent_element:\n", - " if element:\n", - " if len(element) == 1 or element[0].tag != element[1].tag:\n", - " aDict = XmlDictConfig(element)\n", - " else:\n", - " aDict = 
{element[0].tag: XmlListConfig(element)}\n", - " if element.items():\n", - " aDict.update(dict(element.items()))\n", - " self.update({element.tag: aDict})\n", - " elif element.items():\n", - " self.update({element.tag: dict(element.items())})\n", - " else:\n", - " self.update({element.tag: element.text})\n", - "\n", - "stac_io = pystac.StacIO.default()\n", - "\n", - "def get_metadata(xml: str):\n", - " result = XmlDictConfig(ElementTree.XML(stac_io.read_text(xml)))\n", - " result[\n", - " \"ORIGINAL_URL\"\n", - " ] = xml # Include the original URL in the metadata for use later\n", - " return result" + "item.set_self_href(os.path.join(WORKING_DIR, f\"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json\"))\n", + "item.save_object()" ] }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 62, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\boeck\\AppData\\Local\\Temp\\ipykernel_9832\\556696271.py:24: DeprecationWarning: Testing an element's truth value will raise an exception in future versions. 
Use specific 'len(elem)' or 'elem is not None' test instead.\n", - " if element:\n" - ] - }, - { - "data": { - "text/plain": [ - "{'{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': 'http://www.isotc211.org/2005/gmd http://schemas.opengis.net/csw/2.0.2/profiles/apiso/1.0.0/apiso.xsd',\n", - " '{http://www.isotc211.org/2005/gmd}fileIdentifier': {'{http://www.isotc211.org/2005/gco}CharacterString': '7e162b2d-5196-41b2-b6dd-e889651e2f1f'},\n", - " '{http://www.isotc211.org/2005/gmd}language': {'{http://www.isotc211.org/2005/gmd}LanguageCode': {'codeList': 'http://www.loc.gov/standards/iso639-2/',\n", - " 'codeListValue': 'eng'}},\n", - " '{http://www.isotc211.org/2005/gmd}characterSet': {'{http://www.isotc211.org/2005/gmd}MD_CharacterSetCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_CharacterSetCode',\n", - " 'codeListValue': 'utf8'}},\n", - " '{http://www.isotc211.org/2005/gmd}hierarchyLevel': {'{http://www.isotc211.org/2005/gmd}MD_ScopeCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_ScopeCode',\n", - " 'codeListValue': 'dataset'}},\n", - " '{http://www.isotc211.org/2005/gmd}contact': {'{http://www.isotc211.org/2005/gmd}CI_ResponsibleParty': {'{http://www.isotc211.org/2005/gmd}organisationName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'European Environment Agency'},\n", - " '{http://www.isotc211.org/2005/gmd}contactInfo': {'{http://www.isotc211.org/2005/gmd}CI_Contact': {'{http://www.isotc211.org/2005/gmd}address': {'{http://www.isotc211.org/2005/gmd}CI_Address': {'{http://www.isotc211.org/2005/gmd}deliveryPoint': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Kongens Nytorv 6'},\n", - " '{http://www.isotc211.org/2005/gmd}city': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Copenhagen'},\n", - " '{http://www.isotc211.org/2005/gmd}administrativeArea': {'{http://www.isotc211.org/2005/gco}CharacterString': 'K'},\n", - " 
'{http://www.isotc211.org/2005/gmd}postalCode': {'{http://www.isotc211.org/2005/gco}CharacterString': '1050'},\n", - " '{http://www.isotc211.org/2005/gmd}country': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Denmark'},\n", - " '{http://www.isotc211.org/2005/gmd}electronicMailAddress': {'{http://www.isotc211.org/2005/gco}CharacterString': 'sdi@eea.europa.eu'}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}role': {'{http://www.isotc211.org/2005/gmd}CI_RoleCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_RoleCode',\n", - " 'codeListValue': 'pointOfContact'}}}},\n", - " '{http://www.isotc211.org/2005/gmd}dateStamp': {'{http://www.isotc211.org/2005/gco}DateTime': '2019-12-18T22:18:54'},\n", - " '{http://www.isotc211.org/2005/gmd}metadataStandardName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'ISO 19115:2003/19139'},\n", - " '{http://www.isotc211.org/2005/gmd}metadataStandardVersion': {'{http://www.isotc211.org/2005/gco}CharacterString': '1.0'},\n", - " '{http://www.isotc211.org/2005/gmd}referenceSystemInfo': {'{http://www.isotc211.org/2005/gmd}MD_ReferenceSystem': {'{http://www.isotc211.org/2005/gmd}referenceSystemIdentifier': {'{http://www.isotc211.org/2005/gmd}RS_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': {'{http://www.w3.org/1999/xlink}href': 'http://www.opengis.net/def/crs/EPSG/0/3035'}}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}identificationInfo': {'{http://www.isotc211.org/2005/gmd}MD_DataIdentification': {'{http://www.isotc211.org/2005/gmd}citation': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Corine Land Cover (CLC) 2018, Version 2020 20_1'},\n", - " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': 
'2020-02-24'},\n", - " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_DateTypeCode',\n", - " 'codeListValue': 'publication'}}}},\n", - " '{http://www.isotc211.org/2005/gmd}edition': {'{http://www.isotc211.org/2005/gco}CharacterString': '20_1'},\n", - " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Corine Land Cover (CLC) 2018, Version 2020 20_1'}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}abstract': {'{http://www.isotc211.org/2005/gco}CharacterString': 'CLC2018 is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2018. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. \\nCLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. 
The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/clc2018technicalguidelines_final.pdf. CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'},\n", - " '{http://www.isotc211.org/2005/gmd}pointOfContact': {'{http://www.isotc211.org/2005/gmd}CI_ResponsibleParty': {'{http://www.isotc211.org/2005/gmd}individualName': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}organisationName': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}positionName': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}contactInfo': {'{http://www.isotc211.org/2005/gmd}CI_Contact': {'{http://www.isotc211.org/2005/gmd}phone': {'{http://www.isotc211.org/2005/gmd}CI_Telephone': {'{http://www.isotc211.org/2005/gmd}voice': 
{'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}facsimile': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}address': {'{http://www.isotc211.org/2005/gmd}CI_Address': {'{http://www.isotc211.org/2005/gmd}deliveryPoint': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}city': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}administrativeArea': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}postalCode': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}country': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}electronicMailAddress': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}role': {'{http://www.isotc211.org/2005/gmd}CI_RoleCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_RoleCode',\n", - " 'codeListValue': 'resourceProvider'}}}},\n", - " '{http://www.isotc211.org/2005/gmd}resourceMaintenance': {'{http://www.isotc211.org/2005/gmd}MD_MaintenanceInformation': {'{http://www.isotc211.org/2005/gmd}maintenanceAndUpdateFrequency': 
{'{http://www.isotc211.org/2005/gmd}MD_MaintenanceFrequencyCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_MaintenanceFrequencyCode',\n", - " 'codeListValue': ''}}}},\n", - " '{http://www.isotc211.org/2005/gmd}graphicOverview': {'{http://www.isotc211.org/2005/gmd}MD_BrowseGraphic': {'{http://www.isotc211.org/2005/gmd}fileName': {'{http://www.isotc211.org/2005/gco}CharacterString': 'https://sdi.eea.europa.eu/public/catalogue-graphic-overview/blank.png'}}},\n", - " '{http://www.isotc211.org/2005/gmd}descriptiveKeywords': {'{http://www.isotc211.org/2005/gmd}MD_Keywords': {'{http://www.isotc211.org/2005/gmd}keyword': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Albania, Austria, Belgium, Bosnia and Herzegovina, Bulgaria, Croatia, Cyprus, Czech Republic, Denmark, Estonia, Finland, France, Germany, Greece, Hungary, Iceland, Ireland, Italy, Kosovo, Latvia, Liechtenstein, Lithuania, Luxembourg, Malta, Montenegro, Netherlands, North Macedonia, Norway, Poland, Portugal, Romania, Serbia, Slovakia, Slovenia, Spain, Sweden, Switzerland, Turkey, United Kingdom'},\n", - " '{http://www.isotc211.org/2005/gmd}type': {'{http://www.isotc211.org/2005/gmd}MD_KeywordTypeCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_KeywordTypeCode',\n", - " 'codeListValue': 'place'}},\n", - " '{http://www.isotc211.org/2005/gmd}thesaurusName': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Continents, countries, sea regions of the world'},\n", - " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2015-07-17'},\n", - " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': {'codeList': 
'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_DateTypeCode',\n", - " 'codeListValue': 'publication'}}}},\n", - " '{http://www.isotc211.org/2005/gmd}identifier': {'{http://www.isotc211.org/2005/gmd}MD_Identifier': {'{http://www.isotc211.org/2005/gmd}code': {'{http://www.isotc211.org/2005/gmx}Anchor': {'{http://www.w3.org/1999/xlink}href': 'http://sdi.eea.europa.eu/editor-catalogue/srv/eng/thesaurus.download?ref=external.place.regions'}}}}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}resourceConstraints': {'{http://www.isotc211.org/2005/gmd}MD_LegalConstraints': {'{http://www.isotc211.org/2005/gmd}useConstraints': {'{http://www.isotc211.org/2005/gmd}MD_RestrictionCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_RestrictionCode',\n", - " 'codeListValue': 'otherRestrictions'}}}},\n", - " '{http://www.isotc211.org/2005/gmd}language': {'{http://www.isotc211.org/2005/gmd}LanguageCode': {'codeList': 'http://www.loc.gov/standards/iso639-2/',\n", - " 'codeListValue': 'eng'}},\n", - " '{http://www.isotc211.org/2005/gmd}topicCategory': {'{http://www.isotc211.org/2005/gmd}MD_TopicCategoryCode': 'imageryBaseMapsEarthCover'},\n", - " '{http://www.isotc211.org/2005/gmd}extent': {'{http://www.isotc211.org/2005/gmd}EX_Extent': {'{http://www.isotc211.org/2005/gmd}temporalElement': None}}}},\n", - " '{http://www.isotc211.org/2005/gmd}distributionInfo': {'{http://www.isotc211.org/2005/gmd}MD_Distribution': {'{http://www.isotc211.org/2005/gmd}distributionFormat': {'{http://www.isotc211.org/2005/gmd}MD_Format': {'{http://www.isotc211.org/2005/gmd}name': {'{http://www.isotc211.org/2005/gco}nilReason': 'missing',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None},\n", - " '{http://www.isotc211.org/2005/gmd}version': {'{http://www.isotc211.org/2005/gco}nilReason': 'unknown',\n", - " '{http://www.isotc211.org/2005/gco}CharacterString': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}distributor': None,\n", - " 
'{http://www.isotc211.org/2005/gmd}transferOptions': {'{http://www.isotc211.org/2005/gmd}MD_DigitalTransferOptions': None}}},\n", - " '{http://www.isotc211.org/2005/gmd}dataQualityInfo': {'{http://www.isotc211.org/2005/gmd}DQ_DataQuality': {'{http://www.isotc211.org/2005/gmd}scope': {'{http://www.isotc211.org/2005/gmd}DQ_Scope': {'{http://www.isotc211.org/2005/gmd}level': {'{http://www.isotc211.org/2005/gmd}MD_ScopeCode': {'codeListValue': 'dataset',\n", - " 'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_ScopeCode'}}}},\n", - " '{http://www.isotc211.org/2005/gmd}report': {'{http://www.isotc211.org/2005/gmd}DQ_DomainConsistency': {'{http://www.isotc211.org/2005/gmd}result': {'{http://www.isotc211.org/2005/gmd}DQ_ConformanceResult': {'{http://www.isotc211.org/2005/gmd}specification': {'{http://www.isotc211.org/2005/gmd}CI_Citation': {'{http://www.isotc211.org/2005/gmd}title': {'{http://www.isotc211.org/2005/gco}CharacterString': 'Commission Regulation (EU) No 1089/2010 of 23 November 2010 implementing Directive 2007/2/EC of the European Parliament and of the Council as regards interoperability of spatial data sets and services'},\n", - " '{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gmd}CI_Date': {'{http://www.isotc211.org/2005/gmd}date': {'{http://www.isotc211.org/2005/gco}Date': '2010-12-08'},\n", - " '{http://www.isotc211.org/2005/gmd}dateType': {'{http://www.isotc211.org/2005/gmd}CI_DateTypeCode': {'codeList': 'http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_DateTypeCode',\n", - " 'codeListValue': 'publication'}}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}explanation': {'{http://www.isotc211.org/2005/gco}CharacterString': 'See the referenced specification'},\n", - " '{http://www.isotc211.org/2005/gmd}pass': {'{http://www.isotc211.org/2005/gco}nilReason': 'unknown'}}}}},\n", - " '{http://www.isotc211.org/2005/gmd}lineage': {'{http://www.isotc211.org/2005/gmd}LI_Lineage': 
{'{http://www.isotc211.org/2005/gmd}statement': {'{http://www.isotc211.org/2005/gco}CharacterString': 'CLC products are based in majority of EEA39 countries on the photointerpretation of satellite images by the national teams of the participating countries - the EEA member and cooperating countries. All features in original vector database are delineated and classified on satellite images according to CLC specifications i.e. with better than 100 m positional accuracy and 25 ha minimum mapping unit (5 ha MMU for change layer) into the standardized CLC nomenclature (44 CLC classes). The change layer is derived from satellite imagery by direct mapping of changes taken place between two consecutive inventories, based on image-to-image comparison. Some countries follow alternative approaches by utilizing semiautomatic methodology e.g. generalisation of higher resolution national datasets. Production of national CLC inventories is supported by training and is under systematic control of the CLC Technical Team, both for thematic and semantic aspects, to assure harmonized European products. The process of European data integration starts when national deliveries are accepted and the Database Acceptance Report (DBTA) issued. National CLC data are then transformed into the common European reference (ETRS89/LAEA) and pan-European seamless dataset is produced. Integration step includes also harmonization of database along country borders. Rigorous harmonization of country borders has been done only for CLC2000 and CHA9000 layers (in 2 km wide strips along borders) as part of CLC2000 update. Currently, only simplified harmonisation is applied i.e. small border polygons (area < 5 ha) are generalised according to predefined rules to largest and/or thematically most similar neighbour, sliver polygons along borders (< 0.1 ha) are eliminated. European Corine Land Cover seamless database represents the final product of European data integration. 
Some artificial lines (dividing polygons with the same code) can be still present in database due to technical constraints of current ArcGIS technology and complexity of dataset (adaptive tiling) but this has no impact on dataset contents and can be dissolved for smaller data extracts.\\nRevised versions\\nStarted from the publication of CLC2006 (Version 16) the previous inventory is substituted by its revised version by most of the participating countries (see CLC seamless data coverage table https://land.copernicus.eu/user-corner/technical-library/clc-country-coverage-1990-2018-v20_1). However, due to their specific methodology not all countries are producing revised version of the previous inventory. The revision of previous CLC layer is a “by-product” of the standard updating process, including corrections to the original data identified during the update. Revisions (correcting mistakes) are needed because of the following factors: \\n- availability of higher resolution satellite imagery;\\n- a new satellite image or time series of satellite imagery provides additional key to correctly recognise a feature;\\n- improved availability and better quality of in-situ data;\\n- improved skills of experts, i.e. better understanding and application of CLC nomenclature;\\n- decision of the national team to improve the product between two inventories.\\n\\nThese revisions are not propagated backward to older datasets (e.g. during CLC2018 revision of CLC2012 might be provided, but the older datasets were not revised). Thus, consecutive inventories might include not only real changes, but also differences due to revisions. Therefore, it is recommended that in time series analysis CLC-Change layers should be used. 
If status layers from past are needed, these could be derived backward from deducting CLC-Change layers from the latest (best available) status layer as it is done for EEA accounting layers (see at https://www.eea.europa.eu/data-and-maps/data/corine-land-cover-accounting-layers)\\nMore details to be available soon in upcoming \"Users\\' Manual for all Copernicus data” document. \\nVersion 20_1\\nRelease date: 24-02-2020\\nFile naming conventions simplified and better described. New file naming convention has been introduced based on user feedback on version 20. Filename is composed of combination of information about update campaign, data theme and reference year and version specification (including release year and release number). \\nSee https://land.copernicus.eu/user-corner/technical-library/clc-file-naming-conventions-guide-v20_1 for details.\\n\\nThe French DOMs are provided in separate databases (files both for vector and raster version of data).\\n\\nAll raster layers are back in 8 bit GeoTIFF. Modification is introduced based on the user feedback on version 20. 
In order to keep 8 bit resolution for raster change layers, they are divided into two files - representing consumption (from) and formation (to) part of change.\\n\\nSee https://land.copernicus.eu/user-corner/technical-library/clc-country-coverage-1990-2018-v20_1 for full information about the coverage of this version.\\nSee http://land.copernicus.eu/user-corner/technical-library/clc-and-clcc-release-lineage for full information about lineage history.'}}}}},\n", - " 'ORIGINAL_URL': '../CLC_samples/U2018_CLC2018_V2020_20u1.xml'}" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "xml_path = '../CLC_samples/U2018_CLC2018_V2020_20u1.xml'\n", - "\n", - "get_metadata(xml_path)\n" + "# # Taken from https://stackoverflow.com/questions/2148119/how-to-convert-an-xml-string-to-a-dictionary\n", + "# from xml.etree import cElementTree as ElementTree\n", + "\n", + "\n", + "# class XmlListConfig(list):\n", + "# def __init__(self, aList):\n", + "# for element in aList:\n", + "# if element:\n", + "# if len(element) == 1 or element[0].tag != element[1].tag:\n", + "# self.append(XmlDictConfig(element))\n", + "# elif element[0].tag == element[1].tag:\n", + "# self.append(XmlListConfig(element))\n", + "# elif element.text:\n", + "# text = element.text.strip()\n", + "# if text:\n", + "# self.append(text)\n", + "\n", + "\n", + "# class XmlDictConfig(dict):\n", + "# def __init__(self, parent_element):\n", + "# if parent_element.items():\n", + "# self.update(dict(parent_element.items()))\n", + "# for element in parent_element:\n", + "# if element:\n", + "# if len(element) == 1 or element[0].tag != element[1].tag:\n", + "# aDict = XmlDictConfig(element)\n", + "# else:\n", + "# aDict = {element[0].tag: XmlListConfig(element)}\n", + "# if element.items():\n", + "# aDict.update(dict(element.items()))\n", + "# self.update({element.tag: aDict})\n", + "# elif element.items():\n", + "# self.update({element.tag: 
dict(element.items())})\n", + "# else:\n", + "# self.update({element.tag: element.text})\n", + "\n", + "# stac_io = pystac.StacIO.default()\n", + "\n", + "# def get_metadata(xml: str):\n", + "# result = XmlDictConfig(ElementTree.XML(stac_io.read_text(xml)))\n", + "# result[\n", + "# \"ORIGINAL_URL\"\n", + "# ] = xml # Include the original URL in the metadata for use later\n", + "# return result\n", + "\n", + "\n", + "# xml_path = '../CLC_samples/U2018_CLC2018_V2020_20u1.xml'\n", + "\n", + "# get_metadata(xml_path)\n" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 6f8e1aa75f6616540d220f2384193804d5ad2486 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 30 Apr 2024 16:09:50 +0200 Subject: [PATCH 25/80] separate collection development notebook --- clms_collection_generator.ipynb | 104 ++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 clms_collection_generator.ipynb diff --git a/clms_collection_generator.ipynb b/clms_collection_generator.ipynb new file mode 100644 index 0000000..b0ad047 --- /dev/null +++ b/clms_collection_generator.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "\n", + "import pystac\n", + "import pystac.item\n", + "import pystac.link\n", + "from pystac.provider import ProviderRole\n", + "from pystac.extensions.projection import ProjectionExtension\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "WORKING_DIR = os.getcwd()\n", + "STAC_DIR = 'stac_tests'\n", + "COLLECTION_ID = 'corine-land-cover-raster'\n", + "COLLECTION_TITLE = \"CORINE Land Cover Raster\"\n", + "COLLECTION_DESCRIPTION = (\"The European Commission launched the CORINE (Coordination of Information on the Environment) \"\n", + " \"program in an effort to develop a 
standardized methodology for producing continent-scale land \"\n", + " \"cover, biotope, and air quality maps. The CORINE Land Cover (CLC) product offers a pan-European \"\n", + " \"land cover and land use inventory with 44 thematic classes, ranging from broad forested areas \"\n", + " \"to individual vineyards.\")\n", + "\n", + "\n", + "\n", + "# CLMS_CATALOG_LINK = pystac.link.Link(\n", + "# rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, \"stacs/clms_catalog.json\"))\n", + "# )\n", + "# COLLECTION_LINK = pystac.link.Link(\n", + "# rel=pystac.RelType.COLLECTION,\n", + "# target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f\"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json\")),\n", + "# )\n", + "# ITEM_PARENT_LINK = pystac.link.Link(\n", + "# rel=pystac.RelType.PARENT,\n", + "# target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f\"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json\")),\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "sp_extent = pystac.SpatialExtent([None, None, None, None])\n", + "tmp_extent = pystac.TemporalExtent([\"1990-01-01T00:00:00.000Z\", None])\n", + "extent = pystac.Extent(sp_extent, tmp_extent)\n", + "\n", + "collection = pystac.Collection(id=COLLECTION_ID,\n", + " description=COLLECTION_DESCRIPTION,\n", + " title=COLLECTION_TITLE,\n", + " extent=extent,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "stacdev", + "language": "python", + "name": "stacdev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c5949ff3d47070f4849d1564e0459aa1682c6889 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 30 Apr 2024 16:54:50 +0200 Subject: [PATCH 26/80] cosmetic --- clms_item_generator.ipynb | 114 +++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/clms_item_generator.ipynb b/clms_item_generator.ipynb index 266c0c5..b7ddd67 100644 --- a/clms_item_generator.ipynb +++ b/clms_item_generator.ipynb @@ -169,64 +169,8 @@ "def project_bbox(img, target_epsg=4326):\n", " target_crs = rio.crs.CRS.from_epsg(target_epsg)\n", " bbox_warped = rio.warp.transform_bounds(img.crs, target_crs, *img.bounds)\n", - " return(bbox_warped)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GLP.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GUF.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MYT.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_REU.tif']" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "root = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m'\n", - "\n", - "img_paths = get_img_paths(path=root)\n", - "\n", - "# for img_path in img_paths:\n", + " return(bbox_warped)\n", "\n", - "img_path = img_paths[0]\n", - "img_paths" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - 
"name": "stdout", - "output_type": "stream", - "text": [ - "{'update_campaign': '2018', 'theme': 'CLC', 'reference_year': '2012', 'release_year': '2020', 'release_number': '20u1', 'country_code': '', 'DOM_code': '', 'id': 'U2018_CLC2012_V2020_20u1', 'suffix': 'tif'}\n" - ] - } - ], - "source": [ - "print(clc_name_elements)" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ "def create_item(img_path):\n", "\n", " clc_name_elements = deconstruct_clc_name(img_path)\n", @@ -316,6 +260,62 @@ " return(item)" ] }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GLP.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GUF.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MYT.tif',\n", + " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_REU.tif']" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "root = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m'\n", + "\n", + "img_paths = get_img_paths(path=root)\n", + "\n", + "# for img_path in img_paths:\n", + "\n", + "img_path = img_paths[0]\n", + "img_paths" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'update_campaign': '2018', 'theme': 'CLC', 'reference_year': '2012', 'release_year': '2020', 'release_number': '20u1', 'country_code': '', 'DOM_code': '', 'id': 
'U2018_CLC2012_V2020_20u1', 'suffix': 'tif'}\n" + ] + } + ], + "source": [ + "print(clc_name_elements)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 32, From ddcbf93607be5a16ad2ce2fde96600fae426fba7 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 30 Apr 2024 17:13:22 +0200 Subject: [PATCH 27/80] changed directory structure --- .../clc/clms_collection_generator.ipynb | 0 .../clc/clms_item_generator.ipynb | 0 .../U2018_CLC2012_V2020_20u1.json | 176 ++++++++++++++++++ 3 files changed, 176 insertions(+) rename clms_collection_generator.ipynb => scripts/clc/clms_collection_generator.ipynb (100%) rename clms_item_generator.ipynb => scripts/clc/clms_item_generator.ipynb (100%) create mode 100644 stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json diff --git a/clms_collection_generator.ipynb b/scripts/clc/clms_collection_generator.ipynb similarity index 100% rename from clms_collection_generator.ipynb rename to scripts/clc/clms_collection_generator.ipynb diff --git a/clms_item_generator.ipynb b/scripts/clc/clms_item_generator.ipynb similarity index 100% rename from clms_item_generator.ipynb rename to scripts/clc/clms_item_generator.ipynb diff --git a/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json b/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json new file mode 100644 index 0000000..54f0a81 --- /dev/null +++ b/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json @@ -0,0 +1,176 @@ +{ + "type": "Feature", + "stac_version": "1.0.0", + "id": "U2018_CLC2012_V2020_20u1", + "properties": { + "description": "Corine Land Cover 2012 (CLC2012) is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 
2012. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. CLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. 
CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.", + "created": null, + "providers": { + "name": "Copernicus Land Monitoring Service", + "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", + "roles": [ + "licensor", + "host" + ], + "url": "https://land.copernicus.eu" + }, + "start_datetime": "2012-01-01T00:00:00Z", + "end_datetime": "2012-12-31T00:00:00Z", + "datetime": null + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 72.90613675900903, + 24.28417701147754 + ], + [ + 72.90613675900903, + 72.63376966542347 + ], + [ + -56.50514190170437, + 72.63376966542347 + ], + [ + -56.50514190170437, + 24.28417701147754 + ], + [ + 72.90613675900903, + 24.28417701147754 + ] + ] + ] + }, + "links": [ + { + "rel": "LICENSE", + "href": "https://land.copernicus.eu/en/data-policy" + }, + { + "rel": "self", + "href": "x:/projects/ETC-DI/Task_18/clms-stac/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json", + "type": "application/json" + } + ], + "assets": { + "U2018_CLC2012_V2020_20u1_tfw": { + "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tfw", + "type": "text/plain", + "title": "World File Europe", + "roles": [ + "metadata" + ] + }, + "U2018_CLC2012_V2020_20u1_tif": { + "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "Single Band Land Classification Europe", + "proj:epsg": 3035, + "proj:bbox": [ + 900000.0, + 900000.0, + 7400000.0, + 5500000.0 + ], + "proj:shape": [ + 46000, + 65000 + ], + 
"proj:transform": [ + 100.0, + 0.0, + 900000.0, + 0.0, + -100.0, + 5500000.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 1.0 + ], + "roles": [ + "data", + "visual" + ] + }, + "U2018_CLC2012_V2020_20u1_tif_aux_xml": { + "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.aux.xml", + "type": "application/xml", + "title": "TIFF Statistics Europe", + "roles": [ + "metadata" + ] + }, + "U2018_CLC2012_V2020_20u1_tif_ovr": { + "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.ovr", + "type": "image/tiff; application=geotiff; profile=pyramid", + "title": "Pyramid Europe", + "roles": [ + "metadata" + ] + }, + "U2018_CLC2012_V2020_20u1_tif_vat_cpg": { + "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.cpg", + "type": "text/plain", + "title": "Encoding Europe", + "roles": [ + "metadata" + ] + }, + "U2018_CLC2012_V2020_20u1_tif_vat_dbf": { + "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.dbf", + "type": "application/dbf", + "title": "Database Europe", + "roles": [ + "metadata" + ] + }, + "U2018_CLC2012_V2020_20u1_tif_xml": { + "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.xml", + "type": "application/xml", + "title": "TIFF Metadata Europe", + "roles": [ + "metadata" + ] + }, + "CLC2018_CLC2012_V2018_20_tif_lyr": { + "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20.tif.lyr", + "type": "image/tiff; application=geotiff; profile=layer", + "title": "Legend Layer Europe", + "roles": [ + "metadata" + ] + }, + "CLC2018_CLC2012_V2018_20_QGIS_txt": { + "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20_QGIS.txt", + "type": "text/plain", + "title": "Legends Europe", + "roles": [ + "metadata" + ] + }, + "U2018_CLC2012_V2020_20u1_xml": { + "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/Metadata/U2018_CLC2012_V2020_20u1.xml", + "type": 
"application/xml", + "title": "Single Band Land Classification Metadata Europe", + "roles": [ + "metadata" + ] + } + }, + "bbox": [ + -56.50514190170437, + 24.28417701147754, + 72.90613675900903, + 72.63376966542347 + ], + "stac_extensions": [ + "https://stac-extensions.github.io/projection/v1.1.0/schema.json" + ] +} \ No newline at end of file From 1117053cbc4a7f2ea92b77ebfb6797e4d3a01f10 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 30 Apr 2024 17:24:09 +0200 Subject: [PATCH 28/80] dedicated python file --- scripts/clc/item.py | 228 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 228 insertions(+) create mode 100644 scripts/clc/item.py diff --git a/scripts/clc/item.py b/scripts/clc/item.py new file mode 100644 index 0000000..4fe9e25 --- /dev/null +++ b/scripts/clc/item.py @@ -0,0 +1,228 @@ +import os +import re + +import pystac +import pystac.item +import pystac.link +from pystac.provider import ProviderRole +from pystac.extensions.projection import ProjectionExtension +from pystac.extensions.item_assets import AssetDefinition + +from pyproj import Transformer +from shapely.geometry import GeometryCollection, box, shape, mapping +from datetime import datetime, UTC + +import rasterio as rio + + + +def deconstruct_clc_name(filename: str): + id = os.path.basename(filename).split('.')[0] + p = re.compile(("U(?P[0-9]{4})_" + "(?PCLC|CHA)(?P[0-9]{4})_" + "V(?P[0-9]{4})_(?P[0-9a-z]*)" + "_?(?P[A-Z]*)?" + "_?(?P[A-Z]*)?")) + m = p.search(id) + + return(m.groupdict()) + + +def deconstruct_clc_name(filename: str): + p = re.compile('^(?P[A-Z0-9a-z_]*).(?P.*)$') + m = p.search(os.path.basename(filename)) + + filename_split = m.groupdict() + + p = re.compile(("U(?P[0-9]{4})_" + "(?PCLC|CHA)(?P[0-9]{4})_" + "V(?P[0-9]{4})_(?P[0-9a-z]*)" + "_?(?P[A-Z]*)?" 
+ "_?(?P[A-Z]*)?")) + m = p.search(filename_split['id']) + + if m: + return(m.groupdict() | filename_split) + else: + return(filename_split) + + +def create_asset(filename: str, DOM_code: str): + filename_elements = deconstruct_clc_name(filename) + suffix = filename_elements['suffix'].replace('.', '_') + + DOM_DICT = { + 'GLP': 'Guadeloupe', + 'GUF': 'French Guyana', + 'MTQ': 'Martinique', + 'MYT': 'Mayotte', + 'REU': 'Réunion', + '': 'Europe', + } + + MEDIA_TYPE_DICT = { + 'tif': pystac.MediaType.COG, + 'tif_xml': pystac.MediaType.XML, + 'tif_aux_xml': pystac.MediaType.XML, + 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid', + 'tif_vat_cpg': pystac.MediaType.TEXT, + 'tif_vat_dbf': 'application/dbf', + 'txt': pystac.MediaType.TEXT, + 'tif_lyr': 'image/tiff; application=geotiff; profile=layer', + 'tfw': pystac.MediaType.TEXT, + 'xml': pystac.MediaType.XML, + } + + label = DOM_DICT[DOM_code] + + TITLE_DICT = { + 'tif': f'Single Band Land Classification {label}', + 'tif_xml': f'TIFF Metadata {label}', + 'tif_aux_xml': f'TIFF Statistics {label}', + 'tif_ovr': f'Pyramid {label}', + 'tif_vat_cpg': f'Encoding {label}', + 'tif_vat_dbf': f'Database {label}', + 'txt': f'Legends {label}', + 'tif_lyr': f'Legend Layer {label}', + 'tfw': f'World File {label}', + 'xml': f'Single Band Land Classification Metadata {label}', + } + + ROLES_DICT = { + 'tif': ['data', 'visual'], + 'tif_xml': ['metadata'], + 'tif_aux_xml': ['metadata'], + 'tif_ovr': ['metadata'], + 'tif_vat_cpg': ['metadata'], + 'tif_vat_dbf': ['metadata'], + 'txt': ['metadata'], + 'tif_lyr': ['metadata'], + 'tfw': ['metadata'], + 'xml': ['metadata'], + } + + asset = pystac.Asset(href=filename, title=TITLE_DICT[suffix], media_type=MEDIA_TYPE_DICT[suffix], roles=ROLES_DICT[suffix]) + return(f"{filename_elements['id']}_{suffix}", asset) + + +def get_img_paths(path: str): + img_paths=[] + for root, dirs, files in os.walk(path): + if root.endswith(('DATA', 'French_DOMs')): + for file in files: + if 
file.endswith('.tif'): + img_paths.append(os.path.join(root, file)) + + return(img_paths) + + + +def get_asset_files(path, clc_name): + clc_name_elements = deconstruct_clc_name(clc_name) + + asset_files = [] + + for root, dirs, files in os.walk(path): + if not clc_name_elements['DOM_code'] and 'French_DOMs' in root: + continue + + if clc_name_elements['DOM_code'] and ('Legend' in root and not 'French_DOMs' in root): + continue + + for file in files: + if file.startswith(clc_name + '.') or file.endswith((f'{clc_name_elements["DOM_code"]}.tif.lyr', 'QGIS.txt',)): + asset_files.append(os.path.join(root, file)) + + return(asset_files) + +def project_bbox(img, target_epsg=4326): + target_crs = rio.crs.CRS.from_epsg(target_epsg) + bbox_warped = rio.warp.transform_bounds(img.crs, target_crs, *img.bounds) + return(bbox_warped) + + +def create_item(img_path: str): + clc_name_elements = deconstruct_clc_name(img_path) + + asset_files = get_asset_files(root, clc_name=clc_name_elements['id']) + asset_files = [f for f in asset_files if not f.endswith('aux')] + + year = clc_name_elements.get('reference_year') + + CLC_PROVIDER = pystac.provider.Provider( + name='Copernicus Land Monitoring Service', + description=('The Copernicus Land Monitoring Service provides ' + 'geographical information on land cover and its ' + 'changes, land use, ground motions, vegetation state, ' + 'water cycle and Earth\'s surface energy variables to ' + 'a broad range of users in Europe and across the World ' + 'in the field of environmental terrestrial applications.'), + roles=[ProviderRole.LICENSOR, ProviderRole.HOST], + url='https://land.copernicus.eu' + ) + + props = {'description': (f'Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' + f'datasets produced within the frame the Copernicus Land Monitoring Service ' + f'referring to land cover / land use status of year {year}. 
' + f'CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), ' + f'coordinated by the European Environment Agency (EEA). It provides consistent ' + f'and thematically detailed information on land cover and land cover changes across Europe. ' + f'CLC datasets are based on the classification of satellite images produced by the national ' + f'teams of the participating countries - the EEA members and cooperating countries (EEA39). ' + f'National CLC inventories are then further integrated into a seamless land cover map of Europe. ' + f'The resulting European database relies on standard methodology and nomenclature with following ' + f'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; ' + f'minimum mapping unit (MMU) for status layers is 25 hectares; ' + f'minimum width of linear elements is 100 metres. ' + f'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares ' + f'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. ' + f'The CLC service delivers important data sets supporting the implementation of key priority ' + f'areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, ' + f'halting the loss of biological diversity, tracking the impacts of climate change, ' + f'monitoring urban land take, assessing developments in agriculture or dealing with ' + f'water resources directives. CLC belongs to the Pan-European component of the ' + f'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the ' + f'European Copernicus Programme coordinated by the European Environment Agency, ' + f'providing environmental information from a combination of air- and space-based observation ' + f'systems and in-situ monitoring. Additional information about CLC product description including ' + f'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. 
' + f'CLC class descriptions can be found at ' + f'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'), + 'created': None, + 'providers': CLC_PROVIDER.to_dict(), + } + + with rio.open(img_path) as img: + + bbox = project_bbox(img) + params = { + 'id': clc_name_elements.get('id'), + 'bbox': bbox, + 'geometry': mapping(box(*bbox)), + 'datetime': None, + 'start_datetime': datetime(int(year), 1, 1, microsecond=0, tzinfo=UTC), + 'end_datetime': datetime(int(year), 12, 31, microsecond=0, tzinfo=UTC), + 'properties': props, + } + + item = pystac.Item(**params) + + for asset_file in asset_files: + # print(asset_file) + key, asset = create_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code')) + item.add_asset( + key=key, + asset=asset, + ) + + proj_ext = ProjectionExtension.ext(item.assets[os.path.basename(img_path).replace('.', '_')], add_if_missing=True) + proj_ext.apply(epsg=rio.crs.CRS(img.crs).to_epsg(), + bbox=img.bounds, + shape=[_ for _ in img.shape], + transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0], + ) + + license = pystac.link.Link(rel='LICENSE', target="https://land.copernicus.eu/en/data-policy") + item.add_link(license) + + return(item) \ No newline at end of file From 91a1f423ab03cc0678c226aba25173e6067e385a Mon Sep 17 00:00:00 2001 From: chorng Date: Tue, 30 Apr 2024 17:40:38 +0200 Subject: [PATCH 29/80] Fix get_geom_wgs84 function * apply rasterio.warp.transform_bounds to replace pyproj.transform fix schema to reflect the new way of reprojection --- schema/products/vpp.json | 48 ++++++++++++++++++++-------------------- scripts/vpp/item.py | 11 +++++---- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/schema/products/vpp.json b/schema/products/vpp.json index dbccdbb..e309b6a 100644 --- a/schema/products/vpp.json +++ b/schema/products/vpp.json @@ -37,50 +37,50 @@ "oneOf": [ { "items": [ - { "type": "number", "minimum": -25, "maximum": 45 }, - { "type": "number", "minimum": 
26, "maximum": 72 }, - { "type": "number", "minimum": -25, "maximum": 45 }, - { "type": "number", "minimum": 26, "maximum": 72 } + { "type": "number", "minimum": -31.86, "maximum": 45.12 }, + { "type": "number", "minimum": 26.99, "maximum": 72.09 }, + { "type": "number", "minimum": -31.86, "maximum": 45.12 }, + { "type": "number", "minimum": 26.99, "maximum": 72.09 } ] }, { "items": [ - { "type": "number", "minimum": -62.09, "maximum": -60.11 }, - { "type": "number", "minimum": 13.46, "maximum": 15.37 }, - { "type": "number", "minimum": -62.09, "maximum": -60.11 }, - { "type": "number", "minimum": 13.46, "maximum": 15.37 } + { "type": "number", "minimum": -62.09, "maximum": -60.1 }, + { "type": "number", "minimum": 13.45, "maximum": 15.38 }, + { "type": "number", "minimum": -62.09, "maximum": -60.1 }, + { "type": "number", "minimum": 13.45, "maximum": 15.38 } ] }, { "items": [ - { "type": "number", "minimum": 44.06, "maximum": 46.03 }, - { "type": "number", "minimum": -13.67, "maximum": -11.74 }, - { "type": "number", "minimum": 44.06, "maximum": 46.03 }, - { "type": "number", "minimum": -13.67, "maximum": -11.74 } + { "type": "number", "minimum": 44.07, "maximum": 46.02 }, + { "type": "number", "minimum": -13.67, "maximum": -11.75 }, + { "type": "number", "minimum": 44.07, "maximum": 46.02 }, + { "type": "number", "minimum": -13.67, "maximum": -11.75 } ] }, { "items": [ - { "type": "number", "minimum": 55.05, "maximum": 56.15 }, - { "type": "number", "minimum": -21.79, "maximum": -19.88 }, - { "type": "number", "minimum": 55.05, "maximum": 56.15 }, - { "type": "number", "minimum": -21.79, "maximum": -19.88 } + { "type": "number", "minimum": 55.06, "maximum": 56.15 }, + { "type": "number", "minimum": -21.8, "maximum": -19.88 }, + { "type": "number", "minimum": 55.06, "maximum": 56.15 }, + { "type": "number", "minimum": -21.8, "maximum": -19.88 } ] }, { "items": [ - { "type": "number", "minimum": -62.08, "maximum": -60.07 }, - { "type": "number", "minimum": 15.27, 
"maximum": 17.18 }, - { "type": "number", "minimum": -62.08, "maximum": -60.07 }, - { "type": "number", "minimum": 15.27, "maximum": 17.18 } + { "type": "number", "minimum": -62.08, "maximum": -60.08 }, + { "type": "number", "minimum": 15.26, "maximum": 17.19 }, + { "type": "number", "minimum": -62.08, "maximum": -60.08 }, + { "type": "number", "minimum": 15.26, "maximum": 17.19 } ] }, { "items": [ - { "type": "number", "minimum": -55.22, "maximum": -50.9 }, - { "type": "number", "minimum": 1.7, "maximum": 6.33 }, - { "type": "number", "minimum": -55.22, "maximum": -50.9 }, - { "type": "number", "minimum": 1.7, "maximum": 6.33 } + { "type": "number", "minimum": -55.21, "maximum": -50.9 }, + { "type": "number", "minimum": 1.71, "maximum": 6.34 }, + { "type": "number", "minimum": -55.21, "maximum": -50.9 }, + { "type": "number", "minimum": 1.71, "maximum": 6.34 } ] } ] diff --git a/scripts/vpp/item.py b/scripts/vpp/item.py index 9416e1f..3850e4f 100644 --- a/scripts/vpp/item.py +++ b/scripts/vpp/item.py @@ -12,10 +12,10 @@ from botocore.paginate import PageIterator from jsonschema import Draft7Validator from jsonschema.exceptions import best_match -from pyproj import Transformer from pystac.extensions.projection import ProjectionExtension from rasterio.coords import BoundingBox from rasterio.crs import CRS +from rasterio.warp import transform_bounds from referencing import Registry, Resource from shapely.geometry import Polygon, box, mapping @@ -63,11 +63,10 @@ def read_metadata_from_s3(bucket: str, key: str, aws_session: boto3.Session) -> def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: - transformer = Transformer.from_crs(crs.to_epsg(), 4326) - miny, minx = transformer.transform(bounds.left, bounds.bottom) - maxy, maxx = transformer.transform(bounds.right, bounds.top) - bbox = (minx, miny, maxx, maxy) - return box(*bbox) + bbox = rio.coords.BoundingBox( + *transform_bounds(crs.to_epsg(), 4326, bounds.left, bounds.bottom, bounds.right, bounds.top) + ) + 
return box(*(bbox.left, bbox.bottom, bbox.right, bbox.top)) def get_description(product_id: str) -> str: From 088f859cbfcdbece8c9a775f025a62a7462212b6 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 30 Apr 2024 23:58:20 +0200 Subject: [PATCH 30/80] separating constants and removing f-strings, adding item_assets --- scripts/clc/clms_collection_generator.ipynb | 1300 ++++++++++++++- scripts/clc/clms_item_generator.ipynb | 1599 +++++++++++++++++-- 2 files changed, 2738 insertions(+), 161 deletions(-) diff --git a/scripts/clc/clms_collection_generator.ipynb b/scripts/clc/clms_collection_generator.ipynb index b0ad047..5bac18d 100644 --- a/scripts/clc/clms_collection_generator.ipynb +++ b/scripts/clc/clms_collection_generator.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -13,25 +13,74 @@ "import pystac.item\n", "import pystac.link\n", "from pystac.provider import ProviderRole\n", - "from pystac.extensions.projection import ProjectionExtension\n" + "from pystac.extensions.projection import ProjectionExtension\n", + "\n", + "from datetime import datetime, UTC" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "TITLE_DICT = {\n", + " 'tif': 'Single Band Land Classification {label}',\n", + " 'tif_xml': 'TIFF Metadata {label}',\n", + " 'tif_aux_xml': 'TIFF Statistics {label}',\n", + " 'tif_ovr': 'Pyramid {label}',\n", + " 'tif_vat_cpg': 'Encoding {label}',\n", + " 'tif_vat_dbf': 'Database {label}',\n", + " 'txt': 'Legends {label}',\n", + " 'tif_lyr': 'Legend Layer {label}',\n", + " 'tfw': 'World File {label}',\n", + " 'xml': 'Single Band Land Classification Metadata {label}',\n", + "}\n", + "\n", + "MEDIA_TYPE_DICT = {\n", + " 'tif': pystac.MediaType.COG,\n", + " 'tif_xml': pystac.MediaType.XML,\n", + " 'tif_aux_xml': pystac.MediaType.XML,\n", + " 'tif_ovr': 'image/tiff; 
application=geotiff; profile=pyramid',\n", + " 'tif_vat_cpg': pystac.MediaType.TEXT,\n", + " 'tif_vat_dbf': 'application/dbf',\n", + " 'txt': pystac.MediaType.TEXT,\n", + " 'tif_lyr': 'image/tiff; application=geotiff; profile=layer',\n", + " 'tfw': pystac.MediaType.TEXT,\n", + " 'xml': pystac.MediaType.XML,\n", + "}\n", + "\n", + "ROLES_DICT = {\n", + " 'tif': ['data', 'visual'],\n", + " 'tif_xml': ['metadata'],\n", + " 'tif_aux_xml': ['metadata'],\n", + " 'tif_ovr': ['metadata'],\n", + " 'tif_vat_cpg': ['metadata'],\n", + " 'tif_vat_dbf': ['metadata'],\n", + " 'txt': ['metadata'],\n", + " 'tif_lyr': ['metadata'],\n", + " 'tfw': ['metadata'],\n", + " 'xml': ['metadata'],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "WORKING_DIR = os.getcwd()\n", "STAC_DIR = 'stac_tests'\n", "COLLECTION_ID = 'corine-land-cover-raster'\n", - "COLLECTION_TITLE = \"CORINE Land Cover Raster\"\n", + "COLLECTION_TITLE = 'CORINE Land Cover Raster'\n", "COLLECTION_DESCRIPTION = (\"The European Commission launched the CORINE (Coordination of Information on the Environment) \"\n", " \"program in an effort to develop a standardized methodology for producing continent-scale land \"\n", " \"cover, biotope, and air quality maps. 
The CORINE Land Cover (CLC) product offers a pan-European \"\n", " \"land cover and land use inventory with 44 thematic classes, ranging from broad forested areas \"\n", " \"to individual vineyards.\")\n", - "\n", + "COLLECTION_KEYWORDS = [\"clms\", \"corine\", \"derived data\", \"land cover\", \"machine learning\", \"open data\"]\n", + "COLLECTION_LICENSE = 'proprietary'\n", "\n", "\n", "# CLMS_CATALOG_LINK = pystac.link.Link(\n", @@ -49,42 +98,1267 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "\n", "sp_extent = pystac.SpatialExtent([None, None, None, None])\n", - "tmp_extent = pystac.TemporalExtent([\"1990-01-01T00:00:00.000Z\", None])\n", + "tmp_extent = pystac.TemporalExtent([datetime(1990, 1, 1, microsecond=0, tzinfo=UTC), None])\n", "extent = pystac.Extent(sp_extent, tmp_extent)\n", "\n", "collection = pystac.Collection(id=COLLECTION_ID,\n", " description=COLLECTION_DESCRIPTION,\n", " title=COLLECTION_TITLE,\n", " extent=extent,\n", + " keywords=COLLECTION_KEYWORDS,\n", + " license=COLLECTION_LICENSE\n", " )" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
    \n", + " \n", + " \n", + " \n", + "
  • \n", + " type\n", + " \"Collection\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " id\n", + " \"corine-land-cover-raster\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " stac_version\n", + " \"1.0.0\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " description\n", + " \"The European Commission launched the CORINE (Coordination of Information on the Environment) program in an effort to develop a standardized methodology for producing continent-scale land cover, biotope, and air quality maps. The CORINE Land Cover (CLC) product offers a pan-European land cover and land use inventory with 44 thematic classes, ranging from broad forested areas to individual vineyards.\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " links[] 0 items\n", + " \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " title\n", + " \"CORINE Land Cover Raster\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " extent\n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " spatial\n", + "
        \n", + " \n", + " \n", + "
      • \n", + " bbox[] 4 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " None\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " None\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 2\n", + " None\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 3\n", + " None\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " temporal\n", + "
        \n", + " \n", + " \n", + "
      • \n", + " interval[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 0[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " \"1990-01-01T00:00:00Z\"\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " None\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " license\n", + " \"proprietary\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " keywords[] 6 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + " \"clms\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 1\n", + " \"corine\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 2\n", + " \"derived data\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 3\n", + " \"land cover\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 4\n", + " \"machine learning\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 5\n", + " \"open data\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + "
\n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collection" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from pystac.extensions.item_assets import ItemAssetsExtension, AssetDefinition\n", + "\n", + "item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True)\n", + "item_assets.item_assets = {\n", + " key: AssetDefinition({\"title\": TITLE_DICT[key].format(label='').strip(),\n", + " \"media_type\": MEDIA_TYPE_DICT[key], \n", + " \"roles\": ROLES_DICT[key]})\n", + " for key in TITLE_DICT\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
    \n", + " \n", + " \n", + " \n", + "
  • \n", + " type\n", + " \"Collection\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " id\n", + " \"corine-land-cover-raster\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " stac_version\n", + " \"1.0.0\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " description\n", + " \"The European Commission launched the CORINE (Coordination of Information on the Environment) program in an effort to develop a standardized methodology for producing continent-scale land cover, biotope, and air quality maps. The CORINE Land Cover (CLC) product offers a pan-European land cover and land use inventory with 44 thematic classes, ranging from broad forested areas to individual vineyards.\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " links[] 0 items\n", + " \n", + "
  • \n", + " \n", + " \n", + " \n", + "
  • \n", + " stac_extensions[] 1 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + " \"https://stac-extensions.github.io/item-assets/v1.0.0/schema.json\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " item_assets\n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " tif\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Single Band Land Classification\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " media_type\n", + " \"image/tiff; application=geotiff; profile=cloud-optimized\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 2 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"data\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " \"visual\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " tif_xml\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"TIFF Metadata\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " media_type\n", + " \"application/xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " tif_aux_xml\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"TIFF Statistics\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " media_type\n", + " \"application/xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " tif_ovr\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Pyramid\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " media_type\n", + " \"image/tiff; application=geotiff; profile=pyramid\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " tif_vat_cpg\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Encoding\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " media_type\n", + " \"text/plain\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " tif_vat_dbf\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Database\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " media_type\n", + " \"application/dbf\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " txt\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Legends\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " media_type\n", + " \"text/plain\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " tif_lyr\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Legend Layer\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " media_type\n", + " \"image/tiff; application=geotiff; profile=layer\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " tfw\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"World File\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " media_type\n", + " \"text/plain\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " xml\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Single Band Land Classification Metadata\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " media_type\n", + " \"application/xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " title\n", + " \"CORINE Land Cover Raster\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " extent\n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " spatial\n", + "
        \n", + " \n", + " \n", + "
      • \n", + " bbox[] 4 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " None\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " None\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 2\n", + " None\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 3\n", + " None\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " temporal\n", + "
        \n", + " \n", + " \n", + "
      • \n", + " interval[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 0[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " \"1990-01-01T00:00:00Z\"\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " None\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " license\n", + " \"proprietary\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " keywords[] 6 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + " \"clms\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 1\n", + " \"corine\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 2\n", + " \"derived data\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 3\n", + " \"land cover\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 4\n", + " \"machine learning\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 5\n", + " \"open data\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + "
\n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collection" + ] } ], "metadata": { "kernelspec": { "display_name": "stacdev", "language": "python", - "name": "stacdev" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/scripts/clc/clms_item_generator.ipynb b/scripts/clc/clms_item_generator.ipynb index b7ddd67..0ea90e1 100644 --- a/scripts/clc/clms_item_generator.ipynb +++ b/scripts/clc/clms_item_generator.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 17, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -40,10 +40,112 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ + "# This goes into constants.py ...\n", + "\n", + "STAC_DIR = 'stac_tests'\n", + "COLLECTION_ID = 'corine-land-cover-raster'\n", + "\n", + "DOM_DICT = {\n", + " 'GLP': 'Guadeloupe',\n", + " 'GUF': 'French Guyana',\n", + " 'MTQ': 'Martinique',\n", + " 'MYT': 'Mayotte',\n", + " 'REU': 'Réunion',\n", + " '': 'Europe',\n", + "}\n", + "\n", + "MEDIA_TYPE_DICT = {\n", + " 'tif': pystac.MediaType.COG,\n", + " 'tif_xml': pystac.MediaType.XML,\n", + " 'tif_aux_xml': pystac.MediaType.XML,\n", + " 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid',\n", + " 'tif_vat_cpg': pystac.MediaType.TEXT,\n", + " 'tif_vat_dbf': 'application/dbf',\n", + " 'txt': pystac.MediaType.TEXT,\n", + " 'tif_lyr': 'image/tiff; application=geotiff; profile=layer',\n", + " 'tfw': pystac.MediaType.TEXT,\n", + " 'xml': pystac.MediaType.XML,\n", + "}\n", + "\n", + "ROLES_DICT = {\n", + " 'tif': ['data', 'visual'],\n", + " 'tif_xml': ['metadata'],\n", + " 'tif_aux_xml': ['metadata'],\n", + " 'tif_ovr': ['metadata'],\n", + " 'tif_vat_cpg': ['metadata'],\n", + " 'tif_vat_dbf': ['metadata'],\n", + " 'txt': ['metadata'],\n", + " 'tif_lyr': ['metadata'],\n", + " 'tfw': ['metadata'],\n", + " 'xml': 
['metadata'],\n", + "}\n", + "\n", + "TITLE_DICT = {\n", + " 'tif': 'Single Band Land Classification {label}',\n", + " 'tif_xml': 'TIFF Metadata {label}',\n", + " 'tif_aux_xml': 'TIFF Statistics {label}',\n", + " 'tif_ovr': 'Pyramid {label}',\n", + " 'tif_vat_cpg': 'Encoding {label}',\n", + " 'tif_vat_dbf': 'Database {label}',\n", + " 'txt': 'Legends {label}',\n", + " 'tif_lyr': 'Legend Layer {label}',\n", + " 'tfw': 'World File {label}',\n", + " 'xml': 'Single Band Land Classification Metadata {label}',\n", + "}\n", + "\n", + "CLC_PROVIDER = pystac.provider.Provider(\n", + " name='Copernicus Land Monitoring Service',\n", + " description=('The Copernicus Land Monitoring Service provides '\n", + " 'geographical information on land cover and its '\n", + " 'changes, land use, ground motions, vegetation state, '\n", + " 'water cycle and Earth\\'s surface energy variables to '\n", + " 'a broad range of users in Europe and across the World '\n", + " 'in the field of environmental terrestrial applications.'),\n", + " roles=[ProviderRole.LICENSOR, ProviderRole.HOST],\n", + " url='https://land.copernicus.eu'\n", + ")\n", + "\n", + "\n", + "ITEM_DESCRIPTION = ('Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' \n", + " 'datasets produced within the frame the Copernicus Land Monitoring Service '\n", + " 'referring to land cover / land use status of year {year}. '\n", + " 'CLC service has a long-time heritage (formerly known as \\\"CORINE Land Cover Programme\\\"), '\n", + " 'coordinated by the European Environment Agency (EEA). It provides consistent '\n", + " 'and thematically detailed information on land cover and land cover changes across Europe. '\n", + " 'CLC datasets are based on the classification of satellite images produced by the national '\n", + " 'teams of the participating countries - the EEA members and cooperating countries (EEA39). 
'\n", + " 'National CLC inventories are then further integrated into a seamless land cover map of Europe. '\n", + " 'The resulting European database relies on standard methodology and nomenclature with following '\n", + " 'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; '\n", + " 'minimum mapping unit (MMU) for status layers is 25 hectares; '\n", + " 'minimum width of linear elements is 100 metres. '\n", + " 'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares '\n", + " 'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. '\n", + " 'The CLC service delivers important data sets supporting the implementation of key priority '\n", + " 'areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, '\n", + " 'halting the loss of biological diversity, tracking the impacts of climate change, '\n", + " 'monitoring urban land take, assessing developments in agriculture or dealing with '\n", + " 'water resources directives. CLC belongs to the Pan-European component of the '\n", + " 'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the '\n", + " 'European Copernicus Programme coordinated by the European Environment Agency, '\n", + " 'providing environmental information from a combination of air- and space-based observation '\n", + " 'systems and in-situ monitoring. Additional information about CLC product description including '\n", + " 'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. 
'\n", + " 'CLC class descriptions can be found at '\n", + " 'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "'''\n", "def deconstruct_clc_name(filename: str):\n", " id = os.path.basename(filename).split('.')[0]\n", " p = re.compile((\"U(?P[0-9]{4})_\"\n", @@ -54,7 +156,7 @@ " m = p.search(id)\n", "\n", " return(m.groupdict())\n", - "\n", + "'''\n", "\n", "def deconstruct_clc_name(filename: str):\n", " p = re.compile('^(?P[A-Z0-9a-z_]*).(?P.*)$')\n", @@ -79,57 +181,9 @@ " filename_elements = deconstruct_clc_name(filename)\n", " suffix = filename_elements['suffix'].replace('.', '_')\n", " \n", - " DOM_DICT = {\n", - " 'GLP': 'Guadeloupe',\n", - " 'GUF': 'French Guyana',\n", - " 'MTQ': 'Martinique',\n", - " 'MYT': 'Mayotte',\n", - " 'REU': 'Réunion',\n", - " '': 'Europe',\n", - " }\n", - "\n", - " MEDIA_TYPE_DICT = {\n", - " 'tif': pystac.MediaType.COG,\n", - " 'tif_xml': pystac.MediaType.XML,\n", - " 'tif_aux_xml': pystac.MediaType.XML,\n", - " 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid',\n", - " 'tif_vat_cpg': pystac.MediaType.TEXT,\n", - " 'tif_vat_dbf': 'application/dbf',\n", - " 'txt': pystac.MediaType.TEXT,\n", - " 'tif_lyr': 'image/tiff; application=geotiff; profile=layer',\n", - " 'tfw': pystac.MediaType.TEXT,\n", - " 'xml': pystac.MediaType.XML,\n", - " }\n", - " \n", " label = DOM_DICT[DOM_code]\n", " \n", - " TITLE_DICT = {\n", - " 'tif': f'Single Band Land Classification {label}',\n", - " 'tif_xml': f'TIFF Metadata {label}',\n", - " 'tif_aux_xml': f'TIFF Statistics {label}',\n", - " 'tif_ovr': f'Pyramid {label}',\n", - " 'tif_vat_cpg': f'Encoding {label}',\n", - " 'tif_vat_dbf': f'Database {label}',\n", - " 'txt': f'Legends {label}',\n", - " 'tif_lyr': f'Legend Layer {label}',\n", - " 'tfw': f'World File {label}',\n", - " 'xml': f'Single Band Land Classification 
Metadata {label}',\n", - " }\n", - "\n", - " ROLES_DICT = {\n", - " 'tif': ['data', 'visual'],\n", - " 'tif_xml': ['metadata'],\n", - " 'tif_aux_xml': ['metadata'],\n", - " 'tif_ovr': ['metadata'],\n", - " 'tif_vat_cpg': ['metadata'],\n", - " 'tif_vat_dbf': ['metadata'],\n", - " 'txt': ['metadata'],\n", - " 'tif_lyr': ['metadata'],\n", - " 'tfw': ['metadata'],\n", - " 'xml': ['metadata'],\n", - " }\n", - "\n", - " asset = pystac.Asset(href=filename, title=TITLE_DICT[suffix], media_type=MEDIA_TYPE_DICT[suffix], roles=ROLES_DICT[suffix])\n", + " asset = pystac.Asset(href=filename, title=TITLE_DICT[suffix].format(label=label), media_type=MEDIA_TYPE_DICT[suffix], roles=ROLES_DICT[suffix])\n", " return(f\"{filename_elements['id']}_{suffix}\", asset)\n", "\n", "def get_img_paths(path: str): \n", @@ -161,7 +215,9 @@ " continue\n", " \n", " for file in files:\n", - " if file.startswith(clc_name + '.') or file.endswith((f'{clc_name_elements[\"DOM_code\"]}.tif.lyr', 'QGIS.txt',)):\n", + " if (file.startswith(clc_name + '.') or \n", + " file.endswith((f'{clc_name_elements[\"DOM_code\"]}.tif.lyr', 'QGIS.txt',)) and \n", + " clc_name in file):\n", " asset_files.append(os.path.join(root, file))\n", "\n", " return(asset_files)\n", @@ -171,55 +227,14 @@ " bbox_warped = rio.warp.transform_bounds(img.crs, target_crs, *img.bounds)\n", " return(bbox_warped)\n", "\n", - "def create_item(img_path):\n", + "def create_item(img_path, root):\n", "\n", " clc_name_elements = deconstruct_clc_name(img_path)\n", "\n", " asset_files = get_asset_files(root, clc_name=clc_name_elements['id'])\n", " asset_files = [f for f in asset_files if not f.endswith('aux')]\n", - " asset_files\n", - "\n", " year = clc_name_elements.get('reference_year')\n", - "\n", - " CLC_PROVIDER = pystac.provider.Provider(\n", - " name='Copernicus Land Monitoring Service',\n", - " description=('The Copernicus Land Monitoring Service provides '\n", - " 'geographical information on land cover and its '\n", - " 'changes, land 
use, ground motions, vegetation state, '\n", - " 'water cycle and Earth\\'s surface energy variables to '\n", - " 'a broad range of users in Europe and across the World '\n", - " 'in the field of environmental terrestrial applications.'),\n", - " roles=[ProviderRole.LICENSOR, ProviderRole.HOST],\n", - " url='https://land.copernicus.eu'\n", - " )\n", - "\n", - " props = {'description': (f'Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' \n", - " f'datasets produced within the frame the Copernicus Land Monitoring Service '\n", - " f'referring to land cover / land use status of year {year}. '\n", - " f'CLC service has a long-time heritage (formerly known as \\\"CORINE Land Cover Programme\\\"), '\n", - " f'coordinated by the European Environment Agency (EEA). It provides consistent '\n", - " f'and thematically detailed information on land cover and land cover changes across Europe. '\n", - " f'CLC datasets are based on the classification of satellite images produced by the national '\n", - " f'teams of the participating countries - the EEA members and cooperating countries (EEA39). '\n", - " f'National CLC inventories are then further integrated into a seamless land cover map of Europe. '\n", - " f'The resulting European database relies on standard methodology and nomenclature with following '\n", - " f'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; '\n", - " f'minimum mapping unit (MMU) for status layers is 25 hectares; '\n", - " f'minimum width of linear elements is 100 metres. '\n", - " f'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares '\n", - " f'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. '\n", - " f'The CLC service delivers important data sets supporting the implementation of key priority '\n", - " f'areas of the Environment Action Programmes of the European Union as e.g. 
protecting ecosystems, '\n", - " f'halting the loss of biological diversity, tracking the impacts of climate change, '\n", - " f'monitoring urban land take, assessing developments in agriculture or dealing with '\n", - " f'water resources directives. CLC belongs to the Pan-European component of the '\n", - " f'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the '\n", - " f'European Copernicus Programme coordinated by the European Environment Agency, '\n", - " f'providing environmental information from a combination of air- and space-based observation '\n", - " f'systems and in-situ monitoring. Additional information about CLC product description including '\n", - " f'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. '\n", - " f'CLC class descriptions can be found at '\n", - " f'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'),\n", + " props = {'description': ITEM_DESCRIPTION.format(year=year),\n", " 'created': None,\n", " 'providers': CLC_PROVIDER.to_dict(),\n", " }\n", @@ -238,7 +253,7 @@ " }\n", "\n", " item = pystac.Item(**params)\n", - "\n", + " \n", " for asset_file in asset_files:\n", " # print(asset_file)\n", " key, asset = create_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code'))\n", @@ -262,89 +277,1379 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GLP.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GUF.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif',\n", - " 
'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MYT.tif',\n", - " 'X:/EO/u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_REU.tif']" + "'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc'" ] }, - "execution_count": 24, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "root = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m'\n", - "\n", - "img_paths = get_img_paths(path=root)\n", - "\n", - "# for img_path in img_paths:\n", - "\n", - "img_path = img_paths[0]\n", - "img_paths" + "os.getcwd()" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'update_campaign': '2018', 'theme': 'CLC', 'reference_year': '2012', 'release_year': '2020', 'release_number': '20u1', 'country_code': '', 'DOM_code': '', 'id': 'U2018_CLC2012_V2020_20u1', 'suffix': 'tif'}\n" + "['../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GLP.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GUF.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MYT.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_REU.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2018_V2020_20u1.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_GLP.tif', 
'../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_GUF.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_MTQ.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_MYT.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_REU.tif']\n" ] + }, + { + "data": { + "text/plain": [ + "['../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tfw',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif.aux.xml',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif.ovr',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif.vat.cpg',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif.vat.dbf',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif.xml',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\Metadata\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.xml']" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(clc_name_elements)" + "root = '../../../CLC_100m'\n", + "\n", + "img_paths = get_img_paths(path=root)\n", + "print(img_paths)\n", + "# for img_path in img_paths:\n", + "\n", + "img_path = img_paths[3]\n", + "\n", + "get_asset_files(root, deconstruct_clc_name(img_path)['id'])" ] }, { "cell_type": 
"code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 32, + "execution_count": 50, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tfw\n", - "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif\n", - "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif.aux.xml\n", - "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif.ovr\n", - "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif.vat.cpg\n", - "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif.vat.dbf\n", - "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif.xml\n", - "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\Legend\\CLC2018_CLC2012_V2018_20.tif.lyr\n", - "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\Legend\\CLC2018_CLC2012_V2018_20_QGIS.txt\n", - "X:/EO/u2018_clc2012_v2020_20u1_raster100m\\Metadata\\U2018_CLC2012_V2020_20u1.xml\n" - ] + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
    \n", + " \n", + " \n", + " \n", + "
  • \n", + " type\n", + " \"Feature\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " stac_version\n", + " \"1.0.0\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " id\n", + " \"U2018_CLC2012_V2020_20u1_FR_MTQ\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " properties\n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " description\n", + " \"Corine Land Cover 2012 (CLC2012) is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2012. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. CLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. 
Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.\"\n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " created\n", + " None\n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " providers\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " name\n", + " \"Copernicus Land Monitoring Service\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " description\n", + " \"The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 2 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"licensor\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " \"host\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " url\n", + " \"https://land.copernicus.eu\"\n", + "
      • \n", + " \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " start_datetime\n", + " \"2012-01-01T00:00:00Z\"\n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " end_datetime\n", + " \"2012-12-31T00:00:00Z\"\n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " datetime\n", + " None\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " geometry\n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " type\n", + " \"Polygon\"\n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " coordinates[] 1 items\n", + " \n", + "
        \n", + " \n", + " \n", + "
      • \n", + " 0[] 5 items\n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 0[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " -60.711279992199906\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " 14.296077071286975\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 1[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " -60.711279992199906\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " 14.970827479517444\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 2[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " -61.32702312276209\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " 14.970827479517444\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 3[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " -61.32702312276209\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " 14.296077071286975\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + "
        • \n", + " 4[] 2 items\n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 0\n", + " -60.711279992199906\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
            \n", + " \n", + " \n", + " \n", + "
          • \n", + " 1\n", + " 14.296077071286975\n", + "
          • \n", + " \n", + " \n", + " \n", + "
          \n", + " \n", + "
        • \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + " \n", + "
    • \n", + " \n", + " \n", + "
    \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " links[] 1 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " rel\n", + " \"LICENSE\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"https://land.copernicus.eu/en/data-policy\"\n", + "
      • \n", + " \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " assets\n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_FR_MTQ_tfw\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tfw\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"text/plain\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"World File Martinique\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_FR_MTQ_tif\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"image/tiff; application=geotiff; profile=cloud-optimized\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Single Band Land Classification Martinique\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " proj:epsg\n", + " 4559\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " proj:bbox[] 4 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " 680451.000324164\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " 1581682.1579604005\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 2\n", + " 746151.000324164\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 3\n", + " 1655782.1579604005\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + " \n", + "
      • \n", + " proj:shape[] 2 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " 741\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " 657\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + " \n", + "
      • \n", + " proj:transform[] 12 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " 100.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 2\n", + " 680451.000324164\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 3\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 4\n", + " -100.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 5\n", + " 1655782.1579604005\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 6\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 7\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 8\n", + " 1.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 9\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 10\n", + " 0.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 11\n", + " 1.0\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 2 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"data\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 1\n", + " \"visual\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_FR_MTQ_tif_aux_xml\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif.aux.xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"application/xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"TIFF Statistics Martinique\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_FR_MTQ_tif_ovr\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif.ovr\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"image/tiff; application=geotiff; profile=pyramid\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Pyramid Martinique\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_FR_MTQ_tif_vat_cpg\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif.vat.cpg\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"text/plain\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Encoding Martinique\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_FR_MTQ_tif_vat_dbf\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif.vat.dbf\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"application/dbf\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Database Martinique\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_FR_MTQ_tif_xml\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif.xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"application/xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"TIFF Metadata Martinique\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    • \n", + " U2018_CLC2012_V2020_20u1_FR_MTQ_xml\n", + "
        \n", + " \n", + " \n", + " \n", + "
      • \n", + " href\n", + " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/Metadata/U2018_CLC2012_V2020_20u1_FR_MTQ.xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " type\n", + " \"application/xml\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " title\n", + " \"Single Band Land Classification Metadata Martinique\"\n", + "
      • \n", + " \n", + " \n", + " \n", + " \n", + "
      • \n", + " roles[] 1 items\n", + " \n", + "
          \n", + " \n", + " \n", + " \n", + "
        • \n", + " 0\n", + " \"metadata\"\n", + "
        • \n", + " \n", + " \n", + " \n", + "
        \n", + " \n", + "
      • \n", + " \n", + " \n", + "
      \n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " bbox[] 4 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + " -61.32702312276209\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 1\n", + " 14.296077071286975\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 2\n", + " -60.711279992199906\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 3\n", + " 14.970827479517444\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + " \n", + "
  • \n", + " stac_extensions[] 1 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + " \"https://stac-extensions.github.io/projection/v1.1.0/schema.json\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + "
\n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "create_item(img_path)" + "item = create_item(img_path, root)\n", + "item" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 51, "metadata": {}, "outputs": [ { @@ -354,7 +1659,7 @@ "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[18], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m asset_def \u001b[38;5;241m=\u001b[39m \u001b[43mAssetDefinition\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m asset_def\u001b[38;5;241m.\u001b[39mitem_assets(item)\n", + "Cell \u001b[1;32mIn[51], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m asset_def \u001b[38;5;241m=\u001b[39m \u001b[43mAssetDefinition\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2\u001b[0m asset_def\u001b[38;5;241m.\u001b[39mitem_assets(item)\n\u001b[0;32m 3\u001b[0m item\u001b[38;5;241m.\u001b[39mmake_asset_hrefs_relative()\n", "\u001b[1;31mTypeError\u001b[0m: AssetDefinition.__init__() missing 1 required positional argument: 'properties'" ] } @@ -367,31 +1672,29 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 52, "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\stacs/corine-land-cover-raster/corine-land-cover-raster.json'", + "evalue": "[Errno 2] No such file or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc\\\\stacs/clms_catalog.json'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[19], line 
10\u001b[0m\n\u001b[0;32m 3\u001b[0m COLLECTION_ID \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorine-land-cover-raster\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 5\u001b[0m CLMS_CATALOG_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 6\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mROOT, target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstacs/clms_catalog.json\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[0;32m 7\u001b[0m )\n\u001b[0;32m 8\u001b[0m COLLECTION_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 9\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mCOLLECTION,\n\u001b[1;32m---> 10\u001b[0m target\u001b[38;5;241m=\u001b[39m\u001b[43mpystac\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSTACObject\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mWORKING_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstacs/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mCOLLECTION_ID\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mCOLLECTION_ID\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[0;32m 11\u001b[0m )\n\u001b[0;32m 12\u001b[0m ITEM_PARENT_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 13\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mPARENT,\n\u001b[0;32m 14\u001b[0m target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstacs/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[0;32m 15\u001b[0m )\n\u001b[0;32m 17\u001b[0m item\u001b[38;5;241m.\u001b[39mset_self_href(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mSTAC_DIR\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n", + "Cell \u001b[1;32mIn[52], line 6\u001b[0m\n\u001b[0;32m 2\u001b[0m STAC_DIR \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstac_tests\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 3\u001b[0m COLLECTION_ID \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorine-land-cover-raster\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 5\u001b[0m CLMS_CATALOG_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[1;32m----> 6\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mROOT, target\u001b[38;5;241m=\u001b[39m\u001b[43mpystac\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSTACObject\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mWORKING_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstacs/clms_catalog.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 7\u001b[0m )\n\u001b[0;32m 8\u001b[0m COLLECTION_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 9\u001b[0m 
rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mCOLLECTION,\n\u001b[0;32m 10\u001b[0m target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstacs/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[0;32m 11\u001b[0m )\n\u001b[0;32m 12\u001b[0m ITEM_PARENT_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 13\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mPARENT,\n\u001b[0;32m 14\u001b[0m target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstacs/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[0;32m 15\u001b[0m )\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_object.py:619\u001b[0m, in \u001b[0;36mSTACObject.from_file\u001b[1;34m(cls, href, stac_io)\u001b[0m\n\u001b[0;32m 607\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Reads a STACObject implementation from a file.\u001b[39;00m\n\u001b[0;32m 608\u001b[0m \n\u001b[0;32m 609\u001b[0m 
\u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 616\u001b[0m \u001b[38;5;124;03m by the JSON read from the file located at HREF.\u001b[39;00m\n\u001b[0;32m 617\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 618\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m==\u001b[39m STACObject:\n\u001b[1;32m--> 619\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(S, \u001b[43mpystac\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 621\u001b[0m href \u001b[38;5;241m=\u001b[39m make_posix_style(href)\n\u001b[0;32m 623\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stac_io \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\__init__.py:165\u001b[0m, in \u001b[0;36mread_file\u001b[1;34m(href, stac_io)\u001b[0m\n\u001b[0;32m 163\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stac_io \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 164\u001b[0m stac_io \u001b[38;5;241m=\u001b[39m StacIO\u001b[38;5;241m.\u001b[39mdefault()\n\u001b[1;32m--> 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mstac_io\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_stac_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:234\u001b[0m, in \u001b[0;36mStacIO.read_stac_object\u001b[1;34m(self, source, root, *args, **kwargs)\u001b[0m\n\u001b[0;32m 208\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_stac_object\u001b[39m(\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 210\u001b[0m source: HREF,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 213\u001b[0m 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[0;32m 214\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m STACObject:\n\u001b[0;32m 215\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Read a STACObject from a JSON file at the given source.\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \n\u001b[0;32m 217\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.read_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[38;5;124;03m contained in the file at the given uri.\u001b[39;00m\n\u001b[0;32m 233\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 234\u001b[0m d \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_json\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 235\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstac_object_from_dict(\n\u001b[0;32m 236\u001b[0m d, href\u001b[38;5;241m=\u001b[39msource, root\u001b[38;5;241m=\u001b[39mroot, preserve_dict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 237\u001b[0m )\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:205\u001b[0m, in \u001b[0;36mStacIO.read_json\u001b[1;34m(self, source, *args, **kwargs)\u001b[0m\n\u001b[0;32m 188\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_json\u001b[39m(\u001b[38;5;28mself\u001b[39m, source: HREF, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m 
\u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Any]:\n\u001b[0;32m 189\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Read a dict from the given source.\u001b[39;00m\n\u001b[0;32m 190\u001b[0m \n\u001b[0;32m 191\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.read_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[38;5;124;03m given source.\u001b[39;00m\n\u001b[0;32m 204\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 205\u001b[0m txt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_text\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mjson_loads(txt)\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:282\u001b[0m, in \u001b[0;36mDefaultStacIO.read_text\u001b[1;34m(self, source, *_, **__)\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A concrete implementation of :meth:`StacIO.read_text\u001b[39;00m\n\u001b[0;32m 278\u001b[0m \u001b[38;5;124;03m`. 
Converts the ``source`` argument to a string (if it\u001b[39;00m\n\u001b[0;32m 279\u001b[0m \u001b[38;5;124;03mis not already) and delegates to :meth:`DefaultStacIO.read_text_from_href` for\u001b[39;00m\n\u001b[0;32m 280\u001b[0m \u001b[38;5;124;03mopening and reading the file.\"\"\"\u001b[39;00m\n\u001b[0;32m 281\u001b[0m href \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(os\u001b[38;5;241m.\u001b[39mfspath(source))\n\u001b[1;32m--> 282\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_text_from_href\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:305\u001b[0m, in \u001b[0;36mDefaultStacIO.read_text_from_href\u001b[1;34m(self, href)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not read uri \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhref\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 304\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 305\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mutf-8\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m 306\u001b[0m href_contents \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread()\n\u001b[0;32m 307\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m href_contents\n", - "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or 
directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\stacs/corine-land-cover-raster/corine-land-cover-raster.json'" + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc\\\\stacs/clms_catalog.json'" ] } ], "source": [ "WORKING_DIR = os.getcwd()\n", - "STAC_DIR = 'stac_tests'\n", - "COLLECTION_ID = 'corine-land-cover-raster'\n", "\n", "CLMS_CATALOG_LINK = pystac.link.Link(\n", " rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, \"stacs/clms_catalog.json\"))\n", @@ -471,7 +1774,7 @@ "kernelspec": { "display_name": "stacdev", "language": "python", - "name": "stacdev" + "name": "python3" }, "language_info": { "codemirror_mode": { From 930c69672ab0da876a2c0087ad073dc9fcc92a67 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Wed, 1 May 2024 20:12:13 +0200 Subject: [PATCH 31/80] adds collection item_assets --- scripts/clc/clms_collection_generator.ipynb | 232 +++++--------------- 1 file changed, 61 insertions(+), 171 deletions(-) diff --git a/scripts/clc/clms_collection_generator.ipynb b/scripts/clc/clms_collection_generator.ipynb index 5bac18d..6de915d 100644 --- a/scripts/clc/clms_collection_generator.ipynb +++ b/scripts/clc/clms_collection_generator.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -20,53 +20,47 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "TITLE_DICT = {\n", - " 'tif': 'Single Band Land Classification {label}',\n", - " 'tif_xml': 'TIFF Metadata {label}',\n", - " 'tif_aux_xml': 'TIFF Statistics {label}',\n", - " 'tif_ovr': 'Pyramid {label}',\n", - " 'tif_vat_cpg': 'Encoding {label}',\n", - " 'tif_vat_dbf': 'Database {label}',\n", - " 'txt': 'Legends {label}',\n", - " 'tif_lyr': 'Legend Layer {label}',\n", - " 'tfw': 'World File 
{label}',\n", - " 'xml': 'Single Band Land Classification Metadata {label}',\n", + "COLLITAS_TITLE_DICT = {\n", + " 'clc_map': 'Corine Land Cover Map',\n", + " 'clc_map_statistics': 'Corine Land Cover Map Statistics',\n", + " 'clc_map_pyramid': 'Pyramid',\n", + " 'clc_map_encoding': 'Encoding',\n", + " 'clc_map_database': 'Database',\n", + " 'clc_map_database_metadata': 'Database Metadata',\n", + " 'clc_map_tif_metadata': 'TIFF Metadata',\n", + " 'clc_map_metadata': 'Corine Land Cover Map Metadata',\n", "}\n", "\n", - "MEDIA_TYPE_DICT = {\n", - " 'tif': pystac.MediaType.COG,\n", - " 'tif_xml': pystac.MediaType.XML,\n", - " 'tif_aux_xml': pystac.MediaType.XML,\n", - " 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid',\n", - " 'tif_vat_cpg': pystac.MediaType.TEXT,\n", - " 'tif_vat_dbf': 'application/dbf',\n", - " 'txt': pystac.MediaType.TEXT,\n", - " 'tif_lyr': 'image/tiff; application=geotiff; profile=layer',\n", - " 'tfw': pystac.MediaType.TEXT,\n", - " 'xml': pystac.MediaType.XML,\n", + "COLLITAS_MEDIA_TYPE_DICT = {\n", + " 'clc_map': pystac.MediaType.COG,\n", + " 'clc_map_statistics': pystac.MediaType.XML,\n", + " 'clc_map_pyramid': 'image/tiff; application=geotiff; profile=pyramid',\n", + " 'clc_map_encoding': pystac.MediaType.TEXT,\n", + " 'clc_map_database': 'application/dbf',\n", + " 'clc_map_database_metadata': pystac.MediaType.TEXT,\n", + " 'clc_map_tif_metadata': 'image/tiff; application=geotiff; profile=layer',\n", + " 'clc_map_metadata': pystac.MediaType.XML,\n", "}\n", "\n", - "ROLES_DICT = {\n", - " 'tif': ['data', 'visual'],\n", - " 'tif_xml': ['metadata'],\n", - " 'tif_aux_xml': ['metadata'],\n", - " 'tif_ovr': ['metadata'],\n", - " 'tif_vat_cpg': ['metadata'],\n", - " 'tif_vat_dbf': ['metadata'],\n", - " 'txt': ['metadata'],\n", - " 'tif_lyr': ['metadata'],\n", - " 'tfw': ['metadata'],\n", - " 'xml': ['metadata'],\n", + "COLLITAS_ROLES_DICT = {\n", + " 'clc_map': ['data'],\n", + " 'clc_map_statistics': ['metadata'],\n", + " 
'clc_map_pyramid': ['metadata'],\n", + " 'clc_map_encoding': ['metadata'],\n", + " 'clc_map_database': ['metadata'],\n", + " 'clc_map_database_metadata': ['metadata'],\n", + " 'clc_map_tif_metadata': ['metadata'],\n", + " 'clc_map_metadata': ['metadata'],\n", "}" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -98,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -118,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -450,7 +444,7 @@ "" ] }, - "execution_count": 11, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -461,7 +455,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -469,16 +463,16 @@ "\n", "item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True)\n", "item_assets.item_assets = {\n", - " key: AssetDefinition({\"title\": TITLE_DICT[key].format(label='').strip(),\n", - " \"media_type\": MEDIA_TYPE_DICT[key], \n", - " \"roles\": ROLES_DICT[key]})\n", - " for key in TITLE_DICT\n", + " key: AssetDefinition({\"title\": COLLITAS_TITLE_DICT[key].format(label='').strip(),\n", + " \"media_type\": COLLITAS_MEDIA_TYPE_DICT[key], \n", + " \"roles\": COLLITAS_ROLES_DICT[key]})\n", + " for key in COLLITAS_TITLE_DICT\n", "}" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -592,14 +586,14 @@ " \n", " \n", "
  • \n", - " tif\n", + " clc_map\n", "
      \n", " \n", " \n", " \n", "
    • \n", " title\n", - " \"Single Band Land Classification\"\n", + " \"Corine Land Cover Map\"\n", "
    • \n", " \n", " \n", @@ -615,68 +609,6 @@ " \n", " \n", "
    • \n", - " roles[] 2 items\n", - " \n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " 0\n", - " \"data\"\n", - "
      • \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - "
        \n", - " \n", - " \n", - " \n", - "
      • \n", - " 1\n", - " \"visual\"\n", - "
      • \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - "
    • \n", - " \n", - " \n", - "
    \n", - "
  • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
  • \n", - " tif_xml\n", - "
      \n", - " \n", - " \n", - " \n", - "
    • \n", - " title\n", - " \"TIFF Metadata\"\n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", - " media_type\n", - " \"application/xml\"\n", - "
    • \n", - " \n", - " \n", - " \n", - " \n", - "
    • \n", " roles[] 1 items\n", " \n", "
        \n", @@ -685,7 +617,7 @@ " \n", "
      • \n", " 0\n", - " \"metadata\"\n", + " \"data\"\n", "
      • \n", " \n", " \n", @@ -703,14 +635,14 @@ " \n", " \n", "
      • \n", - " tif_aux_xml\n", + " clc_map_statistics\n", "
          \n", " \n", " \n", " \n", "
        • \n", " title\n", - " \"TIFF Statistics\"\n", + " \"Corine Land Cover Map Statistics\"\n", "
        • \n", " \n", " \n", @@ -752,7 +684,7 @@ " \n", " \n", "
        • \n", - " tif_ovr\n", + " clc_map_pyramid\n", "
            \n", " \n", " \n", @@ -801,7 +733,7 @@ " \n", " \n", "
          • \n", - " tif_vat_cpg\n", + " clc_map_encoding\n", "
              \n", " \n", " \n", @@ -850,7 +782,7 @@ " \n", " \n", "
            • \n", - " tif_vat_dbf\n", + " clc_map_database\n", "
                \n", " \n", " \n", @@ -899,14 +831,14 @@ " \n", " \n", "
              • \n", - " txt\n", + " clc_map_database_metadata\n", "
                  \n", " \n", " \n", " \n", "
                • \n", " title\n", - " \"Legends\"\n", + " \"Database Metadata\"\n", "
                • \n", " \n", " \n", @@ -948,14 +880,14 @@ " \n", " \n", "
                • \n", - " tif_lyr\n", + " clc_map_tif_metadata\n", "
                    \n", " \n", " \n", " \n", "
                  • \n", " title\n", - " \"Legend Layer\"\n", + " \"TIFF Metadata\"\n", "
                  • \n", " \n", " \n", @@ -997,63 +929,14 @@ " \n", " \n", "
                  • \n", - " tfw\n", - "
                      \n", - " \n", - " \n", - " \n", - "
                    • \n", - " title\n", - " \"World File\"\n", - "
                    • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                    • \n", - " media_type\n", - " \"text/plain\"\n", - "
                    • \n", - " \n", - " \n", - " \n", - " \n", - "
                    • \n", - " roles[] 1 items\n", - " \n", - "
                        \n", - " \n", - " \n", - " \n", - "
                      • \n", - " 0\n", - " \"metadata\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - "
                      \n", - " \n", - "
                    • \n", - " \n", - " \n", - "
                    \n", - "
                  • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                  • \n", - " xml\n", + " clc_map_metadata\n", "
                      \n", " \n", " \n", " \n", "
                    • \n", " title\n", - " \"Single Band Land Classification Metadata\"\n", + " \"Corine Land Cover Map Metadata\"\n", "
                    • \n", " \n", " \n", @@ -1344,7 +1227,7 @@ "" ] }, - "execution_count": 16, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1352,6 +1235,13 @@ "source": [ "collection" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 1c9212db06ed602fc3b26ef908d5128ac1b2fa0d Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Wed, 1 May 2024 20:36:09 +0200 Subject: [PATCH 32/80] constants in dedicated script --- scripts/clc/constants.py | 143 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 scripts/clc/constants.py diff --git a/scripts/clc/constants.py b/scripts/clc/constants.py new file mode 100644 index 0000000..4f4b56b --- /dev/null +++ b/scripts/clc/constants.py @@ -0,0 +1,143 @@ +import os +from datetime import datetime, UTC + +import pystac +from pystac.provider import ProviderRole + + + +STAC_DIR = 'stac_tests' + +# Collection +COLLECTION_ID = 'corine-land-cover-raster' +COLLECTION_TITLE = 'CORINE Land Cover Raster' +COLLECTION_DESCRIPTION = ("The European Commission launched the CORINE (Coordination of Information on the Environment) " + "program in an effort to develop a standardized methodology for producing continent-scale land " + "cover, biotope, and air quality maps. 
The CORINE Land Cover (CLC) product offers a pan-European " + "land cover and land use inventory with 44 thematic classes, ranging from broad forested areas " + "to individual vineyards.") +COLLECTION_KEYWORDS = ["clms", "corine", "derived data", "land cover", "machine learning", "open data"] +COLLECTION_LICENSE = 'proprietary' + +COLLITAS_TITLE_DICT = { + 'clc_map': 'Corine Land Cover Map', + 'clc_map_statistics': 'Corine Land Cover Map Statistics', + 'clc_map_pyramid': 'Pyramid', + 'clc_map_encoding': 'Encoding', + 'clc_map_database': 'Database', + 'clc_map_database_metadata': 'Database Metadata', + 'clc_map_tif_metadata': 'TIFF Metadata', + 'clc_map_metadata': 'Corine Land Cover Map Metadata', +} + +COLLITAS_MEDIA_TYPE_DICT = { + 'clc_map': pystac.MediaType.COG, + 'clc_map_statistics': pystac.MediaType.XML, + 'clc_map_pyramid': 'image/tiff; application=geotiff; profile=pyramid', + 'clc_map_encoding': pystac.MediaType.TEXT, + 'clc_map_database': 'application/dbf', + 'clc_map_database_metadata': pystac.MediaType.TEXT, + 'clc_map_tif_metadata': 'image/tiff; application=geotiff; profile=layer', + 'clc_map_metadata': pystac.MediaType.XML, +} + +COLLITAS_ROLES_DICT = { + 'clc_map': ['data'], + 'clc_map_statistics': ['metadata'], + 'clc_map_pyramid': ['metadata'], + 'clc_map_encoding': ['metadata'], + 'clc_map_database': ['metadata'], + 'clc_map_database_metadata': ['metadata'], + 'clc_map_tif_metadata': ['metadata'], + 'clc_map_metadata': ['metadata'], +} + +# Items +DOM_DICT = { + 'GLP': 'Guadeloupe', + 'GUF': 'French Guyana', + 'MTQ': 'Martinique', + 'MYT': 'Mayotte', + 'REU': 'Réunion', + '': 'Europe', +} + +MEDIA_TYPE_DICT = { + 'tif': pystac.MediaType.COG, + 'tif_xml': pystac.MediaType.XML, + 'tif_aux_xml': pystac.MediaType.XML, + 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid', + 'tif_vat_cpg': pystac.MediaType.TEXT, + 'tif_vat_dbf': 'application/dbf', + 'txt': pystac.MediaType.TEXT, + 'tif_lyr': 'image/tiff; application=geotiff; profile=layer', + 
'tfw': pystac.MediaType.TEXT, + 'xml': pystac.MediaType.XML, +} + +ROLES_DICT = { + 'tif': ['data', 'visual'], + 'tif_xml': ['metadata'], + 'tif_aux_xml': ['metadata'], + 'tif_ovr': ['metadata'], + 'tif_vat_cpg': ['metadata'], + 'tif_vat_dbf': ['metadata'], + 'txt': ['metadata'], + 'tif_lyr': ['metadata'], + 'tfw': ['metadata'], + 'xml': ['metadata'], +} + +TITLE_DICT = { + 'tif': 'Single Band Land Classification {label}', + 'tif_xml': 'TIFF Metadata {label}', + 'tif_aux_xml': 'TIFF Statistics {label}', + 'tif_ovr': 'Pyramid {label}', + 'tif_vat_cpg': 'Encoding {label}', + 'tif_vat_dbf': 'Database {label}', + 'txt': 'Legends {label}', + 'tif_lyr': 'Legend Layer {label}', + 'tfw': 'World File {label}', + 'xml': 'Single Band Land Classification Metadata {label}', +} + +CLC_PROVIDER = pystac.provider.Provider( + name='Copernicus Land Monitoring Service', + description=('The Copernicus Land Monitoring Service provides ' + 'geographical information on land cover and its ' + 'changes, land use, ground motions, vegetation state, ' + 'water cycle and Earth\'s surface energy variables to ' + 'a broad range of users in Europe and across the World ' + 'in the field of environmental terrestrial applications.'), + roles=[ProviderRole.LICENSOR, ProviderRole.HOST], + url='https://land.copernicus.eu' +) + + +ITEM_DESCRIPTION = ('Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' + 'datasets produced within the frame the Copernicus Land Monitoring Service ' + 'referring to land cover / land use status of year {year}. ' + 'CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), ' + 'coordinated by the European Environment Agency (EEA). It provides consistent ' + 'and thematically detailed information on land cover and land cover changes across Europe. 
' + 'CLC datasets are based on the classification of satellite images produced by the national ' + 'teams of the participating countries - the EEA members and cooperating countries (EEA39). ' + 'National CLC inventories are then further integrated into a seamless land cover map of Europe. ' + 'The resulting European database relies on standard methodology and nomenclature with following ' + 'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; ' + 'minimum mapping unit (MMU) for status layers is 25 hectares; ' + 'minimum width of linear elements is 100 metres. ' + 'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares ' + 'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. ' + 'The CLC service delivers important data sets supporting the implementation of key priority ' + 'areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, ' + 'halting the loss of biological diversity, tracking the impacts of climate change, ' + 'monitoring urban land take, assessing developments in agriculture or dealing with ' + 'water resources directives. CLC belongs to the Pan-European component of the ' + 'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the ' + 'European Copernicus Programme coordinated by the European Environment Agency, ' + 'providing environmental information from a combination of air- and space-based observation ' + 'systems and in-situ monitoring. Additional information about CLC product description including ' + 'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. 
' + 'CLC class descriptions can be found at ' + 'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.') From 3b72d983c74d955499d17f501471d2bba965eeb8 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Wed, 1 May 2024 20:45:04 +0200 Subject: [PATCH 33/80] constants removed --- scripts/clc/item.py | 122 ++++---------------------------------------- 1 file changed, 10 insertions(+), 112 deletions(-) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index 4fe9e25..b64bb8b 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -14,20 +14,6 @@ import rasterio as rio - - -def deconstruct_clc_name(filename: str): - id = os.path.basename(filename).split('.')[0] - p = re.compile(("U(?P[0-9]{4})_" - "(?PCLC|CHA)(?P[0-9]{4})_" - "V(?P[0-9]{4})_(?P[0-9a-z]*)" - "_?(?P[A-Z]*)?" - "_?(?P[A-Z]*)?")) - m = p.search(id) - - return(m.groupdict()) - - def deconstruct_clc_name(filename: str): p = re.compile('^(?P[A-Z0-9a-z_]*).(?P.*)$') m = p.search(os.path.basename(filename)) @@ -51,60 +37,11 @@ def create_asset(filename: str, DOM_code: str): filename_elements = deconstruct_clc_name(filename) suffix = filename_elements['suffix'].replace('.', '_') - DOM_DICT = { - 'GLP': 'Guadeloupe', - 'GUF': 'French Guyana', - 'MTQ': 'Martinique', - 'MYT': 'Mayotte', - 'REU': 'Réunion', - '': 'Europe', - } - - MEDIA_TYPE_DICT = { - 'tif': pystac.MediaType.COG, - 'tif_xml': pystac.MediaType.XML, - 'tif_aux_xml': pystac.MediaType.XML, - 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid', - 'tif_vat_cpg': pystac.MediaType.TEXT, - 'tif_vat_dbf': 'application/dbf', - 'txt': pystac.MediaType.TEXT, - 'tif_lyr': 'image/tiff; application=geotiff; profile=layer', - 'tfw': pystac.MediaType.TEXT, - 'xml': pystac.MediaType.XML, - } - label = DOM_DICT[DOM_code] - TITLE_DICT = { - 'tif': f'Single Band Land Classification {label}', - 'tif_xml': f'TIFF Metadata {label}', - 'tif_aux_xml': f'TIFF Statistics {label}', - 'tif_ovr': f'Pyramid 
{label}', - 'tif_vat_cpg': f'Encoding {label}', - 'tif_vat_dbf': f'Database {label}', - 'txt': f'Legends {label}', - 'tif_lyr': f'Legend Layer {label}', - 'tfw': f'World File {label}', - 'xml': f'Single Band Land Classification Metadata {label}', - } - - ROLES_DICT = { - 'tif': ['data', 'visual'], - 'tif_xml': ['metadata'], - 'tif_aux_xml': ['metadata'], - 'tif_ovr': ['metadata'], - 'tif_vat_cpg': ['metadata'], - 'tif_vat_dbf': ['metadata'], - 'txt': ['metadata'], - 'tif_lyr': ['metadata'], - 'tfw': ['metadata'], - 'xml': ['metadata'], - } - - asset = pystac.Asset(href=filename, title=TITLE_DICT[suffix], media_type=MEDIA_TYPE_DICT[suffix], roles=ROLES_DICT[suffix]) + asset = pystac.Asset(href=filename, title=TITLE_DICT[suffix].format(label=label), media_type=MEDIA_TYPE_DICT[suffix], roles=ROLES_DICT[suffix]) return(f"{filename_elements['id']}_{suffix}", asset) - def get_img_paths(path: str): img_paths=[] for root, dirs, files in os.walk(path): @@ -116,8 +53,8 @@ def get_img_paths(path: str): return(img_paths) - def get_asset_files(path, clc_name): + clc_name_elements = deconstruct_clc_name(clc_name) asset_files = [] @@ -130,7 +67,9 @@ def get_asset_files(path, clc_name): continue for file in files: - if file.startswith(clc_name + '.') or file.endswith((f'{clc_name_elements["DOM_code"]}.tif.lyr', 'QGIS.txt',)): + if (file.startswith(clc_name + '.') or + file.endswith((f'{clc_name_elements["DOM_code"]}.tif.lyr', 'QGIS.txt',)) and + clc_name in file): asset_files.append(os.path.join(root, file)) return(asset_files) @@ -140,54 +79,14 @@ def project_bbox(img, target_epsg=4326): bbox_warped = rio.warp.transform_bounds(img.crs, target_crs, *img.bounds) return(bbox_warped) +def create_item(img_path, root): -def create_item(img_path: str): clc_name_elements = deconstruct_clc_name(img_path) - + asset_files = get_asset_files(root, clc_name=clc_name_elements['id']) asset_files = [f for f in asset_files if not f.endswith('aux')] - year = clc_name_elements.get('reference_year') 
- - CLC_PROVIDER = pystac.provider.Provider( - name='Copernicus Land Monitoring Service', - description=('The Copernicus Land Monitoring Service provides ' - 'geographical information on land cover and its ' - 'changes, land use, ground motions, vegetation state, ' - 'water cycle and Earth\'s surface energy variables to ' - 'a broad range of users in Europe and across the World ' - 'in the field of environmental terrestrial applications.'), - roles=[ProviderRole.LICENSOR, ProviderRole.HOST], - url='https://land.copernicus.eu' - ) - - props = {'description': (f'Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' - f'datasets produced within the frame the Copernicus Land Monitoring Service ' - f'referring to land cover / land use status of year {year}. ' - f'CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), ' - f'coordinated by the European Environment Agency (EEA). It provides consistent ' - f'and thematically detailed information on land cover and land cover changes across Europe. ' - f'CLC datasets are based on the classification of satellite images produced by the national ' - f'teams of the participating countries - the EEA members and cooperating countries (EEA39). ' - f'National CLC inventories are then further integrated into a seamless land cover map of Europe. ' - f'The resulting European database relies on standard methodology and nomenclature with following ' - f'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; ' - f'minimum mapping unit (MMU) for status layers is 25 hectares; ' - f'minimum width of linear elements is 100 metres. ' - f'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares ' - f'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. 
' - f'The CLC service delivers important data sets supporting the implementation of key priority ' - f'areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, ' - f'halting the loss of biological diversity, tracking the impacts of climate change, ' - f'monitoring urban land take, assessing developments in agriculture or dealing with ' - f'water resources directives. CLC belongs to the Pan-European component of the ' - f'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the ' - f'European Copernicus Programme coordinated by the European Environment Agency, ' - f'providing environmental information from a combination of air- and space-based observation ' - f'systems and in-situ monitoring. Additional information about CLC product description including ' - f'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. ' - f'CLC class descriptions can be found at ' - f'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.'), + props = {'description': ITEM_DESCRIPTION.format(year=year), 'created': None, 'providers': CLC_PROVIDER.to_dict(), } @@ -206,9 +105,8 @@ def create_item(img_path: str): } item = pystac.Item(**params) - + for asset_file in asset_files: - # print(asset_file) key, asset = create_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code')) item.add_asset( key=key, @@ -225,4 +123,4 @@ def create_item(img_path: str): license = pystac.link.Link(rel='LICENSE', target="https://land.copernicus.eu/en/data-policy") item.add_link(license) - return(item) \ No newline at end of file + return(item) From f3d8f6515cfdf8394a0a93fd57fd520563d181cb Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Thu, 2 May 2024 15:02:00 +0800 Subject: [PATCH 34/80] Update item and collection validation --- .DS_Store | Bin 6148 -> 6148 bytes schema/.DS_Store | Bin 0 -> 6148 bytes scripts/.DS_Store | Bin 8196 -> 8196 bytes 
scripts/vabh/test_collection.json | 4 ++-- scripts/vabh/test_item.json | 27 +++++++++++++++------- scripts/vabh/vabh_collection.py | 31 +++++++++++++++++++------ scripts/vabh/vabh_item.py | 37 ++++++++++++++++++++++++++---- 7 files changed, 77 insertions(+), 22 deletions(-) create mode 100644 schema/.DS_Store diff --git a/.DS_Store b/.DS_Store index 3d5f93ace29183dca7189ecec81e2814e5802e8f..8af74eb9354ef021e5d8a003bdfc531346696b3e 100644 GIT binary patch delta 237 zcmZoMXfc=|#>B)qu~2NHo}wrV0|Nsi1A_nqLoP#NQh9MfQcix-W=5vvjItmpHilw` zWQGieRAgzO_>Re1jD^V()zyY(X2v=ShQ_rz3e}cI2098RW+t_@oE)Oc`qn}5**Up+ z`CUNuK)?vJ1`K$iG>qy7GEnR(3ogpb$tX#zCZ$PC4j&6^ohST?hB c@N)n?19b3r=E?jbmK;D!!1ino5ZS^E00amxjQ{`u delta 109 zcmZoMXfc=|#>B`mu~2NHo}wrd0|Nsi1A_nqLjgkxLvd1haY0hf#KPs14MbQb|7V&Y zVq#*UqhM%gR;!~>ZE0+*qhMxiuz4|a3d<&z4~(1HIruq%hHd`H{GE9+zlbFV&^VBJ LOq&BlwlD(#*^L_5 diff --git a/schema/.DS_Store b/schema/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..795a8847d734afe9e31c3119ffe63b4876fb75d1 GIT binary patch literal 6148 zcmeHK%}T>S5T4blTSUk~p~nTU1zVMZcnP(>fDt{Y)W#GI#%yU)dnkpR^@V&ApU0Wq z4Op6kClNaXyWi~m>}Ed5{s4gJPNFt|2LKWqp&+F~$Xw}KbHRkd%rSun$RP`-*{EWo zzi6UwmoS1cXo%q3_lJoN(f1LKk~qs-txvJGUfbB*l3Q|H-UUy38svj~l642;8ya0| zorI-62(RMdwD0Yn=`

                      2Pck;%JB=x7TqR>1kI_(kL^zfpy5btoOafY}W2{j+&}{ zG;gZe$ziLhjyuQmd0p=9ADmwFpOcqVzgaQ`@@{0uVF~Z3EY;D-#*{-CfC`VI?=X6wL?uFvGJ5t5)zZwW%>(05o^ zL=TFvsfadJxF?3N>1bCj&UaW?wCNzs$~cc(xqQ3`vpU+92?ya@k^@U3S4rm`~7^n3s b(lO3=SXiW4$ZpaR`66Hl;f@*j1qQwVu*gkQ literal 0 HcmV?d00001 diff --git a/scripts/.DS_Store b/scripts/.DS_Store index 518d29115e615d4116dfa7bf2f3fbef96920e02a..72531b7b9e3420c5bcd4b61cc67ceac66f3f6a96 100644 GIT binary patch delta 87 zcmZp1XmOa3%(!D?;WYM%4ZNG#Ib=8(cT83hC{&lIt~N3>FwjvjGpW^4sJ1i$GEIQ& pT22m8Wqs?Q`0SkAy!@`odj;efyEoqz&|=@rCh?tR^FI-GW&oR(8sGo` delta 88 zcmV-e0H^fDN+;2pk9jxsxOiaVQykdonXPHY^}5IWc`KAbUA8 uGb|uBGcbL92?!M5FY`Iv)m9W2eStl_5`#16bA#eV;dd- diff --git a/scripts/vabh/test_collection.json b/scripts/vabh/test_collection.json index 43d6271..3832710 100644 --- a/scripts/vabh/test_collection.json +++ b/scripts/vabh/test_collection.json @@ -139,8 +139,8 @@ "name": "Copernicus Land Monitoring Service", "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": [ - "host", - "licensor" + "licensor", + "host" ], "url": "https://land.copernicus.eu" } diff --git a/scripts/vabh/test_item.json b/scripts/vabh/test_item.json index 5e98f12..7ce5149 100644 --- a/scripts/vabh/test_item.json +++ b/scripts/vabh/test_item.json @@ -7,6 +7,17 @@ "description": "2012 Wien building height", "start_datetime": "2011-04-25T00:00:00Z", "end_datetime": "2014-03-05T00:00:00Z", + "providers": [ + { + "name": "Copernicus Land Monitoring Service", + "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in 
Europe and across the World in the field of environmental terrestrial applications.", + "roles": [ + "licensor", + "host" + ], + "url": "https://land.copernicus.eu" + } + ], "proj:epsg": 3035, "proj:bbox": [ 4780170, @@ -82,14 +93,6 @@ "data" ] }, - "compressed_dataset": { - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020.zip", - "type": "application/zip", - "title": "Compressed Building Height Metadata", - "roles": [ - "data" - ] - }, "quality_check_report": { "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Doc/AT001_WIEN_UA2012_DHM_QC_Reportv020.pdf", "type": "application/pdf", @@ -105,6 +108,14 @@ "roles": [ "metadata" ] + }, + "compressed_dataset": { + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020.zip", + "type": "application/zip", + "title": "Compressed Building Height Metadata", + "roles": [ + "data" + ] } }, "bbox": [ diff --git a/scripts/vabh/vabh_collection.py b/scripts/vabh/vabh_collection.py index 4fd2ca2..92b58ee 100644 --- a/scripts/vabh/vabh_collection.py +++ b/scripts/vabh/vabh_collection.py @@ -6,6 +6,8 @@ import pystac import rasterio as rio +from jsonschema import Draft7Validator +from jsonschema.exceptions import best_match from pyproj import Transformer from pystac import Extent, SpatialExtent, TemporalExtent from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension @@ -24,13 +26,18 @@ HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( name="Copernicus Land Monitoring Service", description=( - "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land" - " use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of" - " users in Europe and across the World in the field of environmental terrestrial applications." 
+ "The Copernicus Land Monitoring Service provides " + "geographical information on land cover and its " + "changes, land use, ground motions, vegetation state, " + "water cycle and Earth's surface energy variables to " + "a broad range of users in Europe and across the " + "World in the field of environmental terrestrial " + "applications." ), - roles=[ProviderRole.HOST, ProviderRole.LICENSOR], + roles=[ProviderRole.LICENSOR, ProviderRole.HOST], url="https://land.copernicus.eu", ) + COLLECTION_id = "urban-atlas-building-height" COLLECTION_title = "Urban Atlas Building Height 10m" COLLECTION_description = "Urban Atlas building height over capital cities." @@ -86,9 +93,13 @@ def get_collection_extent(bbox, start_datetime) -> Extent: return Extent(spatial=spatial_extent, temporal=temporal_extent) -# def get_item_assets() - -# def get_links() +def get_stac_validator(product_schema: str) -> Draft7Validator: + with open(product_schema, encoding="utf-8") as f: + schema = json.load(f) + registry = Registry().with_resources( + [("http://example.com/schema.json", Resource.from_contents(schema))], + ) + return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) if __name__ == "__main__": @@ -158,3 +169,9 @@ def get_collection_extent(bbox, start_datetime) -> Extent: collection.set_self_href("scripts/vabh/test_collection.json") collection.save_object() + + # validate + validator = get_stac_validator("./schema/products/uabh.json") + error_msg = best_match(validator.iter_errors(collection.to_dict())) + if error_msg is not None: + print(error_msg) diff --git a/scripts/vabh/vabh_item.py b/scripts/vabh/vabh_item.py index f017173..6ba4aeb 100644 --- a/scripts/vabh/vabh_item.py +++ b/scripts/vabh/vabh_item.py @@ -7,12 +7,15 @@ import pystac import rasterio as rio +from jsonschema import Draft7Validator +from jsonschema.exceptions import best_match from pyproj import Transformer from pystac.extensions.projection import ProjectionExtension from pystac.link 
import Link from pystac.provider import ProviderRole from rasterio.coords import BoundingBox from rasterio.crs import CRS +from referencing import Registry, Resource from shapely.geometry import Polygon, box, mapping # KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/data/AT001_WIEN_UA2012_DHM_V020.tif" @@ -56,11 +59,15 @@ HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( name="Copernicus Land Monitoring Service", description=( - "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land" - " use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of" - " users in Europe and across the World in the field of environmental terrestrial applications." + "The Copernicus Land Monitoring Service provides " + "geographical information on land cover and its " + "changes, land use, ground motions, vegetation state, " + "water cycle and Earth's surface energy variables to " + "a broad range of users in Europe and across the " + "World in the field of environmental terrestrial " + "applications." 
), - roles=[ProviderRole.HOST, ProviderRole.LICENSOR], + roles=[ProviderRole.LICENSOR, ProviderRole.HOST], url="https://land.copernicus.eu", ) @@ -122,6 +129,15 @@ def get_description(product_id: str) -> str: return f"{year[2:]} {city.title()} building height" +def get_stac_validator(product_schema: str) -> Draft7Validator: + with open(product_schema, encoding="utf-8") as f: + schema = json.load(f) + registry = Registry().with_resources( + [("http://example.com/schema.json", Resource.from_contents(schema))], + ) + return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) + + if __name__ == "__main__": head, tail = os.path.split(KEY) (product_id,) = tail.split(".")[0].rsplit("_", 0) @@ -138,10 +154,15 @@ def get_description(product_id: str) -> str: datetime=None, start_datetime=start_datetime, end_datetime=end_datetime, - properties={"created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, + properties={ + "created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), + "description": description, + }, collection=COLLECTION_id, ) + item.common_metadata.providers = [HOST_AND_LICENSOR] + # extensions projection = ProjectionExtension.ext(item, add_if_missing=True) projection.epsg = crs.to_epsg() @@ -162,3 +183,9 @@ def get_description(product_id: str) -> str: # item.set_self_href(os.path.join(KEY, f"{tail}.json")) item.set_self_href("scripts/vabh/test_item.json") item.save_object() + + # validate + validator = get_stac_validator("./schema/products/uabh.json") + error_msg = best_match(validator.iter_errors(item.to_dict())) + if error_msg is not None: + print(error_msg) From 295ab935a7f8d8d275f183e896c8c8f5ed1e4bd8 Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Thu, 2 May 2024 15:15:14 +0800 Subject: [PATCH 35/80] Update for correct dataset name --- .DS_Store | Bin 6148 -> 6148 bytes scripts/.DS_Store | Bin 8196 -> 8196 bytes scripts/{vabh => uabh}/test_collection.json | 0 scripts/{vabh => uabh}/test_item.json | 0 
.../uabh_collection.py} | 0 .../{vabh/vabh_item.py => uabh/uabh_item.py} | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename scripts/{vabh => uabh}/test_collection.json (100%) rename scripts/{vabh => uabh}/test_item.json (100%) rename scripts/{vabh/vabh_collection.py => uabh/uabh_collection.py} (100%) rename scripts/{vabh/vabh_item.py => uabh/uabh_item.py} (100%) diff --git a/.DS_Store b/.DS_Store index 8af74eb9354ef021e5d8a003bdfc531346696b3e..a73d78f03ed74e4910ec06c8f245e9062651c22e 100644 GIT binary patch delta 71 zcmZoMXffCz$;`NGvJ`Wnrc`yck+G41j)JbCfmyANLbWA`V`64fTg%BIs;qAv6rY`w Zo0s1+`2dqVWAEn6%*HI6**X650|35X6omi) delta 70 zcmZoMXffCz$;`NOvJ`WnhGccMp{0p|j)IYyajlL*wWSe|ZDM9pTg%BIs;qAv6rY`w Zo0s1``2dqVW6$O*%*HI6**O030|2n|6kh-U diff --git a/scripts/.DS_Store b/scripts/.DS_Store index 72531b7b9e3420c5bcd4b61cc67ceac66f3f6a96..ec31351cffbeef958fb7d7f8b147fad119556ad9 100644 GIT binary patch delta 67 zcmV-J0KET%K!iY$V*$Fcah3;>fDN+;2pk9jx|1XjaSa-KdonXJFf1S~F)%ihpb#4Y ZjFY?&9|4WC+z=@Tvj-UV1he}T2Ls_`6+Zv~ delta 91 zcmZp1XmOa3%(!D?;WYM%4ZNG#Ib=8(cT83hC{&lIt~N3>FwjvjGpW^4sJ1i$GEIQ& tT22m8Wqs?Q`0SkAy!@`o`vl|}yEoqx&}84tD)ED5@ Date: Thu, 2 May 2024 09:24:58 +0200 Subject: [PATCH 36/80] Update for full implementation --- create_vpp_items.py | 8 ++------ scripts/vpp/item.py | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/create_vpp_items.py b/create_vpp_items.py index d459a1f..19bcb4a 100644 --- a/create_vpp_items.py +++ b/create_vpp_items.py @@ -15,12 +15,11 @@ def main(): validator = get_stac_validator("schema/products/vpp.json") product_list = create_product_list(2017, 2023) - # remove [:1] for full implementation - for product in product_list[:1]: + for product in product_list: page_iterator = create_page_iterator(AWS_SESSION, BUCKET, product) for page in page_iterator: tiles = [prefix["Prefix"] for prefix in page["CommonPrefixes"]] - with ThreadPoolExecutor(max_workers=10) as executor: + with ThreadPoolExecutor(max_workers=100) as executor: 
list( tqdm( executor.map( @@ -30,9 +29,6 @@ def main(): ) ) - # remove break for full implementation - break - if __name__ == "__main__": main() diff --git a/scripts/vpp/item.py b/scripts/vpp/item.py index 3850e4f..55e0ca2 100644 --- a/scripts/vpp/item.py +++ b/scripts/vpp/item.py @@ -47,7 +47,7 @@ def create_product_list(start_year: int, end_year: int) -> list[str]: def create_page_iterator(aws_session: boto3.Session, bucket: str, prefix: str) -> PageIterator: client = aws_session.client("s3") paginator = client.get_paginator("list_objects_v2") - return paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="-", MaxKeys=10) + return paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="-") def read_metadata_from_s3(bucket: str, key: str, aws_session: boto3.Session) -> tuple[BoundingBox, CRS, int, int]: From a32981d0611725d55ec9c767c42a5ba81064e4d9 Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Thu, 2 May 2024 15:25:26 +0800 Subject: [PATCH 37/80] Fix imports --- scripts/uabh/uabh_collection.py | 2 + scripts/uabh/uabh_item.py | 1 + scripts/vabh/test_collection.json | 153 ++++++++++++++++++++++++++++++ scripts/vabh/test_item.json | 131 +++++++++++++++++++++++++ 4 files changed, 287 insertions(+) create mode 100644 scripts/vabh/test_collection.json create mode 100644 scripts/vabh/test_item.json diff --git a/scripts/uabh/uabh_collection.py b/scripts/uabh/uabh_collection.py index 92b58ee..b5f53e1 100644 --- a/scripts/uabh/uabh_collection.py +++ b/scripts/uabh/uabh_collection.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import os from datetime import datetime from typing import Final @@ -15,6 +16,7 @@ from pystac.provider import ProviderRole from rasterio.coords import BoundingBox from rasterio.crs import CRS +from referencing import Registry, Resource from shapely.geometry import Polygon, box # KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/data/AT001_WIEN_UA2012_DHM_V020.tif" diff --git 
a/scripts/uabh/uabh_item.py b/scripts/uabh/uabh_item.py index 6ba4aeb..f680f0d 100644 --- a/scripts/uabh/uabh_item.py +++ b/scripts/uabh/uabh_item.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import os import xml.etree.ElementTree as ET from datetime import datetime diff --git a/scripts/vabh/test_collection.json b/scripts/vabh/test_collection.json new file mode 100644 index 0000000..3832710 --- /dev/null +++ b/scripts/vabh/test_collection.json @@ -0,0 +1,153 @@ +{ + "type": "Collection", + "id": "urban-atlas-building-height", + "stac_version": "1.0.0", + "description": "Urban Atlas building height over capital cities.", + "links": [ + { + "rel": "root", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_collection.json", + "type": "application/json", + "title": "Urban Atlas Building Height 10m" + }, + { + "rel": "license", + "href": "https://land.copernicus.eu/en/data-policy", + "title": "Legal notice on the use of CLMS data" + }, + { + "rel": "root", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", + "title": "CLMS Catalog" + }, + { + "rel": "parent", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", + "title": "CLMS Catalog" + }, + { + "rel": "self", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_collection.json", + "type": "application/json" + } + ], + "stac_extensions": [ + "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", + "https://stac-extensions.github.io/projection/v1.1.0/schema.json" + ], + "item_assets": { + "dataset": { + "title": "Building height raster", + "media_type": "image/tiff; application=geotiff", + "roles": [ + "data" + ] + }, + "quality_check_report": { + "title": "Quality check report", + "media_type": "application/pdf", + "roles": [ + "metadata" + ] + }, + "metadata": { + "title": "Metadata", + "media_type": "application/xml", + "roles": [ + "metadata" + ] + }, + 
"quality_control_report": { + "title": "Quality control report", + "media_type": "application/pdf", + "roles": [ + "metadata" + ] + }, + "pixel_based_info_shp": { + "title": "Pixel based info shape format", + "media_type": "application/octet-stream", + "roles": [ + "metadata" + ] + }, + "pixel_based_info_shx": { + "title": "Pixel based info shape index", + "media_type": "application/octet-stream", + "roles": [ + "metadata" + ] + }, + "pixel_based_info_dbf": { + "title": "Pixel based info attribute", + "media_type": "application/x-dbf", + "roles": [ + "metadata" + ] + }, + "pixel_based_info_prj": { + "title": "Pixel based info projection description", + "media_type": "text/plain", + "roles": [ + "metadata" + ] + }, + "pixel_based_info_cpg": { + "title": "Pixel based info character encoding", + "media_type": "text/plain", + "roles": [ + "metadata" + ] + }, + "compressed_dataset": { + "title": "Compressed building height raster", + "media_type": "application/zip", + "roles": [ + "data" + ] + } + }, + "title": "Urban Atlas Building Height 10m", + "extent": { + "spatial": { + "bbox": [ + [ + -21.210399013454868, + 62.99044383484405, + -20.96981298030872, + 63.339366607232876 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2012-01-01T00:00:00Z", + null + ] + ] + } + }, + "license": "proprietary", + "keywords": [ + "Buildings", + "Building height", + "Elevation" + ], + "providers": [ + { + "name": "Copernicus Land Monitoring Service", + "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", + "roles": [ + "licensor", + "host" + ], + "url": "https://land.copernicus.eu" + } + ], + "summaries": { + "proj:epsg": [ + 3035 + ] + } +} \ No newline at end of file diff --git a/scripts/vabh/test_item.json 
b/scripts/vabh/test_item.json new file mode 100644 index 0000000..7ce5149 --- /dev/null +++ b/scripts/vabh/test_item.json @@ -0,0 +1,131 @@ +{ + "type": "Feature", + "stac_version": "1.0.0", + "id": "AT001_WIEN_UA2012_DHM_v020", + "properties": { + "created": "2017-12-29T00:00:00Z", + "description": "2012 Wien building height", + "start_datetime": "2011-04-25T00:00:00Z", + "end_datetime": "2014-03-05T00:00:00Z", + "providers": [ + { + "name": "Copernicus Land Monitoring Service", + "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", + "roles": [ + "licensor", + "host" + ], + "url": "https://land.copernicus.eu" + } + ], + "proj:epsg": 3035, + "proj:bbox": [ + 4780170, + 2793000, + 4809960, + 2821690 + ], + "proj:shape": [ + 2869, + 2979 + ], + "datetime": null + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -20.96981298030872, + 62.99044383484405 + ], + [ + -20.96981298030872, + 63.339366607232876 + ], + [ + -21.210399013454868, + 63.339366607232876 + ], + [ + -21.210399013454868, + 62.99044383484405 + ], + [ + -20.96981298030872, + 62.99044383484405 + ] + ] + ] + }, + "links": [ + { + "rel": "license", + "href": "https://land.copernicus.eu/en/data-policy" + }, + { + "rel": "root", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", + "title": "CLMS Catalog" + }, + { + "rel": "parent", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/urban-atlas-building-height/urban-atlas-building-height.json", + "title": "Urban Atlas Building Height 10m" + }, + { + "rel": "collection", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/urban-atlas-building-height/urban-atlas-building-height.json", + "title": "Urban Atlas Building 
Height 10m" + }, + { + "rel": "self", + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_item.json", + "type": "application/json" + } + ], + "assets": { + "dataset": { + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Dataset/AT001_WIEN_UA2012_DHM_v020.tif", + "type": "image/tiff; application=geotiff", + "title": "Building Height Dataset", + "roles": [ + "data" + ] + }, + "quality_check_report": { + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Doc/AT001_WIEN_UA2012_DHM_QC_Reportv020.pdf", + "type": "application/pdf", + "title": "Quality Check Report", + "roles": [ + "metadata" + ] + }, + "metadata": { + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Metadata/AT001_WIEN_UA2012_DHM_metadata_v020.xml", + "type": "application/xml", + "title": "Building Height Dataset Metadata", + "roles": [ + "metadata" + ] + }, + "compressed_dataset": { + "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020.zip", + "type": "application/zip", + "title": "Compressed Building Height Metadata", + "roles": [ + "data" + ] + } + }, + "bbox": [ + -21.210399013454868, + 62.99044383484405, + -20.96981298030872, + 63.339366607232876 + ], + "stac_extensions": [ + "https://stac-extensions.github.io/projection/v1.1.0/schema.json" + ], + "collection": "urban-atlas-building-height" +} \ No newline at end of file From 2fdbd3dbfa9d731d2212ad4f914492caa69899f6 Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Thu, 2 May 2024 15:33:22 +0800 Subject: [PATCH 38/80] clean up --- scripts/uabh/uabh_collection.py | 1 - scripts/uabh/uabh_item.py | 1 - 2 files changed, 2 deletions(-) diff --git a/scripts/uabh/uabh_collection.py b/scripts/uabh/uabh_collection.py index b5f53e1..89b6409 100644 --- a/scripts/uabh/uabh_collection.py +++ b/scripts/uabh/uabh_collection.py @@ -19,7 +19,6 @@ from referencing import Registry, Resource from shapely.geometry 
import Polygon, box -# KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/data/AT001_WIEN_UA2012_DHM_V020.tif" KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020" head, tail = os.path.split(KEY) (product_id, product_version) = tail.rsplit("_", 1) diff --git a/scripts/uabh/uabh_item.py b/scripts/uabh/uabh_item.py index f680f0d..ed4a0b7 100644 --- a/scripts/uabh/uabh_item.py +++ b/scripts/uabh/uabh_item.py @@ -19,7 +19,6 @@ from referencing import Registry, Resource from shapely.geometry import Polygon, box, mapping -# KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/data/AT001_WIEN_UA2012_DHM_V020.tif" KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020" head, tail = os.path.split(KEY) (product_id, product_version) = tail.rsplit("_", 1) From 06bee66523148075cb09ae1f3b4fba3cd60e8dbd Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Thu, 2 May 2024 15:35:47 +0800 Subject: [PATCH 39/80] remove old branch --- scripts/vabh/test_collection.json | 153 ------------------------------ scripts/vabh/test_item.json | 131 ------------------------- 2 files changed, 284 deletions(-) delete mode 100644 scripts/vabh/test_collection.json delete mode 100644 scripts/vabh/test_item.json diff --git a/scripts/vabh/test_collection.json b/scripts/vabh/test_collection.json deleted file mode 100644 index 3832710..0000000 --- a/scripts/vabh/test_collection.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "type": "Collection", - "id": "urban-atlas-building-height", - "stac_version": "1.0.0", - "description": "Urban Atlas building height over capital cities.", - "links": [ - { - "rel": "root", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_collection.json", - "type": "application/json", - "title": "Urban Atlas Building Height 10m" - }, - { - "rel": "license", - "href": "https://land.copernicus.eu/en/data-policy", - "title": "Legal notice on the use of CLMS 
data" - }, - { - "rel": "root", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", - "title": "CLMS Catalog" - }, - { - "rel": "parent", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", - "title": "CLMS Catalog" - }, - { - "rel": "self", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_collection.json", - "type": "application/json" - } - ], - "stac_extensions": [ - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/projection/v1.1.0/schema.json" - ], - "item_assets": { - "dataset": { - "title": "Building height raster", - "media_type": "image/tiff; application=geotiff", - "roles": [ - "data" - ] - }, - "quality_check_report": { - "title": "Quality check report", - "media_type": "application/pdf", - "roles": [ - "metadata" - ] - }, - "metadata": { - "title": "Metadata", - "media_type": "application/xml", - "roles": [ - "metadata" - ] - }, - "quality_control_report": { - "title": "Quality control report", - "media_type": "application/pdf", - "roles": [ - "metadata" - ] - }, - "pixel_based_info_shp": { - "title": "Pixel based info shape format", - "media_type": "application/octet-stream", - "roles": [ - "metadata" - ] - }, - "pixel_based_info_shx": { - "title": "Pixel based info shape index", - "media_type": "application/octet-stream", - "roles": [ - "metadata" - ] - }, - "pixel_based_info_dbf": { - "title": "Pixel based info attribute", - "media_type": "application/x-dbf", - "roles": [ - "metadata" - ] - }, - "pixel_based_info_prj": { - "title": "Pixel based info projection description", - "media_type": "text/plain", - "roles": [ - "metadata" - ] - }, - "pixel_based_info_cpg": { - "title": "Pixel based info character encoding", - "media_type": "text/plain", - "roles": [ - "metadata" - ] - }, - "compressed_dataset": { - "title": "Compressed building height raster", - "media_type": "application/zip", - "roles": [ - "data" - ] 
- } - }, - "title": "Urban Atlas Building Height 10m", - "extent": { - "spatial": { - "bbox": [ - [ - -21.210399013454868, - 62.99044383484405, - -20.96981298030872, - 63.339366607232876 - ] - ] - }, - "temporal": { - "interval": [ - [ - "2012-01-01T00:00:00Z", - null - ] - ] - } - }, - "license": "proprietary", - "keywords": [ - "Buildings", - "Building height", - "Elevation" - ], - "providers": [ - { - "name": "Copernicus Land Monitoring Service", - "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", - "roles": [ - "licensor", - "host" - ], - "url": "https://land.copernicus.eu" - } - ], - "summaries": { - "proj:epsg": [ - 3035 - ] - } -} \ No newline at end of file diff --git a/scripts/vabh/test_item.json b/scripts/vabh/test_item.json deleted file mode 100644 index 7ce5149..0000000 --- a/scripts/vabh/test_item.json +++ /dev/null @@ -1,131 +0,0 @@ -{ - "type": "Feature", - "stac_version": "1.0.0", - "id": "AT001_WIEN_UA2012_DHM_v020", - "properties": { - "created": "2017-12-29T00:00:00Z", - "description": "2012 Wien building height", - "start_datetime": "2011-04-25T00:00:00Z", - "end_datetime": "2014-03-05T00:00:00Z", - "providers": [ - { - "name": "Copernicus Land Monitoring Service", - "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", - "roles": [ - "licensor", - "host" - ], - "url": "https://land.copernicus.eu" - } - ], - "proj:epsg": 3035, - "proj:bbox": [ - 4780170, - 2793000, - 4809960, - 2821690 - ], - 
"proj:shape": [ - 2869, - 2979 - ], - "datetime": null - }, - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [ - -20.96981298030872, - 62.99044383484405 - ], - [ - -20.96981298030872, - 63.339366607232876 - ], - [ - -21.210399013454868, - 63.339366607232876 - ], - [ - -21.210399013454868, - 62.99044383484405 - ], - [ - -20.96981298030872, - 62.99044383484405 - ] - ] - ] - }, - "links": [ - { - "rel": "license", - "href": "https://land.copernicus.eu/en/data-policy" - }, - { - "rel": "root", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", - "title": "CLMS Catalog" - }, - { - "rel": "parent", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/urban-atlas-building-height/urban-atlas-building-height.json", - "title": "Urban Atlas Building Height 10m" - }, - { - "rel": "collection", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/urban-atlas-building-height/urban-atlas-building-height.json", - "title": "Urban Atlas Building Height 10m" - }, - { - "rel": "self", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_item.json", - "type": "application/json" - } - ], - "assets": { - "dataset": { - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Dataset/AT001_WIEN_UA2012_DHM_v020.tif", - "type": "image/tiff; application=geotiff", - "title": "Building Height Dataset", - "roles": [ - "data" - ] - }, - "quality_check_report": { - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Doc/AT001_WIEN_UA2012_DHM_QC_Reportv020.pdf", - "type": "application/pdf", - "title": "Quality Check Report", - "roles": [ - "metadata" - ] - }, - "metadata": { - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Metadata/AT001_WIEN_UA2012_DHM_metadata_v020.xml", - "type": "application/xml", - "title": "Building Height Dataset Metadata", - "roles": [ - "metadata" - ] - }, - "compressed_dataset": { - "href": 
"/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020.zip", - "type": "application/zip", - "title": "Compressed Building Height Metadata", - "roles": [ - "data" - ] - } - }, - "bbox": [ - -21.210399013454868, - 62.99044383484405, - -20.96981298030872, - 63.339366607232876 - ], - "stac_extensions": [ - "https://stac-extensions.github.io/projection/v1.1.0/schema.json" - ], - "collection": "urban-atlas-building-height" -} \ No newline at end of file From b4e45407a191d52aa736529b252d7d74f04adf2d Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Thu, 2 May 2024 11:59:33 +0200 Subject: [PATCH 40/80] renames license constant and adds wd --- scripts/clc/constants.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/scripts/clc/constants.py b/scripts/clc/constants.py index 4f4b56b..4d2d091 100644 --- a/scripts/clc/constants.py +++ b/scripts/clc/constants.py @@ -5,7 +5,7 @@ from pystac.provider import ProviderRole - +WORKING_DIR = os.getcwd() STAC_DIR = 'stac_tests' # Collection @@ -53,6 +53,9 @@ } # Items + +CLMS_LICENSE = pystac.link.Link(rel='LICENSE', target="https://land.copernicus.eu/en/data-policy") + DOM_DICT = { 'GLP': 'Guadeloupe', 'GUF': 'French Guyana', @@ -141,3 +144,17 @@ 'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. 
' 'CLC class descriptions can be found at ' 'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.') + + + +# CLMS_CATALOG_LINK = pystac.link.Link( +# rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) +# ) +# COLLECTION_LINK = pystac.link.Link( +# rel=pystac.RelType.COLLECTION, +# target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), +# ) +# ITEM_PARENT_LINK = pystac.link.Link( +# rel=pystac.RelType.PARENT, +# target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), +# ) \ No newline at end of file From 90e39febf59675e657f477bb4d6f008e2d0c662c Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Thu, 2 May 2024 12:00:38 +0200 Subject: [PATCH 41/80] perliminary constant import --- scripts/clc/item.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index b64bb8b..d60d6d9 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -14,6 +14,18 @@ import rasterio as rio +from constants import * + +# from .constants import ( +# DOM_DICT, +# TITLE_DICT, +# MEDIA_TYPE_DICT, +# ROLES_DICT, +# ITEM_DESCRIPTION, +# CLC_PROVIDER, +# ITEM_LICENSE +# ) + def deconstruct_clc_name(filename: str): p = re.compile('^(?P[A-Z0-9a-z_]*).(?P.*)$') m = p.search(os.path.basename(filename)) @@ -120,7 +132,8 @@ def create_item(img_path, root): transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0], ) - license = pystac.link.Link(rel='LICENSE', target="https://land.copernicus.eu/en/data-policy") - item.add_link(license) + + item.add_link(CLMS_LICENSE) return(item) + From e49f3c47ffee7f99ce7b941fb103e185e3048db6 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Thu, 2 May 2024 13:21:00 +0200 Subject: [PATCH 42/80] working prototype --- scripts/clc/clms_collection_generator.ipynb | 1272 
++----------------- 1 file changed, 122 insertions(+), 1150 deletions(-) diff --git a/scripts/clc/clms_collection_generator.ipynb b/scripts/clc/clms_collection_generator.ipynb index 6de915d..5cd107e 100644 --- a/scripts/clc/clms_collection_generator.ipynb +++ b/scripts/clc/clms_collection_generator.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -15,47 +15,34 @@ "from pystac.provider import ProviderRole\n", "from pystac.extensions.projection import ProjectionExtension\n", "\n", - "from datetime import datetime, UTC" + "from pystac.extensions.item_assets import ItemAssetsExtension, AssetDefinition\n", + "\n", + "from datetime import datetime, UTC\n", + "\n", + "import rasterio.warp\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "COLLITAS_TITLE_DICT = {\n", - " 'clc_map': 'Corine Land Cover Map',\n", - " 'clc_map_statistics': 'Corine Land Cover Map Statistics',\n", - " 'clc_map_pyramid': 'Pyramid',\n", - " 'clc_map_encoding': 'Encoding',\n", - " 'clc_map_database': 'Database',\n", - " 'clc_map_database_metadata': 'Database Metadata',\n", - " 'clc_map_tif_metadata': 'TIFF Metadata',\n", - " 'clc_map_metadata': 'Corine Land Cover Map Metadata',\n", - "}\n", - "\n", - "COLLITAS_MEDIA_TYPE_DICT = {\n", - " 'clc_map': pystac.MediaType.COG,\n", - " 'clc_map_statistics': pystac.MediaType.XML,\n", - " 'clc_map_pyramid': 'image/tiff; application=geotiff; profile=pyramid',\n", - " 'clc_map_encoding': pystac.MediaType.TEXT,\n", - " 'clc_map_database': 'application/dbf',\n", - " 'clc_map_database_metadata': pystac.MediaType.TEXT,\n", - " 'clc_map_tif_metadata': 'image/tiff; application=geotiff; profile=layer',\n", - " 'clc_map_metadata': pystac.MediaType.XML,\n", - "}\n", - "\n", - "COLLITAS_ROLES_DICT = {\n", - " 'clc_map': ['data'],\n", - " 'clc_map_statistics': ['metadata'],\n", - " 
'clc_map_pyramid': ['metadata'],\n", - " 'clc_map_encoding': ['metadata'],\n", - " 'clc_map_database': ['metadata'],\n", - " 'clc_map_database_metadata': ['metadata'],\n", - " 'clc_map_tif_metadata': ['metadata'],\n", - " 'clc_map_metadata': ['metadata'],\n", - "}" + "from constants import *\n", + "from item import create_item, get_img_paths" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "def proj_epsg_from_item_asset(item):\n", + " for asset_key in item.assets:\n", + " asset = item.assets[asset_key].to_dict()\n", + " if 'proj:epsg' in asset.keys():\n", + " return(asset.get('proj:epsg'))" ] }, { @@ -64,17 +51,17 @@ "metadata": {}, "outputs": [], "source": [ - "WORKING_DIR = os.getcwd()\n", - "STAC_DIR = 'stac_tests'\n", - "COLLECTION_ID = 'corine-land-cover-raster'\n", - "COLLECTION_TITLE = 'CORINE Land Cover Raster'\n", - "COLLECTION_DESCRIPTION = (\"The European Commission launched the CORINE (Coordination of Information on the Environment) \"\n", - " \"program in an effort to develop a standardized methodology for producing continent-scale land \"\n", - " \"cover, biotope, and air quality maps. 
The CORINE Land Cover (CLC) product offers a pan-European \"\n", - " \"land cover and land use inventory with 44 thematic classes, ranging from broad forested areas \"\n", - " \"to individual vineyards.\")\n", - "COLLECTION_KEYWORDS = [\"clms\", \"corine\", \"derived data\", \"land cover\", \"machine learning\", \"open data\"]\n", - "COLLECTION_LICENSE = 'proprietary'\n", + "# WORKING_DIR = os.getcwd()\n", + "# STAC_DIR = 'stac_tests'\n", + "# COLLECTION_ID = 'corine-land-cover-raster'\n", + "# COLLECTION_TITLE = 'CORINE Land Cover Raster'\n", + "# COLLECTION_DESCRIPTION = (\"The European Commission launched the CORINE (Coordination of Information on the Environment) \"\n", + "# \"program in an effort to develop a standardized methodology for producing continent-scale land \"\n", + "# \"cover, biotope, and air quality maps. The CORINE Land Cover (CLC) product offers a pan-European \"\n", + "# \"land cover and land use inventory with 44 thematic classes, ranging from broad forested areas \"\n", + "# \"to individual vineyards.\")\n", + "# COLLECTION_KEYWORDS = [\"clms\", \"corine\", \"derived data\", \"land cover\", \"machine learning\", \"open data\"]\n", + "# COLLECTION_LICENSE = 'proprietary'\n", "\n", "\n", "# CLMS_CATALOG_LINK = pystac.link.Link(\n", @@ -96,6 +83,17 @@ "metadata": {}, "outputs": [], "source": [ + "\n", + "data_root = '../CLC_100m'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", "\n", "sp_extent = pystac.SpatialExtent([None, None, None, None])\n", "tmp_extent = pystac.TemporalExtent([datetime(1990, 1, 1, microsecond=0, tzinfo=UTC), None])\n", @@ -106,1134 +104,108 @@ " title=COLLECTION_TITLE,\n", " extent=extent,\n", " keywords=COLLECTION_KEYWORDS,\n", - " license=COLLECTION_LICENSE\n", - " )" + " license=COLLECTION_LICENSE,\n", + " stac_extensions=[]\n", + " )\n", + "\n", + "\n", + "item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True)\n", + 
"item_assets.item_assets = {\n", + " key: AssetDefinition({\"title\": COLLITAS_TITLE_DICT[key].format(label='').strip(),\n", + " \"media_type\": COLLITAS_MEDIA_TYPE_DICT[key], \n", + " \"roles\": COLLITAS_ROLES_DICT[key]})\n", + " for key in COLLITAS_TITLE_DICT\n", + "}" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
                      \n", - "
                      \n", - "
                        \n", - " \n", - " \n", - " \n", - "
                      • \n", - " type\n", - " \"Collection\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " id\n", - " \"corine-land-cover-raster\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " stac_version\n", - " \"1.0.0\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " description\n", - " \"The European Commission launched the CORINE (Coordination of Information on the Environment) program in an effort to develop a standardized methodology for producing continent-scale land cover, biotope, and air quality maps. The CORINE Land Cover (CLC) product offers a pan-European land cover and land use inventory with 44 thematic classes, ranging from broad forested areas to individual vineyards.\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " links[] 0 items\n", - " \n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " title\n", - " \"CORINE Land Cover Raster\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " extent\n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " spatial\n", - "
                            \n", - " \n", - " \n", - "
                          • \n", - " bbox[] 4 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " None\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 1\n", - " None\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 2\n", - " None\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 3\n", - " None\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " temporal\n", - "
                            \n", - " \n", - " \n", - "
                          • \n", - " interval[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - "
                            • \n", - " 0[] 2 items\n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 0\n", - " \"1990-01-01T00:00:00Z\"\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 1\n", - " None\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                            • \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " license\n", - " \"proprietary\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " keywords[] 6 items\n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 0\n", - " \"clms\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 1\n", - " \"corine\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 2\n", - " \"derived data\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 3\n", - " \"land cover\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 4\n", - " \"machine learning\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 5\n", - " \"open data\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                      • \n", - " \n", - " \n", - "
                      \n", - "
                      \n", - "
                      " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "collection" + "collection.add_link(CLMS_LICENSE)\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "from pystac.extensions.item_assets import ItemAssetsExtension, AssetDefinition\n", + "collection.set_self_href(os.path.join(WORKING_DIR, f\"{STAC_DIR}/{collection.id}/{collection.id}.json\"))\n", + "catalog = pystac.read_file(f\"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json\")\n", "\n", - "item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True)\n", - "item_assets.item_assets = {\n", - " key: AssetDefinition({\"title\": COLLITAS_TITLE_DICT[key].format(label='').strip(),\n", - " \"media_type\": COLLITAS_MEDIA_TYPE_DICT[key], \n", - " \"roles\": COLLITAS_ROLES_DICT[key]})\n", - " for key in COLLITAS_TITLE_DICT\n", - "}" + "collection.set_root(catalog)\n", + "collection.set_parent(catalog)\n", + "\n", + "collection.save_object()\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "img_paths = get_img_paths(path=data_root)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
                      \n", - "
                      \n", - "
                        \n", - " \n", - " \n", - " \n", - "
                      • \n", - " type\n", - " \"Collection\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " id\n", - " \"corine-land-cover-raster\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " stac_version\n", - " \"1.0.0\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " description\n", - " \"The European Commission launched the CORINE (Coordination of Information on the Environment) program in an effort to develop a standardized methodology for producing continent-scale land cover, biotope, and air quality maps. The CORINE Land Cover (CLC) product offers a pan-European land cover and land use inventory with 44 thematic classes, ranging from broad forested areas to individual vineyards.\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " links[] 0 items\n", - " \n", - "
                      • \n", - " \n", - " \n", - " \n", - "
                      • \n", - " stac_extensions[] 1 items\n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 0\n", - " \"https://stac-extensions.github.io/item-assets/v1.0.0/schema.json\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " item_assets\n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " clc_map\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Corine Land Cover Map\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " media_type\n", - " \"image/tiff; application=geotiff; profile=cloud-optimized\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"data\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " clc_map_statistics\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Corine Land Cover Map Statistics\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " media_type\n", - " \"application/xml\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " clc_map_pyramid\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Pyramid\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " media_type\n", - " \"image/tiff; application=geotiff; profile=pyramid\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " clc_map_encoding\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Encoding\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " media_type\n", - " \"text/plain\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " clc_map_database\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Database\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " media_type\n", - " \"application/dbf\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " clc_map_database_metadata\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Database Metadata\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " media_type\n", - " \"text/plain\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " clc_map_tif_metadata\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"TIFF Metadata\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " media_type\n", - " \"image/tiff; application=geotiff; profile=layer\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " clc_map_metadata\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Corine Land Cover Map Metadata\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " media_type\n", - " \"application/xml\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " title\n", - " \"CORINE Land Cover Raster\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " extent\n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " spatial\n", - "
                            \n", - " \n", - " \n", - "
                          • \n", - " bbox[] 4 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " None\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 1\n", - " None\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 2\n", - " None\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 3\n", - " None\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " temporal\n", - "
                            \n", - " \n", - " \n", - "
                          • \n", - " interval[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - "
                            • \n", - " 0[] 2 items\n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 0\n", - " \"1990-01-01T00:00:00Z\"\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 1\n", - " None\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                            • \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " license\n", - " \"proprietary\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " keywords[] 6 items\n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 0\n", - " \"clms\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 1\n", - " \"corine\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 2\n", - " \"derived data\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 3\n", - " \"land cover\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 4\n", - " \"machine learning\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 5\n", - " \"open data\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                      • \n", - " \n", - " \n", - "
                      \n", - "
                      \n", - "
                      " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "An error occured: 'tif_vat_dbf_xml'\n", + "An error occured: 'tif_ovr_aux_xml'\n" + ] } ], "source": [ - "collection" + "proj_epsg = []\n", + "for img_path in img_paths:\n", + " item = create_item(img_path, data_root)\n", + " collection.add_item(item)\n", + "\n", + " item_epsg = proj_epsg_from_item_asset(item)\n", + " proj_epsg.append(item_epsg)\n", + "\n", + " item.set_self_href(os.path.join(WORKING_DIR, f\"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json\"))\n", + " item.save_object()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "collection.make_all_asset_hrefs_relative()\n", + "collection.update_extent_from_items()\n", + "ProjectionExtension.add_to(collection)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "collection.summaries = pystac.Summaries({'proj:epsg': list(set(proj_epsg))})" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "collection.save_object()" ] }, { From af75d6c08b4458e2b68acdc57520be6d4e16a7f5 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Thu, 2 May 2024 13:21:46 +0200 Subject: [PATCH 43/80] adds explicit rasterio warp import, missing key in asset creation handling and licenses --- scripts/clc/item.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index d60d6d9..5fe85ad 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -13,6 +13,7 @@ from datetime import datetime, UTC import rasterio as rio +import rasterio.warp from constants import * @@ -119,11 +120,15 @@ def create_item(img_path, root): item = pystac.Item(**params) for asset_file in asset_files: - key, 
asset = create_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code')) - item.add_asset( - key=key, - asset=asset, - ) + try: + key, asset = create_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code')) + item.add_asset( + key=key, + asset=asset, + ) + except KeyError as e: + print("An error occured:", e) + proj_ext = ProjectionExtension.ext(item.assets[os.path.basename(img_path).replace('.', '_')], add_if_missing=True) proj_ext.apply(epsg=rio.crs.CRS(img.crs).to_epsg(), @@ -133,7 +138,8 @@ def create_item(img_path, root): ) - item.add_link(CLMS_LICENSE) + links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] + item.add_links(links) return(item) From 475699b347690e9f376d17ddec473033b2eb7fa6 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Thu, 2 May 2024 13:22:08 +0200 Subject: [PATCH 44/80] adds links --- scripts/clc/constants.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/scripts/clc/constants.py b/scripts/clc/constants.py index 4d2d091..d73b9b4 100644 --- a/scripts/clc/constants.py +++ b/scripts/clc/constants.py @@ -5,7 +5,9 @@ from pystac.provider import ProviderRole +os.chdir('x:\\projects\\ETC-DI\\Task_18\\clms-stac') WORKING_DIR = os.getcwd() + STAC_DIR = 'stac_tests' # Collection @@ -147,14 +149,14 @@ -# CLMS_CATALOG_LINK = pystac.link.Link( -# rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) -# ) -# COLLECTION_LINK = pystac.link.Link( -# rel=pystac.RelType.COLLECTION, -# target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), -# ) -# ITEM_PARENT_LINK = pystac.link.Link( -# rel=pystac.RelType.PARENT, -# target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), -# ) \ No newline at end of file +CLMS_CATALOG_LINK = pystac.link.Link( + rel=pystac.RelType.ROOT, 
target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/clms_catalog.json")) +) +COLLECTION_LINK = pystac.link.Link( + rel=pystac.RelType.COLLECTION, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json")), +) +ITEM_PARENT_LINK = pystac.link.Link( + rel=pystac.RelType.PARENT, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json")), +) \ No newline at end of file From 79e0dd4b4d90d25b681f98dfa65c7cc17218158c Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Thu, 2 May 2024 13:26:48 +0200 Subject: [PATCH 45/80] dedicated collection creation script --- scripts/clc/collection.py | 78 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 scripts/clc/collection.py diff --git a/scripts/clc/collection.py b/scripts/clc/collection.py new file mode 100644 index 0000000..16c5fb8 --- /dev/null +++ b/scripts/clc/collection.py @@ -0,0 +1,78 @@ +import os +import re + +import pystac +import pystac.item +import pystac.link +from pystac.provider import ProviderRole +from pystac.extensions.projection import ProjectionExtension + +from pystac.extensions.item_assets import ItemAssetsExtension, AssetDefinition + +from datetime import datetime, UTC + +import rasterio.warp + +from constants import * +from item import create_item, get_img_paths + +def proj_epsg_from_item_asset(item): + for asset_key in item.assets: + asset = item.assets[asset_key].to_dict() + if 'proj:epsg' in asset.keys(): + return(asset.get('proj:epsg')) + + +def create_collection(data_root: str): + + sp_extent = pystac.SpatialExtent([None, None, None, None]) + tmp_extent = pystac.TemporalExtent([datetime(1990, 1, 1, microsecond=0, tzinfo=UTC), None]) + extent = pystac.Extent(sp_extent, tmp_extent) + + collection = pystac.Collection(id=COLLECTION_ID, + description=COLLECTION_DESCRIPTION, + title=COLLECTION_TITLE, + extent=extent, + 
keywords=COLLECTION_KEYWORDS, + license=COLLECTION_LICENSE, + stac_extensions=[] + ) + + + item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True) + item_assets.item_assets = { + key: AssetDefinition({"title": COLLITAS_TITLE_DICT[key].format(label='').strip(), + "media_type": COLLITAS_MEDIA_TYPE_DICT[key], + "roles": COLLITAS_ROLES_DICT[key]}) + for key in COLLITAS_TITLE_DICT + } + + collection.add_link(CLMS_LICENSE) + collection.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{collection.id}/{collection.id}.json")) + catalog = pystac.read_file(f"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json") + + collection.set_root(catalog) + collection.set_parent(catalog) + + collection.save_object() + + img_paths = get_img_paths(path=data_root) + + proj_epsg = [] + for img_path in img_paths: + item = create_item(img_path, data_root) + collection.add_item(item) + + item_epsg = proj_epsg_from_item_asset(item) + proj_epsg.append(item_epsg) + + item.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json")) + item.save_object() + + collection.make_all_asset_hrefs_relative() + collection.update_extent_from_items() + ProjectionExtension.add_to(collection) + collection.summaries = pystac.Summaries({'proj:epsg': list(set(proj_epsg))}) + + collection.save_object() + return(collection) \ No newline at end of file From 7a91ab60a1970e6295cf50be0bfd31828189b46b Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Thu, 2 May 2024 13:51:14 +0200 Subject: [PATCH 46/80] adds create clc collection script (untested) --- create_clc_collection.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 create_clc_collection.py diff --git a/create_clc_collection.py b/create_clc_collection.py new file mode 100644 index 0000000..6ccbafa --- /dev/null +++ b/create_clc_collection.py @@ -0,0 +1,9 @@ +import logging + +from scripts.clc.collection import create_collection + +LOGGER = logging.getLogger(__name__) + +if __name__ == "__main__": + 
logging.basicConfig(filename="create_clc_collection.log") + create_collection("") \ No newline at end of file From 5562eb80cd6c5f21c57850c663d2486912c6f588 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Thu, 2 May 2024 13:51:38 +0200 Subject: [PATCH 47/80] adds __init__ files --- scripts/__init__.py | 0 scripts/clc/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 scripts/__init__.py create mode 100644 scripts/clc/__init__.py diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/clc/__init__.py b/scripts/clc/__init__.py new file mode 100644 index 0000000..e69de29 From 1a7cc692a0b04d191816ddaf026d3b246b9a8406 Mon Sep 17 00:00:00 2001 From: chorng Date: Thu, 2 May 2024 14:00:52 +0200 Subject: [PATCH 48/80] Refactor vpp item creation script * add create_asset_href function * break create_item to small functions * add boto core error and rasterio error to error handler --- scripts/vpp/item.py | 90 ++++++++++++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 26 deletions(-) diff --git a/scripts/vpp/item.py b/scripts/vpp/item.py index 55e0ca2..04604f8 100644 --- a/scripts/vpp/item.py +++ b/scripts/vpp/item.py @@ -9,12 +9,14 @@ import boto3 import pystac import rasterio as rio +from botocore.exceptions import BotoCoreError from botocore.paginate import PageIterator from jsonschema import Draft7Validator from jsonschema.exceptions import best_match from pystac.extensions.projection import ProjectionExtension from rasterio.coords import BoundingBox from rasterio.crs import CRS +from rasterio.errors import RasterioIOError from rasterio.warp import transform_bounds from referencing import Registry, Resource from shapely.geometry import Polygon, box, mapping @@ -47,7 +49,7 @@ def create_product_list(start_year: int, end_year: int) -> list[str]: def create_page_iterator(aws_session: boto3.Session, bucket: str, prefix: str) -> PageIterator: client = 
aws_session.client("s3") paginator = client.get_paginator("list_objects_v2") - return paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="-") + return paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="-", MaxKeys=10) def read_metadata_from_s3(bucket: str, key: str, aws_session: boto3.Session) -> tuple[BoundingBox, CRS, int, int]: @@ -79,17 +81,64 @@ def get_datetime(product_id: str) -> tuple[datetime, datetime]: return (datetime(year=year, month=1, day=1), datetime(year=year, month=12, day=31)) +def create_asset_href(bucket: str, asset_key: str) -> str: + return f"s3://{bucket}/" + asset_key + + def create_asset(asset_key: str) -> pystac.Asset: parameter = asset_key.split("_")[-1].split(".")[0] version = asset_key.split("_")[-3] + href = create_asset_href(BUCKET, asset_key) return pystac.Asset( - href=f"s3://{BUCKET}/" + asset_key, + href=href, media_type=pystac.MediaType.GEOTIFF, title=TITLE_MAP[parameter] + f" {version}", roles=["data"], ) +def create_core_item( + product_id: str, + geometry: Polygon, + start_datetime: datetime, + end_datetime: datetime, + create_datetime: datetime, + description: str, + collection: str, +): + return pystac.Item( + id=product_id, + geometry=mapping(geometry), + bbox=list(geometry.bounds), + datetime=None, + start_datetime=start_datetime, + end_datetime=end_datetime, + properties={"created": create_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, + collection=collection, + ) + + +def add_providers_to_item(item: pystac.Item, provider_list: list[pystac.Provider]) -> None: + item.common_metadata.providers = provider_list + + +def add_projection_extension_to_item(item: pystac.Item, crs: CRS, bounds: BoundingBox, height: int, width: int) -> None: + projection = ProjectionExtension.ext(item, add_if_missing=True) + projection.epsg = crs.to_epsg() + projection.bbox = [int(bounds.left), int(bounds.bottom), int(bounds.right), int(bounds.top)] + projection.shape = [height, width] + + +def 
add_links_to_item(item: pystac.Item, link_list: list[pystac.Link]) -> None: + for link in link_list: + item.links.append(link) + + +def add_assets_to_item(item: pystac.Item, asset_dict: dict[str, pystac.Asset]) -> None: + for key, asset in asset_dict.items(): + item.add_asset(key, asset) + + def create_item(aws_session: boto3.Session, bucket: str, tile: str) -> pystac.Item: client = aws_session.client("s3") parameters = client.list_objects(Bucket=bucket, Prefix=tile, Delimiter=".")["CommonPrefixes"] @@ -101,34 +150,23 @@ def create_item(aws_session: boto3.Session, bucket: str, tile: str) -> pystac.It description = get_description(product_id) start_datetime, end_datetime = get_datetime(product_id) + # core metadata + item = create_core_item(product_id, geom_wgs84, start_datetime, end_datetime, created, description, COLLECTION_ID) + # common metadata - item = pystac.Item( - id=product_id, - geometry=mapping(geom_wgs84), - bbox=list(geom_wgs84.bounds), - datetime=None, - start_datetime=start_datetime, - end_datetime=end_datetime, - properties={"created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), "description": description}, - collection=COLLECTION_ID, - ) - item.common_metadata.providers = [VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR] + provider_list = [VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR] + add_providers_to_item(item, provider_list) # extensions - projection = ProjectionExtension.ext(item, add_if_missing=True) - projection.epsg = crs.to_epsg() - projection.bbox = [int(bounds.left), int(bounds.bottom), int(bounds.right), int(bounds.top)] - projection.shape = [height, width] + add_projection_extension_to_item(item, crs, bounds, height, width) # links - links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] - for link in links: - item.links.append(link) + link_list = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] + add_links_to_item(item, link_list) # assets assets = {os.path.split(key)[-1][:-4].lower(): 
create_asset(key) for key in asset_keys} - for key, asset in assets.items(): - item.add_asset(key, asset) + add_assets_to_item(item, assets) return item @@ -142,11 +180,11 @@ def get_stac_validator(product_schema: str) -> Draft7Validator: def create_vpp_item(aws_session: boto3.Session, bucket: str, validator: Draft7Validator, tile: str) -> None: - item = create_item(aws_session, bucket, tile) - item.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json")) - error_msg = best_match(validator.iter_errors(item.to_dict())) try: + item = create_item(aws_session, bucket, tile) + item.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json")) + error_msg = best_match(validator.iter_errors(item.to_dict())) assert error_msg is None, f"Failed to create {item.id} item. Reason: {error_msg}." item.save_object() - except AssertionError as error: + except (AssertionError, BotoCoreError, RasterioIOError) as error: LOGGER.error(error) From db7df04a2e13fd50ca338eea5a98fee4a89b83f0 Mon Sep 17 00:00:00 2001 From: Xiaoman Huang Date: Thu, 2 May 2024 22:49:51 +0800 Subject: [PATCH 49/80] Updates for pre-commit hook --- scripts/.DS_Store | Bin 8196 -> 8196 bytes scripts/uabh/test_collection.json | 68 +++++++----------------------- scripts/uabh/test_item.json | 64 ++++++---------------------- scripts/uabh/uabh_collection.py | 2 - scripts/uabh/uabh_item.py | 6 +-- 5 files changed, 32 insertions(+), 108 deletions(-) diff --git a/scripts/.DS_Store b/scripts/.DS_Store index ec31351cffbeef958fb7d7f8b147fad119556ad9..64e4227841b26927f89261dee143e2eb51ae82b8 100644 GIT binary patch delta 41 rcmZp1XmQveD8N`cSxDd_W9jB600 Draft7Validator: # validate validator = get_stac_validator("./schema/products/uabh.json") error_msg = best_match(validator.iter_errors(collection.to_dict())) - if error_msg is not None: - print(error_msg) diff --git a/scripts/uabh/uabh_item.py b/scripts/uabh/uabh_item.py index ed4a0b7..499ea8f 100644 --- 
a/scripts/uabh/uabh_item.py +++ b/scripts/uabh/uabh_item.py @@ -2,7 +2,7 @@ import json import os -import xml.etree.ElementTree as ET +import xml.etree.ElementTree as ETree from datetime import datetime from typing import Final @@ -105,7 +105,7 @@ def str_to_datetime(datetime_str: str): def get_metadata_from_xml(xml: str) -> tuple[datetime, datetime, datetime]: - tree = ET.parse(xml) + tree = ETree.parse(xml) for t in tree.iter("{http://www.opengis.net/gml}beginPosition"): start_datetime = t.text for t in tree.iter("{http://www.opengis.net/gml}endPosition"): @@ -187,5 +187,3 @@ def get_stac_validator(product_schema: str) -> Draft7Validator: # validate validator = get_stac_validator("./schema/products/uabh.json") error_msg = best_match(validator.iter_errors(item.to_dict())) - if error_msg is not None: - print(error_msg) From 0979813b915bce27589e0d5c4e28c37391ba052e Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Fri, 3 May 2024 14:20:43 +0200 Subject: [PATCH 50/80] adds project_data_window_bbox to get more meaningful WGS84 bounds from CLC over Europe --- scripts/clc/item.py | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index 5fe85ad..d640911 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -10,10 +10,12 @@ from pyproj import Transformer from shapely.geometry import GeometryCollection, box, shape, mapping + from datetime import datetime, UTC import rasterio as rio import rasterio.warp +from rasterio.warp import Resampling from constants import * @@ -46,7 +48,7 @@ def deconstruct_clc_name(filename: str): return(filename_split) -def create_asset(filename: str, DOM_code: str): +def create_asset(filename: str, DOM_code: str) -> pystac.Asset: filename_elements = deconstruct_clc_name(filename) suffix = filename_elements['suffix'].replace('.', '_') @@ -55,7 +57,7 @@ def create_asset(filename: str, DOM_code: str): asset = pystac.Asset(href=filename, 
title=TITLE_DICT[suffix].format(label=label), media_type=MEDIA_TYPE_DICT[suffix], roles=ROLES_DICT[suffix]) return(f"{filename_elements['id']}_{suffix}", asset) -def get_img_paths(path: str): +def get_img_paths(path: str) -> list[str]: img_paths=[] for root, dirs, files in os.walk(path): if root.endswith(('DATA', 'French_DOMs')): @@ -66,7 +68,7 @@ def get_img_paths(path: str): return(img_paths) -def get_asset_files(path, clc_name): +def get_asset_files(path: str, clc_name: str) -> list[str]: clc_name_elements = deconstruct_clc_name(clc_name) @@ -87,12 +89,24 @@ def get_asset_files(path, clc_name): return(asset_files) -def project_bbox(img, target_epsg=4326): - target_crs = rio.crs.CRS.from_epsg(target_epsg) - bbox_warped = rio.warp.transform_bounds(img.crs, target_crs, *img.bounds) - return(bbox_warped) - -def create_item(img_path, root): +def project_bbox(img: rio.open, dst_crs: rio.CRS = rio.CRS.from_epsg(4326)) -> tuple[float]: + bbox = rio.warp.transform_bounds(img.crs, dst_crs, *img.bounds) + return(bbox) + +def project_data_window_bbox(src: rio.open, dst_crs: rio.CRS = rio.CRS.from_epsg(4326), dst_resolution: tuple = (0.25, 0.25)) -> tuple[float]: + data, transform = rio.warp.reproject(source=src.read(), + src_transform=src.transform, + src_crs=src.crs, + dst_crs=dst_crs, + dst_nodata=src.nodata, + dst_resolution=dst_resolution, + resampling=rio.warp.Resampling.max) + + data_window = rio.windows.get_data_window(data, nodata=src.nodata) + bbox = rio.windows.bounds(data_window, transform=transform) + return(bbox) + +def create_item(img_path: str, root: str) -> pystac.Item: clc_name_elements = deconstruct_clc_name(img_path) @@ -100,13 +114,15 @@ def create_item(img_path, root): asset_files = [f for f in asset_files if not f.endswith('aux')] year = clc_name_elements.get('reference_year') props = {'description': ITEM_DESCRIPTION.format(year=year), - 'created': None, - 'providers': CLC_PROVIDER.to_dict(), + 'created': None, + 'providers': CLC_PROVIDER.to_dict(), } 
with rio.open(img_path) as img: - bbox = project_bbox(img) + #bbox = project_bbox(img) + bbox = project_data_window_bbox(img) + params = { 'id': clc_name_elements.get('id'), 'bbox': bbox, From b087f2f9607cafc84d199488fdb2026993fffd2e Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Fri, 3 May 2024 16:40:56 +0200 Subject: [PATCH 51/80] changes constant imports, fixes link definition before collection exists and splits create_collection in two different parts --- scripts/clc/collection.py | 21 ++++++++++++++++--- scripts/clc/constants.py | 13 +----------- scripts/clc/item.py | 44 ++++++++++++++++++++++++++------------- 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/scripts/clc/collection.py b/scripts/clc/collection.py index 16c5fb8..adc933c 100644 --- a/scripts/clc/collection.py +++ b/scripts/clc/collection.py @@ -13,8 +13,21 @@ import rasterio.warp -from constants import * -from item import create_item, get_img_paths +from .constants import ( + COLLECTION_DESCRIPTION, + COLLECTION_ID, + COLLECTION_KEYWORDS, + COLLECTION_TITLE, + COLLECTION_LICENSE, + COLLITAS_MEDIA_TYPE_DICT, + COLLITAS_ROLES_DICT, + COLLITAS_TITLE_DICT, + CLMS_LICENSE, + WORKING_DIR, + STAC_DIR +) + +from .item import create_item, get_img_paths def proj_epsg_from_item_asset(item): for asset_key in item.assets: @@ -23,7 +36,7 @@ def proj_epsg_from_item_asset(item): return(asset.get('proj:epsg')) -def create_collection(data_root: str): +def create_collection() -> pystac.Collection: sp_extent = pystac.SpatialExtent([None, None, None, None]) tmp_extent = pystac.TemporalExtent([datetime(1990, 1, 1, microsecond=0, tzinfo=UTC), None]) @@ -55,7 +68,9 @@ def create_collection(data_root: str): collection.set_parent(catalog) collection.save_object() + return(collection) +def populate_collection(collection: pystac.Collection, data_root: str) -> pystac.Collection: img_paths = get_img_paths(path=data_root) proj_epsg = [] diff --git a/scripts/clc/constants.py b/scripts/clc/constants.py index 
d73b9b4..1f3d166 100644 --- a/scripts/clc/constants.py +++ b/scripts/clc/constants.py @@ -5,7 +5,7 @@ from pystac.provider import ProviderRole -os.chdir('x:\\projects\\ETC-DI\\Task_18\\clms-stac') +# os.chdir('x:\\projects\\ETC-DI\\Task_18\\clms-stac') WORKING_DIR = os.getcwd() STAC_DIR = 'stac_tests' @@ -149,14 +149,3 @@ -CLMS_CATALOG_LINK = pystac.link.Link( - rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/clms_catalog.json")) -) -COLLECTION_LINK = pystac.link.Link( - rel=pystac.RelType.COLLECTION, - target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json")), -) -ITEM_PARENT_LINK = pystac.link.Link( - rel=pystac.RelType.PARENT, - target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json")), -) \ No newline at end of file diff --git a/scripts/clc/item.py b/scripts/clc/item.py index d640911..4ce3dda 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -17,17 +17,20 @@ import rasterio.warp from rasterio.warp import Resampling -from constants import * - -# from .constants import ( -# DOM_DICT, -# TITLE_DICT, -# MEDIA_TYPE_DICT, -# ROLES_DICT, -# ITEM_DESCRIPTION, -# CLC_PROVIDER, -# ITEM_LICENSE -# ) +# from .constants import * + +from .constants import ( + DOM_DICT, + TITLE_DICT, + MEDIA_TYPE_DICT, + ROLES_DICT, + ITEM_DESCRIPTION, + CLC_PROVIDER, + CLMS_LICENSE, + WORKING_DIR, + STAC_DIR, + COLLECTION_ID +) def deconstruct_clc_name(filename: str): p = re.compile('^(?P[A-Z0-9a-z_]*).(?P.*)$') @@ -89,11 +92,11 @@ def get_asset_files(path: str, clc_name: str) -> list[str]: return(asset_files) -def project_bbox(img: rio.open, dst_crs: rio.CRS = rio.CRS.from_epsg(4326)) -> tuple[float]: - bbox = rio.warp.transform_bounds(img.crs, dst_crs, *img.bounds) +def project_bbox(src: rio.io.DatasetReader, dst_crs: rio.CRS = rio.CRS.from_epsg(4326)) -> tuple[float]: + bbox = rio.warp.transform_bounds(src.crs, 
dst_crs, *src.bounds) return(bbox) -def project_data_window_bbox(src: rio.open, dst_crs: rio.CRS = rio.CRS.from_epsg(4326), dst_resolution: tuple = (0.25, 0.25)) -> tuple[float]: +def project_data_window_bbox(src: rio.io.DatasetReader, dst_crs: rio.CRS = rio.CRS.from_epsg(4326), dst_resolution: tuple = (0.25, 0.25)) -> tuple[float]: data, transform = rio.warp.reproject(source=src.read(), src_transform=src.transform, src_crs=src.crs, @@ -153,7 +156,18 @@ def create_item(img_path: str, root: str) -> pystac.Item: transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0], ) - + CLMS_CATALOG_LINK = pystac.link.Link( + rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/clms_catalog.json")) + ) + COLLECTION_LINK = pystac.link.Link( + rel=pystac.RelType.COLLECTION, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json")), + ) + ITEM_PARENT_LINK = pystac.link.Link( + rel=pystac.RelType.PARENT, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json")), + ) + links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] item.add_links(links) From ac440ecf5325c96bba5dd3a6ae42d425aa9db3e5 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Fri, 3 May 2024 16:42:15 +0200 Subject: [PATCH 52/80] collection creation split in two parts --- create_clc_collection.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/create_clc_collection.py b/create_clc_collection.py index 6ccbafa..6facfb6 100644 --- a/create_clc_collection.py +++ b/create_clc_collection.py @@ -1,9 +1,10 @@ import logging -from scripts.clc.collection import create_collection +from scripts.clc.collection import create_collection, populate_collection LOGGER = logging.getLogger(__name__) if __name__ == "__main__": logging.basicConfig(filename="create_clc_collection.log") - create_collection("") \ No newline at end of file + 
collection = create_collection() + populate_collection(collection, data_root="../CLC_100m") \ No newline at end of file From e2e4569fb4d2a5490be1c99e003ddff0b8f3ac70 Mon Sep 17 00:00:00 2001 From: chorng Date: Mon, 6 May 2024 12:11:32 +0200 Subject: [PATCH 53/80] Add euhydro stac collection creation script --- create_euhydro_collection.py | 9 +++ schema/products/eu-hydro.json | 11 +-- scripts/euhydro/__init__.py | 0 scripts/euhydro/collection.py | 145 ++++++++++++++++++++++++++++++++++ scripts/euhydro/constants.py | 50 ++++++++++++ stacs/eu-hydro/eu-hydro.json | 12 +-- 6 files changed, 216 insertions(+), 11 deletions(-) create mode 100644 create_euhydro_collection.py create mode 100644 scripts/euhydro/__init__.py create mode 100644 scripts/euhydro/collection.py create mode 100644 scripts/euhydro/constants.py diff --git a/create_euhydro_collection.py b/create_euhydro_collection.py new file mode 100644 index 0000000..adacd87 --- /dev/null +++ b/create_euhydro_collection.py @@ -0,0 +1,9 @@ +import logging + +from scripts.euhydro.collection import create_euhydro_collection + +LOGGER = logging.getLogger(__name__) + +if __name__ == "__main__": + logging.basicConfig(filename="create_euhydro_collection.log") + create_euhydro_collection("") diff --git a/schema/products/eu-hydro.json b/schema/products/eu-hydro.json index 9954bbe..afe0cb2 100644 --- a/schema/products/eu-hydro.json +++ b/schema/products/eu-hydro.json @@ -62,7 +62,7 @@ "name": "Copernicus Land Monitoring Service", "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": ["licensor", "host"], - "url": "https://land.copernicus.eu/en" + "url": "https://land.copernicus.eu" } ] }, @@ -79,9 +79,7 @@ }, "temporal": { "const": { - "interval": [ 
- ["2006-01-01T00:00:00.000Z", "2012-12-31T00:00:00.000Z"] - ] + "interval": [["2006-01-01T00:00:00Z", "2012-12-31T00:00:00Z"]] } } } @@ -151,7 +149,10 @@ }, "assets": { "type": "object", - "required": ["eu-hydro_v1p3_user_guide", "how_use_esri_fgdb_in_qgis"], + "required": [ + "EU-HYDRO_V1p3_User_Guide_pdf", + "How_use_ESRI_FGDB_in_QGIS_pdf" + ], "additionalProperties": { "type": "object", "properties": { diff --git a/scripts/euhydro/__init__.py b/scripts/euhydro/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/euhydro/collection.py b/scripts/euhydro/collection.py new file mode 100644 index 0000000..dbcb7d5 --- /dev/null +++ b/scripts/euhydro/collection.py @@ -0,0 +1,145 @@ +from __future__ import annotations + +import json +import logging +import os + +import pystac +from jsonschema import Draft7Validator +from jsonschema.exceptions import best_match +from pystac.extensions.projection import ProjectionExtension +from pystac.media_type import MediaType +from referencing import Registry, Resource + +from .constants import ( + CLMS_LICENSE, + COLLECTION_DESCRIPTION, + COLLECTION_EXTENT, + COLLECTION_ID, + COLLECTION_KEYWORDS, + COLLECTION_TITLE, + EUHYDRO_HOST_AND_LICENSOR, + STAC_DIR, + WORKING_DIR, +) + +LOGGER = logging.getLogger(__name__) + + +class CollectionCreationError(Exception): + pass + + +def get_stac_validator(product_schema: str) -> Draft7Validator: + with open(product_schema, encoding="utf-8") as f: + schema = json.load(f) + registry = Registry().with_resources( + [("http://example.com/schema.json", Resource.from_contents(schema))], + ) + return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) + + +def get_files(root: str, file_extension: str) -> list[str]: + files = [] + for dirpath, _, filenames in os.walk(root): + files += [os.path.join(dirpath, filename) for filename in filenames if filename.endswith(f".{file_extension}")] + return files + + +def get_gdb(root: str) -> list[str]: + gdb_dirs 
= [] + for dirpath, dirnames, _ in os.walk(root): + if dirnames: + gdb_dirs += [os.path.join(dirpath, dirname) for dirname in dirnames if dirname.endswith(".gdb")] + return gdb_dirs + + +def create_asset(filename, asset_path): + extension = filename.split(".")[-1] + asset_id = filename.replace(".", "_") + media_type_map = { + "gpkg": MediaType.GEOPACKAGE, + "gdb": "application/x-filegdb", + "xml": MediaType.XML, + "pdf": MediaType.PDF, + } + role_map = { + "gpkg": ["data"], + "gdb": ["data"], + "xml": ["metadata"], + "pdf": ["metadata"], + } + title = " ".join([word.capitalize() for word in asset_id.split("_")[:-1]]) + return asset_id, pystac.Asset( + href=asset_path, media_type=media_type_map[extension], title=title, roles=role_map[extension] + ) + + +def collect_assets(root: str) -> list[str]: + asset_list = get_files(root, "xml") + get_files(root, "pdf") + get_files(root, "gpkg") + get_gdb(root) + assets = {} + for asset_path in asset_list: + _, tail = os.path.split(asset_path) + asset_id, asset = create_asset(tail, asset_path) + if asset_id not in assets: + assets[asset_id] = asset + return assets + + +def add_summaries_to_collection(collection: pystac.Collection, epsg_list: list[int]) -> None: + summaries = ProjectionExtension.summaries(collection, add_if_missing=True) + summaries.epsg = epsg_list + + +def add_links_to_collection(collection: pystac.Collection, link_list: list[pystac.Link]) -> None: + for link in link_list: + collection.links.append(link) + + +def add_assets_to_collection(collection: pystac.Collection, asset_dict: dict[str, pystac.Asset]) -> None: + for key, asset in asset_dict.items(): + collection.add_asset(key, asset) + + +def create_collection(euhydro_root: str) -> pystac.Collection: + try: + collection = pystac.Collection( + id=COLLECTION_ID, + description=COLLECTION_DESCRIPTION, + extent=COLLECTION_EXTENT, + title=COLLECTION_TITLE, + keywords=COLLECTION_KEYWORDS, + providers=[EUHYDRO_HOST_AND_LICENSOR], + ) + + # summaries + epsg_list = 
[3035] + add_summaries_to_collection(collection, epsg_list) + + # links + link_list = [CLMS_LICENSE] + add_links_to_collection(collection, link_list) + + # assets + assets = collect_assets(euhydro_root) + add_assets_to_collection(collection, assets) + + # update links + collection.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{collection.id}.json")) + catalog = pystac.read_file(f"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json") + collection.set_root(catalog) + collection.set_parent(catalog) + except Exception as error: + raise CollectionCreationError(f"Reason: {error}") + return collection + + +def create_euhydro_collection(euhydro_root: str) -> None: + try: + collection = create_collection(euhydro_root) + validator = get_stac_validator("schema/products/eu-hydro.json") + error_msg = best_match(validator.iter_errors(collection.to_dict())) + assert error_msg is None, f"Failed to create {collection.id} collection. Reason: {error_msg}." + collection.save_object() + except (AssertionError, CollectionCreationError) as error: + LOGGER.error(error) diff --git a/scripts/euhydro/constants.py b/scripts/euhydro/constants.py new file mode 100644 index 0000000..4795cd2 --- /dev/null +++ b/scripts/euhydro/constants.py @@ -0,0 +1,50 @@ +import os +from datetime import datetime +from typing import Final + +import pystac +from pystac.link import Link +from pystac.provider import ProviderRole + +COLLECTION_ID = "eu-hydro" +COLLECTION_DESCRIPTION = ( + "EU-Hydro is a dataset for all EEA38 countries and the United Kingdom providing photo-interpreted river network," + " consistent of surface interpretation of water bodies (lakes and wide rivers), and a drainage model (also called" + " Drainage Network), derived from EU-DEM, with catchments and drainage lines and nodes." 
+) +COLLECTION_EXTENT = pystac.Extent( + spatial=pystac.SpatialExtent([[-61.906047, -21.482245, 55.935919, 71.409109]]), + temporal=pystac.TemporalExtent([[datetime(year=2006, month=1, day=1), datetime(year=2012, month=12, day=31)]]), +) +COLLECTION_TITLE = "EU-Hydro River Network Database" +COLLECTION_KEYWORDS = [ + "Hydrography", + "Land cover", + "River", + "Environment", + "Ocean", + "Catchment area", + "Land", + "Hydrographic network", + "Drainage system", + "Hydrology", + "Landscape alteration", + "Inland water", + "Canal", + "Drainage", + "Catchment", + "Water body", +] +EUHYDRO_HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( + name="Copernicus Land Monitoring Service", + description=( + "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land" + " use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of" + " users in Europe and across the World in the field of environmental terrestrial applications." 
+ ), + roles=[ProviderRole.LICENSOR, ProviderRole.HOST], + url="https://land.copernicus.eu", +) +CLMS_LICENSE: Final[Link] = Link(rel="license", target="https://land.copernicus.eu/en/data-policy") +WORKING_DIR = os.getcwd() +STAC_DIR = "stac_tests" diff --git a/stacs/eu-hydro/eu-hydro.json b/stacs/eu-hydro/eu-hydro.json index 945eb2b..e4bffc9 100644 --- a/stacs/eu-hydro/eu-hydro.json +++ b/stacs/eu-hydro/eu-hydro.json @@ -31,7 +31,7 @@ "name": "Copernicus Land Monitoring Service", "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": ["licensor", "host"], - "url": "https://land.copernicus.eu/en" + "url": "https://land.copernicus.eu" } ], "extent": { @@ -39,29 +39,29 @@ "bbox": [[-61.906047, -21.482245, 55.935919, 71.409109]] }, "temporal": { - "interval": [["2006-01-01T00:00:00.000Z", "2012-12-31T00:00:00.000Z"]] + "interval": [["2006-01-01T00:00:00Z", "2012-12-31T00:00:00Z"]] } }, "assets": { - "eu-hydro_v1p3_user_guide": { + "EU-HYDRO_V1p3_User_Guide_pdf": { "href": "EU_HYDRO_v13\\fgdb\\euhydro_angerman_v013_FGDB\\Documentation\\EU-HYDRO_V1p3_User_Guide.pdf", "title": "EU Hydro v1.3 user guide", "type": "application/pdf", "roles": ["metadata"] }, - "how_use_esri_fgdb_in_qgis": { + "How_use_ESRI_FGDB_in_QGIS_pdf": { "href": "EU_HYDRO_v13\\fgdb\\euhydro_angerman_v013_FGDB\\Documentation\\How_use_ESRI_FGDB_in_QGIS.pdf", "title": "How use ESRI FGDB in QGIS", "type": "application/pdf", "roles": ["metadata"] }, - "angerman_v013_FGDB": { + "angerman_v013_fgdb": { "href": "EU_HYDRO_v13\\fgdb\\euhydro_angerman_v013_FGDB\\euhydro_angerman_v013.gdb", "title": "Angerman FGDB", "type": "application/x-filegdb", "roles": ["data"] }, - "angerman_v013_GPKG": { + "angerman_v013_gpkg": { "href": 
"EU_HYDRO_v13\\geopackage\\euhydro_angerman_v013.gpkg", "title": "Angerman GPKG", "type": "application/geopackage+sqlite3", From 62cebebe9b2b8c2f4131f681f2d13cf6aa769a47 Mon Sep 17 00:00:00 2001 From: chorng Date: Mon, 6 May 2024 12:26:19 +0200 Subject: [PATCH 54/80] Add n2k stac collection creation script --- create_n2k_collection.py | 9 ++ schema/products/n2k.json | 6 +- scripts/n2k/__init__.py | 0 scripts/n2k/collection.py | 169 +++++++++++++++++++++++++++++++ scripts/n2k/constants.py | 44 ++++++++ stacs/natura2000/natura2000.json | 12 +-- 6 files changed, 231 insertions(+), 9 deletions(-) create mode 100644 create_n2k_collection.py create mode 100644 scripts/n2k/__init__.py create mode 100644 scripts/n2k/collection.py create mode 100644 scripts/n2k/constants.py diff --git a/create_n2k_collection.py b/create_n2k_collection.py new file mode 100644 index 0000000..7a74691 --- /dev/null +++ b/create_n2k_collection.py @@ -0,0 +1,9 @@ +import logging + +from scripts.n2k.collection import create_n2k_collection + +LOGGER = logging.getLogger(__name__) + +if __name__ == "__main__": + logging.basicConfig(filename="create_n2k_collection.log") + create_n2k_collection("") diff --git a/schema/products/n2k.json b/schema/products/n2k.json index 1663b8f..a8442ab 100644 --- a/schema/products/n2k.json +++ b/schema/products/n2k.json @@ -54,7 +54,7 @@ "name": "Copernicus Land Monitoring Service", "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": ["licensor", "host"], - "url": "https://land.copernicus.eu/en" + "url": "https://land.copernicus.eu" } ] }, @@ -71,7 +71,7 @@ ] }, "temporal": { - "const": { "interval": [["2006-01-01T00:00:00.000Z", null]] } + "const": { "interval": [["2006-01-01T00:00:00Z", 
null]] } } } }, @@ -141,7 +141,7 @@ "assets": { "type": "object", "propertyNames": { - "pattern": "gdb$|gpkg$|zip$|metadata$|arcgis_layer$|qgis_layer$|ogc_layer$" + "pattern": "gdb$|gpkg$|zip$|xml$|lyr$|qml$|sld$" } } } diff --git a/scripts/n2k/__init__.py b/scripts/n2k/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/n2k/collection.py b/scripts/n2k/collection.py new file mode 100644 index 0000000..fc71bc2 --- /dev/null +++ b/scripts/n2k/collection.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +import json +import logging +import os + +import pystac +from jsonschema import Draft7Validator +from jsonschema.exceptions import best_match +from pystac import MediaType +from pystac.extensions.projection import ProjectionExtension +from referencing import Registry, Resource + +from .constants import ( + CLMS_LICENSE, + COLLECTION_DESCRIPTION, + COLLECTION_EXTENT, + COLLECTION_ID, + COLLECTION_KEYWORDS, + COLLECTION_TITLE, + N2K_HOST_AND_LICENSOR, + STAC_DIR, + WORKING_DIR, +) + +LOGGER = logging.getLogger(__name__) + + +class CollectionCreationError(Exception): + pass + + +def get_stac_validator(product_schema: str) -> Draft7Validator: + with open(product_schema, encoding="utf-8") as f: + schema = json.load(f) + registry = Registry().with_resources( + [("http://example.com/schema.json", Resource.from_contents(schema))], + ) + return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) + + +def get_files(n2k_root: str, file_extension: str) -> list[str]: + files = [] + for dirpath, _, filenames in os.walk(n2k_root): + files += [os.path.join(dirpath, filename) for filename in filenames if filename.endswith(f".{file_extension}")] + return files + + +def get_gdb(n2k_root: str) -> list[str]: + gdb_dirs = [] + for dirpath, dirnames, _ in os.walk(n2k_root): + if dirnames: + gdb_dirs += [os.path.join(dirpath, dirname) for dirname in dirnames if dirname.endswith(".gdb")] + return gdb_dirs + + +def 
create_asset(filename, asset_path): + extension = filename.split(".")[-1] + asset_id = filename.replace(".", "_") + year = filename.split("_")[1] + file_format = asset_id.split("_")[-2].upper() + media_type_map = { + "zip": "application/zip", + "gpkg": MediaType.GEOPACKAGE, + "gdb": "application/x-filegdb", + "xml": MediaType.XML, + "lyr": "application/octet-stream", + "qml": "application/octet-stream", + "sld": "application/octet-stream", + } + title_map = { + "zip": f"Compressed Natura 2000 {year} Land Cover/Land Use Status {file_format}", + "gpkg": f"Natura 2000 {year} Land Cover/Land Use Status {file_format}", + "gdb": f"Natura 2000 {year} Land Cover/Land Use Status {file_format}", + "xml": f"Natura 2000 {year} Land Cover/Land Use Status Metadata", + "lyr": f"Natura 2000 {year} Land Cover/Land Use Status ArcGIS Layer File", + "qml": f"Natura 2000 {year} Land Cover/Land Use Status QGIS Layer File", + "sld": f"Natura 2000 {year} Land Cover/Land Use Status OGC Layer File", + } + role_map = { + "zip": ["data"], + "gpkg": ["data"], + "gdb": ["data"], + "xml": ["metadata"], + "lyr": ["metadata"], + "qml": ["metadata"], + "sld": ["metadata"], + } + return asset_id, pystac.Asset( + href=asset_path, media_type=media_type_map[extension], title=title_map[extension], roles=role_map[extension] + ) + + +def collect_assets(n2k_root: str) -> list[str]: + asset_list = ( + get_files(n2k_root, "xml") + + get_files(n2k_root, "lyr") + + get_files(n2k_root, "qml") + + get_files(n2k_root, "sld") + + get_files(n2k_root, "gpkg") + + get_gdb(n2k_root) + + get_files(n2k_root, "zip") + ) + assets = {} + for asset_path in asset_list: + _, tail = os.path.split(asset_path) + asset_id, asset = create_asset(tail, asset_path) + if asset_id not in assets: + assets[asset_id] = asset + return assets + + +def add_summaries_to_collection(collection: pystac.Collection, epsg_list: list[int]) -> None: + summaries = ProjectionExtension.summaries(collection, add_if_missing=True) + summaries.epsg = 
epsg_list + + +def add_links_to_collection(collection: pystac.Collection, link_list: list[pystac.Link]) -> None: + for link in link_list: + collection.links.append(link) + + +def add_assets_to_collection(collection: pystac.Collection, asset_dict: dict[str, pystac.Asset]) -> None: + for key, asset in asset_dict.items(): + collection.add_asset(key, asset) + + +def create_collection(n2k_root: str) -> pystac.Collection: + try: + collection = pystac.Collection( + id=COLLECTION_ID, + description=COLLECTION_DESCRIPTION, + extent=COLLECTION_EXTENT, + title=COLLECTION_TITLE, + keywords=COLLECTION_KEYWORDS, + providers=[N2K_HOST_AND_LICENSOR], + ) + + # summaries + epsg_list = [3035] + add_summaries_to_collection(collection, epsg_list) + + # links + link_list = [CLMS_LICENSE] + add_links_to_collection(collection, link_list) + + # assets + assets = collect_assets(n2k_root) + add_assets_to_collection(collection, assets) + + # update links + collection.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{collection.id}.json")) + catalog = pystac.read_file(f"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json") + collection.set_root(catalog) + collection.set_parent(catalog) + except Exception as error: + raise CollectionCreationError(f"Reasom: {error}") + return collection + + +def create_n2k_collection(n2k_root: str) -> None: + try: + collection = create_collection(n2k_root) + validator = get_stac_validator("schema/products/n2k.json") + error_msg = best_match(validator.iter_errors(collection.to_dict())) + assert error_msg is None, f"Failed to create {collection.id} collection. Reason: {error_msg}." 
+ collection.save_object() + except (AssertionError, CollectionCreationError) as error: + LOGGER.error(error) diff --git a/scripts/n2k/constants.py b/scripts/n2k/constants.py new file mode 100644 index 0000000..2f00773 --- /dev/null +++ b/scripts/n2k/constants.py @@ -0,0 +1,44 @@ +import os +from datetime import datetime +from typing import Final + +import pystac +from pystac.link import Link +from pystac.provider import ProviderRole + +COLLECTION_ID = "natura2000" +COLLECTION_DESCRIPTION = ( + "The Copernicus Land Cover/Land Use (LC/LU) status map as part of the Copernicus Land Monitoring Service (CLMS)" + " Local Component, tailored to the needs of biodiversity monitoring in selected Natura2000 sites (4790 sites of" + " natural and semi-natural grassland formations listed in Annex I of the Habitats Directive) including a 2km buffer" + " zone surrounding the sites and covering an area of 631.820 km² across Europe. LC/LU is extracted from VHR" + " satellite data and other available data." +) +COLLECTION_EXTENT = pystac.Extent( + spatial=pystac.SpatialExtent([[-16.82, 27.87, 33.17, 66.79]]), + temporal=pystac.TemporalExtent([[datetime(year=2006, month=1, day=1), None]]), +) +COLLECTION_TITLE = "Natura 2000 Land Cover/Land Use Status" +COLLECTION_KEYWORDS = [ + "Copernicus", + "Satellite image interpretation", + "Land monitoring", + "Land", + "Landscape alteration", + "Land use", + "Land cover", + "Landscape", +] +N2K_HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( + name="Copernicus Land Monitoring Service", + description=( + "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land" + " use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of" + " users in Europe and across the World in the field of environmental terrestrial applications." 
+ ), + roles=[ProviderRole.LICENSOR, ProviderRole.HOST], + url="https://land.copernicus.eu", +) +CLMS_LICENSE: Final[Link] = Link(rel="license", target="https://land.copernicus.eu/en/data-policy") +WORKING_DIR = os.getcwd() +STAC_DIR = "stac_tests" diff --git a/stacs/natura2000/natura2000.json b/stacs/natura2000/natura2000.json index 1a76943..055b201 100644 --- a/stacs/natura2000/natura2000.json +++ b/stacs/natura2000/natura2000.json @@ -23,7 +23,7 @@ "name": "Copernicus Land Monitoring Service", "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": ["licensor", "host"], - "url": "https://land.copernicus.eu/en" + "url": "https://land.copernicus.eu" } ], "extent": { @@ -31,7 +31,7 @@ "bbox": [[-16.82, 27.87, 33.17, 66.79]] }, "temporal": { - "interval": [["2006-01-01T00:00:00.000Z", null]] + "interval": [["2006-01-01T00:00:00Z", null]] } }, "assets": { @@ -59,25 +59,25 @@ "type": "application/zip", "roles": ["data"] }, - "N2K_2018_3035_v010_metadata": { + "N2K_2018_3035_v010_xml": { "href": "Natura2000\\N2K2018\\N2K_2018_3035_v010_fgdb\\Metadata\\N2K_2018_3035_v010.xml", "title": "Natura 2000 Land Cover/Land Use Status 2018 Metadata", "type": "application/xml", "roles": ["metadata"] }, - "N2K_2018_3035_v010_arcgis_layer": { + "N2K_2018_3035_v010_lyr": { "href": "Natura2000\\N2K2018\\N2K_2018_3035_v010_fgdb\\Symbology\\N2K_LCLU_2018_v1-4-2.lyr", "title": "Natura 2000 Land Cover/Land Use Status 2018 ArcGIS Layer File", "type": "image/tiff; application=geotiff; profile=layer", "roles": ["metadata"] }, - "N2K_2018_3035_v010_qgis_layer": { + "N2K_2018_3035_v010_qml": { "href": "Natura2000\\N2K2018\\N2K_2018_3035_v010_fgdb\\Symbology\\N2K_LCLU_2018_v1-4-2.qml", "title": "Natura 2000 Land 
Cover/Land Use Status 2018 QGIS Layer File", "type": "image/tiff; application=geotiff; profile=layer", "roles": ["metadata"] }, - "N2K_2018_3035_v010_ogc_layer": { + "N2K_2018_3035_v010_sld": { "href": "Natura2000\\N2K2018\\N2K_2018_3035_v010_fgdb\\Symbology\\N2K_LCLU_2018_v1-4-2.sld", "title": "Natura 2000 Land Cover/Land Use Status 2018 OGC Layer File", "type": "image/tiff; application=geotiff; profile=layer", From 57d305590c11a16f55fed00d5100c39ac980b769 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 6 May 2024 13:47:45 +0200 Subject: [PATCH 55/80] get_asset_files now catches readme plus bug fix in legend file handling --- scripts/clc/item.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index 4ce3dda..ff67535 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -32,7 +32,7 @@ COLLECTION_ID ) -def deconstruct_clc_name(filename: str): +def deconstruct_clc_name(filename: str) -> dict[str]: p = re.compile('^(?P[A-Z0-9a-z_]*).(?P.*)$') m = p.search(os.path.basename(filename)) @@ -74,20 +74,23 @@ def get_img_paths(path: str) -> list[str]: def get_asset_files(path: str, clc_name: str) -> list[str]: clc_name_elements = deconstruct_clc_name(clc_name) + id = clc_name_elements['id'] + dom_code = clc_name_elements['DOM_code'] asset_files = [] - for root, dirs, files in os.walk(path): - if not clc_name_elements['DOM_code'] and 'French_DOMs' in root: + for root, _, files in os.walk(path): + if not dom_code and 'French_DOMs' in root: continue - if clc_name_elements['DOM_code'] and ('Legend' in root and not 'French_DOMs' in root): + if dom_code and ('Legend' in root and not 'French_DOMs' in root): continue for file in files: - if (file.startswith(clc_name + '.') or - file.endswith((f'{clc_name_elements["DOM_code"]}.tif.lyr', 'QGIS.txt',)) and - clc_name in file): + if (file.startswith(id + '.') or + file.endswith((f'{dom_code}.tif.lyr', 'QGIS.txt',)) and id.lower() in root or + 
file == f'readme_{id}.txt'): + asset_files.append(os.path.join(root, file)) return(asset_files) From b01951564e216c6664d1eae3e58bbff72e2a566b Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 6 May 2024 13:53:03 +0200 Subject: [PATCH 56/80] adds stac_tests --- .gitignore | Bin 1088 -> 1220 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/.gitignore b/.gitignore index 50511a3c5fdd504eed151b9f9532d190a4d0234d..4ef32a5a510c16c749bd1cd45a9e6fdfeb8af1df 100644 GIT binary patch literal 1220 zcmaJ>&2HN;4EEW8-$6ik3Si|j>;c+tK!>g2e*U4c`$ggh zKTB;@q@b$25LW_T61Zs4k!2$zX8W~?DVTOj4WNx>#I`U&uKtcmF;9~N4Ixy z1dG0gzK^U@CLj3hEUJ!8t4FSAVC$DgvX?_QT%a~?kEBTBVp5h+l6p^VFV6h7`~Y#Y1_WWTr^j<8l4N>GS?Rz6 zp`8X3>&^tw3B#P-4Bev%X;M~zJDPxQ1efTRBdJtA|9`%F%(86Nfh2aWVs1W%$$apo zJ3n_2gb(;c^+be}2z9zw#>KNbvVK-L^E-O@S6(3dyTIMlS~G%ppvf~XX|&4qlj=b( zl*zXH0DB1FG}DQL9tIfSM8tec{fyxdi5|dFC*!+%kMRmSa?4{R4g$fz?T)jb$;L;s zl-&ycU5IaF5o;qHct2m!v`30H9jKiAbt3o(raMf~A|BZ3XV*5wczy6_mQir_O&&H4 zG$8Lr)4^U%mKjPJOyowR+nb(Nf*F5WVv&kkZpi#+Rx;AW18wN~2YGl}lAw7-ns<7!cr1++W`Zc9gWL>H{8T z9`o^L=89i0nHuG%U@U3gav!pp%&#in-rw^R^oey{Sd>&FbzRGLA}l|Y*FV|rLs*2e zG9|{Y_zkC*OfDJk5Td)B55fW6PdLA`4wGp{a5+QI=X}3EU_nwV{Sy zjR5Y7-^RF;Z<#PIWobc%P^V!vl{(gJ`Q2uX-1;Ha5D-Lu$yL^534G?!q(+-aD*L!p zD_lI9m}hA@xr>S3P2_T77u--@IX*u;R!Q{o+*ZRXt=U$Hs3&|K!lEhi5d9N^@I*O- z)Wk%phD~3uuQFbJ_ksTLCPMuVb(ni=CgMl569nYRN;k}MAW=u4s`imi6v-(JM}Zti zn%?Yb_0T-~cur_J0rRSR7w9qFcuh=P5~hjTLi4nQg^6-@HCW5bcg4R!!`JXY?Ux($ zaYC~+$@1*$43fadgU{4^e}*M2uJ16h>Kwc&Oapeec99{6GS{7^lbkIk4d^nAo$~Hv z2F&-?a!TkcJg2;+Q}l&JCjKJr_6a0;)QNFm)+=ed*QRXlzF<$ztd19GtJNM<3DK%INB}${-we>fL#BVhK From 97eefc993e393f3e0f8783c8cf83e6a41778d445 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 6 May 2024 13:55:08 +0200 Subject: [PATCH 57/80] this time really adds stac_tests --- .gitignore | Bin 1220 -> 1101 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/.gitignore b/.gitignore index 
4ef32a5a510c16c749bd1cd45a9e6fdfeb8af1df..26bc096be54d0d51756b422b8b06c5af114c1521 100644 GIT binary patch literal 1101 zcmaJ=O>f*F5WVv&kkZpihD+5SkffDTrO~Rp%B89-jAw1K7!cr1++W`Zc9gWL>H{9$ z4D<13=89i0nHue24Z)(@$MfFSZqu5wS7z-Jy!YI+k%XCIfU zg^NcM^QUA&(MU$buA`VpMy7rX_(!uU1Z3i%yrw-NzE3M2J146t@iF@ z2F&-?a!TkcJg2;+Q}Dtf6MvC*`vj6a>cluO>$S4&Yg0CNU$7@upp)oKqqs#0%S z|L}bXqj4|1_j)e#p~~7|4|J04wkR;#FV|&hD6*{LK*;~Cw*OI3?T%5oWUJ$Cxp!hJ hi3gw8se{8(AQ);jlT9ih;Ybz^YTlh(G_Zv=VbJYL< literal 1220 zcmaJ>&2HN;4EEW8-$6ik3Si|j>;c+tK!>g2e*U4c`$ggh zKTB;@q@b$25LW_T61Zs4k!2$zX8W~?DVTOj4WNx>#I`U&uKtcmF;9~N4Ixy z1dG0gzK^U@CLj3hEUJ!8t4FSAVC$DgvX?_QT%a~?kEBTBVp5h+l6p^VFV6h7`~Y#Y1_WWTr^j<8l4N>GS?Rz6 zp`8X3>&^tw3B#P-4Bev%X;M~zJDPxQ1efTRBdJtA|9`%F%(86Nfh2aWVs1W%$$apo zJ3n_2gb(;c^+be}2z9zw#>KNbvVK-L^E-O@S6(3dyTIMlS~G%ppvf~XX|&4qlj=b( zl*zXH0DB1FG}DQL9tIfSM8tec{fyxdi5|dFC*!+%kMRmSa?4{R4g$fz?T)jb$;L;s zl-&ycU5IaF5o;qHct2m!v`30H9jKiAbt3o(raMf~A|BZ3XV*5wczy6_mQir_O&&H4 zG$8Lr)4^U%mKjPJOyowR+nb(N Date: Mon, 6 May 2024 15:31:01 +0200 Subject: [PATCH 58/80] get_asset_files selects right legend files now; some cosmetic code changes --- scripts/clc/item.py | 49 ++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index ff67535..a0a3605 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -46,54 +46,67 @@ def deconstruct_clc_name(filename: str) -> dict[str]: m = p.search(filename_split['id']) if m: - return(m.groupdict() | filename_split) + return m.groupdict() | filename_split else: - return(filename_split) + return filename_split def create_asset(filename: str, DOM_code: str) -> pystac.Asset: filename_elements = deconstruct_clc_name(filename) + id = filename_elements['id'] suffix = filename_elements['suffix'].replace('.', '_') + if id.startswith('readme'): + key = 'readme_' + suffix + elif id.endswith('QGIS'): + key = 'legend_' + suffix + else: + key = suffix + label = DOM_DICT[DOM_code] - asset = 
pystac.Asset(href=filename, title=TITLE_DICT[suffix].format(label=label), media_type=MEDIA_TYPE_DICT[suffix], roles=ROLES_DICT[suffix]) - return(f"{filename_elements['id']}_{suffix}", asset) + asset = pystac.Asset(href=filename, title=TITLE_DICT[key].format(label=label), media_type=MEDIA_TYPE_DICT[key], roles=ROLES_DICT[key]) + return f"{filename_elements['id']}_{suffix}", asset -def get_img_paths(path: str) -> list[str]: +def get_img_paths(data_root: str) -> list[str]: img_paths=[] - for root, dirs, files in os.walk(path): + for root, _, files in os.walk(data_root): if root.endswith(('DATA', 'French_DOMs')): for file in files: if file.endswith('.tif'): img_paths.append(os.path.join(root, file)) - return(img_paths) + return img_paths -def get_asset_files(path: str, clc_name: str) -> list[str]: +def get_asset_files(data_root: str, img_path: str) -> list[str]: - clc_name_elements = deconstruct_clc_name(clc_name) + clc_name_elements = deconstruct_clc_name(img_path) id = clc_name_elements['id'] dom_code = clc_name_elements['DOM_code'] asset_files = [] - for root, _, files in os.walk(path): + for root, _, files in os.walk(data_root): if not dom_code and 'French_DOMs' in root: + continue + + if dom_code and 'Legend' in root and not 'French_DOMs' in root: continue - if dom_code and ('Legend' in root and not 'French_DOMs' in root): + if not 'U{update_campaign}_{theme}{reference_year}_V{release_year}'.format(**clc_name_elements).lower() in root: continue - + for file in files: + if (file.startswith(id + '.') or - file.endswith((f'{dom_code}.tif.lyr', 'QGIS.txt',)) and id.lower() in root or + file.endswith(f'{dom_code}.tif.lyr') or + file.endswith('QGIS.txt',) or file == f'readme_{id}.txt'): asset_files.append(os.path.join(root, file)) - return(asset_files) + return asset_files def project_bbox(src: rio.io.DatasetReader, dst_crs: rio.CRS = rio.CRS.from_epsg(4326)) -> tuple[float]: bbox = rio.warp.transform_bounds(src.crs, dst_crs, *src.bounds) @@ -110,13 +123,13 @@ def 
project_data_window_bbox(src: rio.io.DatasetReader, dst_crs: rio.CRS = rio.C data_window = rio.windows.get_data_window(data, nodata=src.nodata) bbox = rio.windows.bounds(data_window, transform=transform) - return(bbox) + return bbox -def create_item(img_path: str, root: str) -> pystac.Item: +def create_item(img_path: str, data_root: str) -> pystac.Item: clc_name_elements = deconstruct_clc_name(img_path) - asset_files = get_asset_files(root, clc_name=clc_name_elements['id']) + asset_files = get_asset_files(data_root, img_path) asset_files = [f for f in asset_files if not f.endswith('aux')] year = clc_name_elements.get('reference_year') props = {'description': ITEM_DESCRIPTION.format(year=year), @@ -174,5 +187,5 @@ def create_item(img_path: str, root: str) -> pystac.Item: links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] item.add_links(links) - return(item) + return item From f16f063cc6a6e5278e4b6b959db506fd70ee0acb Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 6 May 2024 15:33:20 +0200 Subject: [PATCH 59/80] adds readme to dicts and adjusts keys for txt accordingly --- scripts/clc/constants.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/clc/constants.py b/scripts/clc/constants.py index 1f3d166..3ffc85e 100644 --- a/scripts/clc/constants.py +++ b/scripts/clc/constants.py @@ -74,10 +74,11 @@ 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid', 'tif_vat_cpg': pystac.MediaType.TEXT, 'tif_vat_dbf': 'application/dbf', - 'txt': pystac.MediaType.TEXT, + 'legend_txt': pystac.MediaType.TEXT, 'tif_lyr': 'image/tiff; application=geotiff; profile=layer', 'tfw': pystac.MediaType.TEXT, 'xml': pystac.MediaType.XML, + 'readme_txt': pystac.MediaType.TEXT, } ROLES_DICT = { @@ -87,10 +88,11 @@ 'tif_ovr': ['metadata'], 'tif_vat_cpg': ['metadata'], 'tif_vat_dbf': ['metadata'], - 'txt': ['metadata'], + 'legend_txt': ['metadata'], 'tif_lyr': ['metadata'], 'tfw': ['metadata'], 'xml': ['metadata'], + 
'readme_txt': ['metadata'], } TITLE_DICT = { @@ -100,10 +102,11 @@ 'tif_ovr': 'Pyramid {label}', 'tif_vat_cpg': 'Encoding {label}', 'tif_vat_dbf': 'Database {label}', - 'txt': 'Legends {label}', + 'legend_txt': 'Legends {label}', 'tif_lyr': 'Legend Layer {label}', 'tfw': 'World File {label}', 'xml': 'Single Band Land Classification Metadata {label}', + 'readme_txt': 'Description {label}', } CLC_PROVIDER = pystac.provider.Provider( From 402b35d4c4f7c21541451ffcf06301b07198f4eb Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 6 May 2024 15:33:45 +0200 Subject: [PATCH 60/80] introduces main() --- create_clc_collection.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/create_clc_collection.py b/create_clc_collection.py index 6facfb6..47a2bcd 100644 --- a/create_clc_collection.py +++ b/create_clc_collection.py @@ -4,7 +4,11 @@ LOGGER = logging.getLogger(__name__) -if __name__ == "__main__": +def main(): logging.basicConfig(filename="create_clc_collection.log") collection = create_collection() - populate_collection(collection, data_root="../CLC_100m") \ No newline at end of file + populate_collection(collection, data_root="../CLC_100m") + + +if __name__ == "__main__": + main() \ No newline at end of file From 48d990cd29182432640e7dfa5c1a83bfca115515 Mon Sep 17 00:00:00 2001 From: chorng Date: Mon, 6 May 2024 19:59:00 +0200 Subject: [PATCH 61/80] Refactor item creation script * package scripts.uabh * move constants to constants.py * fix get_metadata_from_xml * fix get_geom_wgs84 * break create_uabh_item into small functions * add create_uabh_items.py to go over all UABH archive and create items * update schema * update sample --- create_uabh_items.py | 18 ++ schema/products/uabh.json | 11 +- scripts/__init__.py | 0 scripts/uabh/__init__.py | 0 scripts/uabh/constants.py | 39 +++ scripts/uabh/item.py | 272 ++++++++++++++++++ scripts/uabh/test_collection.json | 117 -------- scripts/uabh/test_item.json | 95 ------ scripts/uabh/uabh_item.py 
| 189 ------------ .../AL001_TIRANA_UA2012_DHM_V020.json | 8 +- 10 files changed, 334 insertions(+), 415 deletions(-) create mode 100644 create_uabh_items.py create mode 100644 scripts/__init__.py create mode 100644 scripts/uabh/__init__.py create mode 100644 scripts/uabh/constants.py create mode 100644 scripts/uabh/item.py delete mode 100644 scripts/uabh/test_collection.json delete mode 100644 scripts/uabh/test_item.json delete mode 100644 scripts/uabh/uabh_item.py diff --git a/create_uabh_items.py b/create_uabh_items.py new file mode 100644 index 0000000..619c533 --- /dev/null +++ b/create_uabh_items.py @@ -0,0 +1,18 @@ +import logging +from glob import glob + +from scripts.uabh.item import create_uabh_item, get_stac_validator + +LOGGER = logging.getLogger(__name__) + + +def main(): + logging.basicConfig(filename="create_uabh_items.log") + validator = get_stac_validator("schema/products/uabh.json") + zip_list = glob("/Users/chung-xianghong/Downloads/uabh_samples/**/*.zip") + for zip_file in zip_list: + create_uabh_item(zip_file, validator) + + +if __name__ == "__main__": + main() diff --git a/schema/products/uabh.json b/schema/products/uabh.json index 616973a..f14b366 100644 --- a/schema/products/uabh.json +++ b/schema/products/uabh.json @@ -142,17 +142,8 @@ }, "assets": { "type": "object", - "minProperties": 4, - "maxProperties": 6, "propertyNames": { - "anyOf": [ - { "const": "dataset" }, - { "const": "quality_check_report" }, - { "const": "metadata" }, - { "const": "quality_control_report" }, - { "type": "string", "pattern": "^pixel_based_info" }, - { "const": "compressed_dataset" } - ] + "pattern": "^(A[LT]|B[AEG]|C[HYZ]|D[EK]|E[ELS]|F[IR]|H[RU]|I[EST]|L[TUV]|M[EKT]|N[LO]|P[LT]|R[OS]|S[EIK]|TR|UK|XK)[0-9]{3}" } } } diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/uabh/__init__.py b/scripts/uabh/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/uabh/constants.py 
b/scripts/uabh/constants.py new file mode 100644 index 0000000..e750b94 --- /dev/null +++ b/scripts/uabh/constants.py @@ -0,0 +1,39 @@ +import os +from typing import Final + +import pystac +from pystac.link import Link +from pystac.provider import ProviderRole + +HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( + name="Copernicus Land Monitoring Service", + description=( + "The Copernicus Land Monitoring Service provides " + "geographical information on land cover and its " + "changes, land use, ground motions, vegetation state, " + "water cycle and Earth's surface energy variables to " + "a broad range of users in Europe and across the " + "World in the field of environmental terrestrial " + "applications." + ), + roles=[ProviderRole.LICENSOR, ProviderRole.HOST], + url="https://land.copernicus.eu", +) + +COLLECTION_ID = "urban-atlas-building-height" + +CLMS_LICENSE: Final[Link] = Link(rel="license", target="https://land.copernicus.eu/en/data-policy") + +WORKING_DIR = os.getcwd() +STAC_DIR = "stac_tests" +CLMS_CATALOG_LINK: Final[Link] = Link( + rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) +) +COLLECTION_LINK: Final[Link] = Link( + rel=pystac.RelType.COLLECTION, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), +) +ITEM_PARENT_LINK: Final[Link] = Link( + rel=pystac.RelType.PARENT, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), +) diff --git a/scripts/uabh/item.py b/scripts/uabh/item.py new file mode 100644 index 0000000..8c84c05 --- /dev/null +++ b/scripts/uabh/item.py @@ -0,0 +1,272 @@ +from __future__ import annotations + +import json +import logging +import os +import re +import xml.etree.ElementTree as ETree +from datetime import datetime +from glob import glob + +import pystac +import rasterio as rio +from jsonschema import Draft7Validator +from 
jsonschema.exceptions import best_match +from pystac.extensions.projection import ProjectionExtension +from pystac.media_type import MediaType +from rasterio.coords import BoundingBox +from rasterio.crs import CRS +from rasterio.warp import transform_bounds +from referencing import Registry, Resource +from shapely.geometry import Polygon, box, mapping + +from .constants import ( + CLMS_CATALOG_LINK, + CLMS_LICENSE, + COLLECTION_ID, + COLLECTION_LINK, + HOST_AND_LICENSOR, + ITEM_PARENT_LINK, + STAC_DIR, + WORKING_DIR, +) + +LOGGER = logging.getLogger(__name__) + + +class ItemCreationError(Exception): + pass + + +def get_metadata_from_tif(root_dir: str, product_id: str) -> tuple[BoundingBox, CRS, int, int]: + tif_path = os.path.join(root_dir, f"Dataset/{product_id}.tif") + with rio.open(tif_path) as tif: + bounds = tif.bounds + crs = tif.crs + height = tif.height + width = tif.width + return (bounds, crs, height, width) + + +def str_to_datetime(datetime_str: str): + year, month, day = datetime_str[0:10].split("-") + return datetime(year=int(year), month=int(month), day=int(day)) + + +def get_namespace(tag: str, xml_string: str) -> str: + return re.search(r"xmlns:" + tag + '="([^"]+)"', xml_string).group(0).split("=")[1][1:-1] + + +def get_metadata_from_xml(xml: str) -> tuple[datetime, datetime, datetime]: + with open(xml, encoding="utf-8") as f: + xml_string = f.read() + gmd_namespace = get_namespace("gmd", xml_string) + gml_namespace = get_namespace("gml", xml_string) + tree = ETree.parse(xml) + root = tree.getroot() + start_datetime = root.findall("".join((".//{", gml_namespace, "}beginPosition")))[0].text # noqa: FLY002 + end_datetime = root.findall("".join((".//{", gml_namespace, "}endPosition")))[0].text # noqa: FLY002 + created = root.findall( + "".join( # noqa: FLY002 + ( + ".//{", + gmd_namespace, + "}CI_DateTypeCode[@codeListValue='creation']....//{", + gmd_namespace, + "}date/*", + ) + ) + )[0].text + return (str_to_datetime(start_datetime), 
str_to_datetime(end_datetime), str_to_datetime(created)) + + +def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: + bbox = rio.coords.BoundingBox( + *transform_bounds(crs.to_epsg(), 4326, bounds.left, bounds.bottom, bounds.right, bounds.top) + ) + return box(*(bbox.left, bbox.bottom, bbox.right, bbox.top)) + + +def get_description(product_id: str) -> str: + _, city, year, _, version = product_id.split("_") + return f"{year[2:]} {city.title()} building height {version}" + + +def get_files(uabh_root: str, city_code: str, asset_type: str) -> list[str]: + files = [] + for dirpath, _, filenames in os.walk(uabh_root): + files += [ + os.path.join(dirpath, filename) + for filename in filenames + if filename.startswith(city_code) and dirpath.endswith(asset_type) + ] + return files + + +def get_zip(uabh_root: str, city_code: str) -> str: + files = [] + for dirpath, _, filenames in os.walk(uabh_root): + files += [ + os.path.join(dirpath, filename) + for filename in filenames + if filename.startswith(city_code) and filename.endswith(".zip") + ] + return files + + +def collect_assets(uabh_root: str, city_code: str) -> dict[str, pystac.Asset]: + asset_list = ( + get_files(uabh_root, city_code, "Dataset") + + get_files(uabh_root, city_code, "Doc") + + get_files(uabh_root, city_code, "Metadata") + + get_files(uabh_root, city_code, "PixelBasedInfo") + + get_files(uabh_root, city_code, "QC") + + get_zip(uabh_root, city_code) + ) + assets = {} + for asset_path in asset_list: + asset_id, asset = create_asset(asset_path) + assets[asset_id] = asset + return assets + + +def create_asset(asset_path: str) -> tuple[str, pystac.Asset]: + _, tail = os.path.split(asset_path) + asset_id = tail.replace(".", "_") + asset_type = asset_path.split("/")[-2] + extension = tail.split(".")[-1] + media_type_map = { + "tif": MediaType.GEOTIFF, + "xml": MediaType.XML, + "pdf": MediaType.PDF, + "zip": "application/zip", + "shp": "application/octet-stream", + "shx": "application/octet-stream", + 
"dbf": "application/x-dbf", + "cpg": "text/plain", + "prj": "text/plain", + } + title_map = { + "Dataset": "Building Height Dataset", + "Doc": "Quality Check Report", + "Metadata": "Building Height Dataset Metadata", + "PixelBasedInfo": f"pixel_based_info_{extension}", + "QC": "Quality Control Report", + } + role_map = { + "tif": ["data"], + "xml": ["metadata"], + "pdf": ["metadata"], + "zip": ["data"], + "shp": ["metadata"], + "shx": ["metadata"], + "dbf": ["metadata"], + "cpg": ["metadata"], + "prj": ["metadata"], + } + if extension == "zip": + title = "Compressed Building Height Metadata" + else: + title = title_map[asset_type] + return asset_id, pystac.Asset( + href=asset_path, media_type=media_type_map[extension], title=title, roles=role_map[extension] + ) + + +def create_core_item( + product_id: str, + geometry: Polygon, + start_datetime: datetime, + end_datetime: datetime, + created_datetime: datetime, + description: str, + collection: str, +): + return pystac.Item( + id=product_id, + geometry=mapping(geometry), + bbox=list(geometry.bounds), + datetime=None, + start_datetime=start_datetime, + end_datetime=end_datetime, + properties={ + "created": created_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"), + "description": description, + }, + collection=collection, + ) + + +def add_providers_to_item(item: pystac.Item, provider_list: list[pystac.Provider]) -> None: + item.common_metadata.providers = provider_list + + +def add_projection_extension_to_item(item: pystac.Item, crs: CRS, bounds: BoundingBox, height: int, width: int) -> None: + projection = ProjectionExtension.ext(item, add_if_missing=True) + projection.epsg = crs.to_epsg() + projection.bbox = [int(bounds.left), int(bounds.bottom), int(bounds.right), int(bounds.top)] + projection.shape = [height, width] + + +def add_links_to_item(item: pystac.Item, link_list: list[pystac.Link]) -> None: + for link in link_list: + item.links.append(link) + + +def add_assets_to_item(item: pystac.Item, asset_dict: dict[str, 
pystac.Asset]) -> None: + for key, asset in asset_dict.items(): + item.add_asset(key, asset) + + +def create_item(zip_path: str) -> pystac.Item: + try: + head, tail = os.path.split(zip_path) + product_id = tail.split(".")[0].upper() + bounds, crs, height, width = get_metadata_from_tif(head, product_id) + xml_path = glob(os.path.join(head, "Metadata", f"{product_id.split('_')[0]}*.xml"))[0] + start_datetime, end_datetime, created_datetime = get_metadata_from_xml(xml_path) + geom_wgs84 = get_geom_wgs84(bounds, crs) + description = get_description(product_id) + + # create core item + item = create_core_item( + product_id, geom_wgs84, start_datetime, end_datetime, created_datetime, description, COLLECTION_ID + ) + + # common metadata + provider_list = [HOST_AND_LICENSOR] + add_providers_to_item(item, provider_list) + + # extensions + add_projection_extension_to_item(item, crs, bounds, height, width) + + # links + link_list = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] + add_links_to_item(item, link_list) + + # assets + asset_dict = collect_assets(head, product_id.split("_")[0]) + add_assets_to_item(item, asset_dict) + except Exception as error: + raise ItemCreationError(error) + return item + + +def create_uabh_item(zip_path: str, validator: Draft7Validator) -> None: + try: + item = create_item(zip_path) + item.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json")) + error_msg = best_match(validator.iter_errors(item.to_dict())) + assert error_msg is None, f"Failed to create {item.id} item. Reason: {error_msg}." 
+ item.save_object() + except (AssertionError, ItemCreationError) as error: + LOGGER.error(error) + + +def get_stac_validator(product_schema: str) -> Draft7Validator: + with open(product_schema, encoding="utf-8") as f: + schema = json.load(f) + registry = Registry().with_resources( + [("http://example.com/schema.json", Resource.from_contents(schema))], + ) + return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) diff --git a/scripts/uabh/test_collection.json b/scripts/uabh/test_collection.json deleted file mode 100644 index 04a3357..0000000 --- a/scripts/uabh/test_collection.json +++ /dev/null @@ -1,117 +0,0 @@ -{ - "type": "Collection", - "id": "urban-atlas-building-height", - "stac_version": "1.0.0", - "description": "Urban Atlas building height over capital cities.", - "links": [ - { - "rel": "root", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_collection.json", - "type": "application/json", - "title": "Urban Atlas Building Height 10m" - }, - { - "rel": "license", - "href": "https://land.copernicus.eu/en/data-policy", - "title": "Legal notice on the use of CLMS data" - }, - { - "rel": "root", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", - "title": "CLMS Catalog" - }, - { - "rel": "parent", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", - "title": "CLMS Catalog" - }, - { - "rel": "self", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_collection.json", - "type": "application/json" - } - ], - "stac_extensions": [ - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/projection/v1.1.0/schema.json" - ], - "item_assets": { - "dataset": { - "title": "Building height raster", - "media_type": "image/tiff; application=geotiff", - "roles": ["data"] - }, - "quality_check_report": { - "title": "Quality check report", - "media_type": "application/pdf", - "roles": 
["metadata"] - }, - "metadata": { - "title": "Metadata", - "media_type": "application/xml", - "roles": ["metadata"] - }, - "quality_control_report": { - "title": "Quality control report", - "media_type": "application/pdf", - "roles": ["metadata"] - }, - "pixel_based_info_shp": { - "title": "Pixel based info shape format", - "media_type": "application/octet-stream", - "roles": ["metadata"] - }, - "pixel_based_info_shx": { - "title": "Pixel based info shape index", - "media_type": "application/octet-stream", - "roles": ["metadata"] - }, - "pixel_based_info_dbf": { - "title": "Pixel based info attribute", - "media_type": "application/x-dbf", - "roles": ["metadata"] - }, - "pixel_based_info_prj": { - "title": "Pixel based info projection description", - "media_type": "text/plain", - "roles": ["metadata"] - }, - "pixel_based_info_cpg": { - "title": "Pixel based info character encoding", - "media_type": "text/plain", - "roles": ["metadata"] - }, - "compressed_dataset": { - "title": "Compressed building height raster", - "media_type": "application/zip", - "roles": ["data"] - } - }, - "title": "Urban Atlas Building Height 10m", - "extent": { - "spatial": { - "bbox": [ - [ - -21.210399013454868, 62.99044383484405, -20.96981298030872, - 63.339366607232876 - ] - ] - }, - "temporal": { - "interval": [["2012-01-01T00:00:00Z", null]] - } - }, - "license": "proprietary", - "keywords": ["Buildings", "Building height", "Elevation"], - "providers": [ - { - "name": "Copernicus Land Monitoring Service", - "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", - "roles": ["licensor", "host"], - "url": "https://land.copernicus.eu" - } - ], - "summaries": { - "proj:epsg": [3035] - } -} diff --git 
a/scripts/uabh/test_item.json b/scripts/uabh/test_item.json deleted file mode 100644 index 9ce11a2..0000000 --- a/scripts/uabh/test_item.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "type": "Feature", - "stac_version": "1.0.0", - "id": "AT001_WIEN_UA2012_DHM_v020", - "properties": { - "created": "2017-12-29T00:00:00Z", - "description": "2012 Wien building height", - "start_datetime": "2011-04-25T00:00:00Z", - "end_datetime": "2014-03-05T00:00:00Z", - "providers": [ - { - "name": "Copernicus Land Monitoring Service", - "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", - "roles": ["licensor", "host"], - "url": "https://land.copernicus.eu" - } - ], - "proj:epsg": 3035, - "proj:bbox": [4780170, 2793000, 4809960, 2821690], - "proj:shape": [2869, 2979], - "datetime": null - }, - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-20.96981298030872, 62.99044383484405], - [-20.96981298030872, 63.339366607232876], - [-21.210399013454868, 63.339366607232876], - [-21.210399013454868, 62.99044383484405], - [-20.96981298030872, 62.99044383484405] - ] - ] - }, - "links": [ - { - "rel": "license", - "href": "https://land.copernicus.eu/en/data-policy" - }, - { - "rel": "root", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/clms_catalog.json", - "title": "CLMS Catalog" - }, - { - "rel": "parent", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/urban-atlas-building-height/urban-atlas-building-height.json", - "title": "Urban Atlas Building Height 10m" - }, - { - "rel": "collection", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/stacs/urban-atlas-building-height/urban-atlas-building-height.json", - "title": "Urban Atlas Building Height 10m" - }, - { - "rel": 
"self", - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/clms-stac-1/scripts/vabh/test_item.json", - "type": "application/json" - } - ], - "assets": { - "dataset": { - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Dataset/AT001_WIEN_UA2012_DHM_v020.tif", - "type": "image/tiff; application=geotiff", - "title": "Building Height Dataset", - "roles": ["data"] - }, - "quality_check_report": { - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Doc/AT001_WIEN_UA2012_DHM_QC_Reportv020.pdf", - "type": "application/pdf", - "title": "Quality Check Report", - "roles": ["metadata"] - }, - "metadata": { - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020/Metadata/AT001_WIEN_UA2012_DHM_metadata_v020.xml", - "type": "application/xml", - "title": "Building Height Dataset Metadata", - "roles": ["metadata"] - }, - "compressed_dataset": { - "href": "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020.zip", - "type": "application/zip", - "title": "Compressed Building Height Metadata", - "roles": ["data"] - } - }, - "bbox": [ - -21.210399013454868, 62.99044383484405, -20.96981298030872, - 63.339366607232876 - ], - "stac_extensions": [ - "https://stac-extensions.github.io/projection/v1.1.0/schema.json" - ], - "collection": "urban-atlas-building-height" -} diff --git a/scripts/uabh/uabh_item.py b/scripts/uabh/uabh_item.py deleted file mode 100644 index 499ea8f..0000000 --- a/scripts/uabh/uabh_item.py +++ /dev/null @@ -1,189 +0,0 @@ -from __future__ import annotations - -import json -import os -import xml.etree.ElementTree as ETree -from datetime import datetime -from typing import Final - -import pystac -import rasterio as rio -from jsonschema import Draft7Validator -from jsonschema.exceptions import best_match -from pyproj import Transformer -from pystac.extensions.projection import ProjectionExtension -from pystac.link import Link -from pystac.provider import 
ProviderRole -from rasterio.coords import BoundingBox -from rasterio.crs import CRS -from referencing import Registry, Resource -from shapely.geometry import Polygon, box, mapping - -KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020" -head, tail = os.path.split(KEY) -(product_id, product_version) = tail.rsplit("_", 1) - -PATH_Dataset = os.path.join(KEY, "Dataset/" + tail + ".tif") -PATH_Doc = os.path.join(KEY, "Doc/" + product_id + "_QC_Report" + product_version + ".pdf") -PATH_Metadata = os.path.join(KEY, "Metadata/" + product_id + "_metadata_" + product_version + ".xml") -PATH_Zip = os.path.join(head, tail + ".zip") - -ASSET_dataset = pystac.Asset( - href=PATH_Dataset, - media_type=pystac.MediaType.GEOTIFF, - title="Building Height Dataset", - roles=["data"], -) - -ASSET_quality_check_report = pystac.Asset( - href=PATH_Doc, - media_type=pystac.MediaType.PDF, - title="Quality Check Report", - roles=["metadata"], -) - -ASSET_metadata = pystac.Asset( - href=PATH_Metadata, - media_type=pystac.MediaType.XML, - title="Building Height Dataset Metadata", - roles=["metadata"], -) - -ASSET_compressed_dataset = pystac.Asset( - href=PATH_Zip, - media_type="application/zip", - title="Compressed Building Height Metadata", - roles=["data"], -) - -HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( - name="Copernicus Land Monitoring Service", - description=( - "The Copernicus Land Monitoring Service provides " - "geographical information on land cover and its " - "changes, land use, ground motions, vegetation state, " - "water cycle and Earth's surface energy variables to " - "a broad range of users in Europe and across the " - "World in the field of environmental terrestrial " - "applications." 
- ), - roles=[ProviderRole.LICENSOR, ProviderRole.HOST], - url="https://land.copernicus.eu", -) - -COLLECTION_id = "urban-atlas-building-height" - -CLMS_LICENSE: Final[Link] = Link(rel="license", target="https://land.copernicus.eu/en/data-policy") - -WORKING_DIR = os.getcwd() -CLMS_CATALOG_LINK: Final[Link] = Link( - rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) -) -COLLECTION_LINK: Final[Link] = Link( - rel=pystac.RelType.COLLECTION, - target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_id}/{COLLECTION_id}.json")), -) -ITEM_PARENT_LINK: Final[Link] = Link( - rel=pystac.RelType.PARENT, - target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_id}/{COLLECTION_id}.json")), -) - - -def get_metadata_from_tif(key: str) -> tuple[BoundingBox, CRS, int, int]: - with rio.open(key) as tif: - bounds = tif.bounds - crs = tif.crs - height = tif.height - width = tif.width - tif.close() - return (bounds, crs, height, width) - - -def str_to_datetime(datetime_str: str): - year, month, day = datetime_str.split("-") - return datetime(year=int(year), month=int(month), day=int(day)) - - -def get_metadata_from_xml(xml: str) -> tuple[datetime, datetime, datetime]: - tree = ETree.parse(xml) - for t in tree.iter("{http://www.opengis.net/gml}beginPosition"): - start_datetime = t.text - for t in tree.iter("{http://www.opengis.net/gml}endPosition"): - end_datetime = t.text - for t in tree.iter("{http://www.isotc211.org/2005/gmd}dateStamp"): - created = t.find("{http://www.isotc211.org/2005/gco}Date").text - - return (str_to_datetime(start_datetime), str_to_datetime(end_datetime), str_to_datetime(created)) - - -def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: - transformer = Transformer.from_crs(crs.to_epsg(), 4326) - miny, minx = transformer.transform(bounds.left, bounds.bottom) - maxy, maxx = transformer.transform(bounds.right, bounds.top) - bbox = (minx, 
miny, maxx, maxy) - return box(*bbox) - - -def get_description(product_id: str) -> str: - country, city, year, product, version = product_id.split("_") - return f"{year[2:]} {city.title()} building height" - - -def get_stac_validator(product_schema: str) -> Draft7Validator: - with open(product_schema, encoding="utf-8") as f: - schema = json.load(f) - registry = Registry().with_resources( - [("http://example.com/schema.json", Resource.from_contents(schema))], - ) - return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) - - -if __name__ == "__main__": - head, tail = os.path.split(KEY) - (product_id,) = tail.split(".")[0].rsplit("_", 0) - bounds, crs, height, width = get_metadata_from_tif(PATH_Dataset) - start_datetime, end_datetime, created = get_metadata_from_xml(PATH_Metadata) - geom_wgs84 = get_geom_wgs84(bounds, crs) - description = get_description(product_id) - - item = pystac.Item( - stac_extensions=["https://stac-extensions.github.io/projection/v1.1.0/schema.json"], - id=tail, - geometry=mapping(geom_wgs84), - bbox=list(geom_wgs84.bounds), - datetime=None, - start_datetime=start_datetime, - end_datetime=end_datetime, - properties={ - "created": created.strftime("%Y-%m-%dT%H:%M:%SZ"), - "description": description, - }, - collection=COLLECTION_id, - ) - - item.common_metadata.providers = [HOST_AND_LICENSOR] - - # extensions - projection = ProjectionExtension.ext(item, add_if_missing=True) - projection.epsg = crs.to_epsg() - projection.bbox = [int(bounds.left), int(bounds.bottom), int(bounds.right), int(bounds.top)] - projection.shape = [height, width] - - # links - links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] - for link in links: - item.links.append(link) - - # assets - item.add_asset("dataset", ASSET_dataset) - item.add_asset("quality_check_report", ASSET_quality_check_report) - item.add_asset("metadata", ASSET_metadata) - item.add_asset("compressed_dataset", ASSET_compressed_dataset) - - # 
item.set_self_href(os.path.join(KEY, f"{tail}.json")) - item.set_self_href("scripts/vabh/test_item.json") - item.save_object() - - # validate - validator = get_stac_validator("./schema/products/uabh.json") - error_msg = best_match(validator.iter_errors(item.to_dict())) diff --git a/stacs/urban-atlas-building-height/AL001_TIRANA_UA2012_DHM_V020/AL001_TIRANA_UA2012_DHM_V020.json b/stacs/urban-atlas-building-height/AL001_TIRANA_UA2012_DHM_V020/AL001_TIRANA_UA2012_DHM_V020.json index 1374b5e..dd5384c 100644 --- a/stacs/urban-atlas-building-height/AL001_TIRANA_UA2012_DHM_V020/AL001_TIRANA_UA2012_DHM_V020.json +++ b/stacs/urban-atlas-building-height/AL001_TIRANA_UA2012_DHM_V020/AL001_TIRANA_UA2012_DHM_V020.json @@ -68,25 +68,25 @@ } ], "assets": { - "dataset": { + "AL001_TIRANA_UA2012_DHM_V020_tif": { "href": "BuildingHeight\\USBBH2012\\Dataset\\AL001_TIRANA_UA2012_DHM_V020.tif", "type": "image/tiff; application=geotiff", "title": "Building Height Dataset", "roles": ["data"] }, - "quality_check_report": { + "AL001_TIRANA_UA2012_DHM_QC_20191031_v020_pdf": { "href": "BuildingHeight\\USBBH2012\\Doc\\AL001_TIRANA_UA2012_DHM_QC_20191031_v020.pdf", "type": "application/pdf", "title": "Quality Check Report", "roles": ["metadata"] }, - "metadata": { + "AL001_TIRANA_UA2012_DHM_metadata_v020_xml": { "href": "BuildingHeight\\USBBH2012\\Metadata\\AL001_TIRANA_UA2012_DHM_metadata_v020.xml", "type": "application/xml", "title": "Building Height Dataset Metadata", "roles": ["metadata"] }, - "compressed_dataset": { + "AL001_TIRANA_UA2012_DHM_v020_zip": { "href": "BuildingHeight\\USBBH2012\\AL001_TIRANA_UA2012_DHM_v020.zip", "type": "application/zip", "title": "Compressed Building Height Metadata", From 07bc9cfb06342b43cc949da528b632bfd28ba543 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 6 May 2024 21:32:00 +0200 Subject: [PATCH 62/80] adds functions for creating and adding assets to the collection, improves variable naming --- scripts/clc/collection.py | 78 
++++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 17 deletions(-) diff --git a/scripts/clc/collection.py b/scripts/clc/collection.py index adc933c..24368d3 100644 --- a/scripts/clc/collection.py +++ b/scripts/clc/collection.py @@ -19,22 +19,56 @@ COLLECTION_KEYWORDS, COLLECTION_TITLE, COLLECTION_LICENSE, - COLLITAS_MEDIA_TYPE_DICT, - COLLITAS_ROLES_DICT, - COLLITAS_TITLE_DICT, + COLLECTION_TITLE_MAP, + COLLECTION_MEDIA_TYPE_MAP, + COLLECTION_ROLES_MAP, + COLLITAS_MEDIA_TYPE_MAP, + COLLITAS_ROLES_MAP, + COLLITAS_TITLE_MAP, CLMS_LICENSE, WORKING_DIR, STAC_DIR ) -from .item import create_item, get_img_paths +from .item import create_item, get_img_paths, deconstruct_clc_name -def proj_epsg_from_item_asset(item): +def proj_epsg_from_item_asset(item: pystac.Item) -> int: for asset_key in item.assets: asset = item.assets[asset_key].to_dict() if 'proj:epsg' in asset.keys(): - return(asset.get('proj:epsg')) - + return asset.get('proj:epsg') + +def get_collection_asset_files(data_root: str) -> list[str]: + + asset_files = [] + + for root, _, files in os.walk(data_root): + + for file in files: + + if ((file.startswith('clc-country-coverage') and file.endswith('pdf')) or + file.startswith('clc-file-naming-convention') or + (file.startswith('readme') and file.endswith('raster.txt'))): + + asset_files.append(os.path.join(root, file)) + + return asset_files + +def create_collection_asset(asset_file: str) -> pystac.Asset: + + filename_elements = deconstruct_clc_name(asset_file) + id = filename_elements['id'] + + if id.startswith('clc-file-naming'): + key = 'clc_file_naming' + elif id.startswith('clc-country-coverage'): + key = 'clc_country_coverage' + elif id.startswith('readme'): + key = 'readme' + + asset = pystac.Asset(href=asset_file, title=COLLECTION_TITLE_MAP[key], media_type=COLLECTION_MEDIA_TYPE_MAP[key], roles=COLLECTION_ROLES_MAP[key]) + return id, asset + def create_collection() -> pystac.Collection: @@ -54,10 +88,10 @@ def create_collection() -> 
pystac.Collection: item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True) item_assets.item_assets = { - key: AssetDefinition({"title": COLLITAS_TITLE_DICT[key].format(label='').strip(), - "media_type": COLLITAS_MEDIA_TYPE_DICT[key], - "roles": COLLITAS_ROLES_DICT[key]}) - for key in COLLITAS_TITLE_DICT + key: AssetDefinition({"title": COLLITAS_TITLE_MAP[key].format(label='').strip(), + "media_type": COLLITAS_MEDIA_TYPE_MAP[key], + "roles": COLLITAS_ROLES_MAP[key]}) + for key in COLLITAS_TITLE_MAP } collection.add_link(CLMS_LICENSE) @@ -66,12 +100,12 @@ def create_collection() -> pystac.Collection: collection.set_root(catalog) collection.set_parent(catalog) - collection.save_object() - return(collection) + + return collection def populate_collection(collection: pystac.Collection, data_root: str) -> pystac.Collection: - img_paths = get_img_paths(path=data_root) + img_paths = get_img_paths(data_root) proj_epsg = [] for img_path in img_paths: @@ -81,13 +115,23 @@ def populate_collection(collection: pystac.Collection, data_root: str) -> pystac item_epsg = proj_epsg_from_item_asset(item) proj_epsg.append(item_epsg) - item.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json")) + DOM_code = deconstruct_clc_name(img_path).get('DOM_code') + href = os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id.removesuffix(f'_FR_{DOM_code}')}/{item.id}.json") + item.set_self_href(href) item.save_object() + asset_files = get_collection_asset_files(data_root) + + for asset_file in asset_files: + key, asset = create_collection_asset(asset_file) + collection.assets |= {key: asset} + # if not key in collection.assets.keys(): + # collection.add_asset(key, asset) + collection.make_all_asset_hrefs_relative() collection.update_extent_from_items() ProjectionExtension.add_to(collection) collection.summaries = pystac.Summaries({'proj:epsg': list(set(proj_epsg))}) - collection.save_object() - return(collection) \ No newline at end 
of file + + return collection \ No newline at end of file From 1fe482d3df0ea8668a1dff79d025c0f0cd3fc5aa Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 6 May 2024 21:33:25 +0200 Subject: [PATCH 63/80] adds dicts for collection assets, improves naming --- scripts/clc/constants.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/scripts/clc/constants.py b/scripts/clc/constants.py index 3ffc85e..f82b3a2 100644 --- a/scripts/clc/constants.py +++ b/scripts/clc/constants.py @@ -21,7 +21,26 @@ COLLECTION_KEYWORDS = ["clms", "corine", "derived data", "land cover", "machine learning", "open data"] COLLECTION_LICENSE = 'proprietary' -COLLITAS_TITLE_DICT = { + +COLLECTION_TITLE_MAP = { + 'clc_country_coverage': 'Coverage', + 'clc_file_naming': 'Naming Convention Description', + 'readme': 'Description', +} + +COLLECTION_MEDIA_TYPE_MAP = { + 'clc_country_coverage': pystac.MediaType.PDF, + 'clc_file_naming': pystac.MediaType.TEXT, + 'readme': pystac.MediaType.TEXT, +} + +COLLECTION_ROLES_MAP = { + 'clc_country_coverage': ['metadata'], + 'clc_file_naming': ['metadata'], + 'readme': ['metadata'], +} + +COLLITAS_TITLE_MAP = { 'clc_map': 'Corine Land Cover Map', 'clc_map_statistics': 'Corine Land Cover Map Statistics', 'clc_map_pyramid': 'Pyramid', @@ -32,7 +51,7 @@ 'clc_map_metadata': 'Corine Land Cover Map Metadata', } -COLLITAS_MEDIA_TYPE_DICT = { +COLLITAS_MEDIA_TYPE_MAP = { 'clc_map': pystac.MediaType.COG, 'clc_map_statistics': pystac.MediaType.XML, 'clc_map_pyramid': 'image/tiff; application=geotiff; profile=pyramid', @@ -43,7 +62,7 @@ 'clc_map_metadata': pystac.MediaType.XML, } -COLLITAS_ROLES_DICT = { +COLLITAS_ROLES_MAP = { 'clc_map': ['data'], 'clc_map_statistics': ['metadata'], 'clc_map_pyramid': ['metadata'], @@ -58,7 +77,7 @@ CLMS_LICENSE = pystac.link.Link(rel='LICENSE', target="https://land.copernicus.eu/en/data-policy") -DOM_DICT = { +DOM_MAP = { 'GLP': 'Guadeloupe', 'GUF': 'French Guyana', 'MTQ': 
'Martinique', @@ -67,7 +86,7 @@ '': 'Europe', } -MEDIA_TYPE_DICT = { +ITEM_MEDIA_TYPE_MAP = { 'tif': pystac.MediaType.COG, 'tif_xml': pystac.MediaType.XML, 'tif_aux_xml': pystac.MediaType.XML, @@ -81,7 +100,7 @@ 'readme_txt': pystac.MediaType.TEXT, } -ROLES_DICT = { +ITEM_ROLES_MAP = { 'tif': ['data', 'visual'], 'tif_xml': ['metadata'], 'tif_aux_xml': ['metadata'], @@ -95,7 +114,7 @@ 'readme_txt': ['metadata'], } -TITLE_DICT = { +ITEM_TITLE_MAP = { 'tif': 'Single Band Land Classification {label}', 'tif_xml': 'TIFF Metadata {label}', 'tif_aux_xml': 'TIFF Statistics {label}', From 061ef3f11ab01f662a173e8d4709cc9b6bfa7fd8 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 6 May 2024 21:38:19 +0200 Subject: [PATCH 64/80] makes deconstruct_clc_name regex split on dot, reproject raster for Europe only (else bbox) --- scripts/clc/item.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index a0a3605..905f278 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -20,10 +20,10 @@ # from .constants import * from .constants import ( - DOM_DICT, - TITLE_DICT, - MEDIA_TYPE_DICT, - ROLES_DICT, + DOM_MAP, + ITEM_TITLE_MAP, + ITEM_MEDIA_TYPE_MAP, + ITEM_ROLES_MAP, ITEM_DESCRIPTION, CLC_PROVIDER, CLMS_LICENSE, @@ -32,8 +32,9 @@ COLLECTION_ID ) + def deconstruct_clc_name(filename: str) -> dict[str]: - p = re.compile('^(?P<id>[A-Z0-9a-z_]*).(?P<suffix>.*)$') + p = re.compile('^(?P<id>[A-Z0-9a-z_-]*)\\.(?P<suffix>.*)$') m = p.search(os.path.basename(filename)) filename_split = m.groupdict() @@ -51,8 +52,8 @@ def deconstruct_clc_name(filename: str) -> dict[str]: return filename_split -def create_asset(filename: str, DOM_code: str) -> pystac.Asset: - filename_elements = deconstruct_clc_name(filename) +def create_item_asset(asset_file: str, DOM_code: str) -> pystac.Asset: + filename_elements = deconstruct_clc_name(asset_file) id = filename_elements['id'] suffix = filename_elements['suffix'].replace('.', 
'_') @@ -63,9 +64,9 @@ def create_asset(filename: str, DOM_code: str) -> pystac.Asset: else: key = suffix - label = DOM_DICT[DOM_code] + label = DOM_MAP[DOM_code] - asset = pystac.Asset(href=filename, title=TITLE_DICT[key].format(label=label), media_type=MEDIA_TYPE_DICT[key], roles=ROLES_DICT[key]) + asset = pystac.Asset(href=asset_file, title=ITEM_TITLE_MAP[key].format(label=label), media_type=ITEM_MEDIA_TYPE_MAP[key], roles=ITEM_ROLES_MAP[key]) return f"{filename_elements['id']}_{suffix}", asset def get_img_paths(data_root: str) -> list[str]: @@ -79,7 +80,7 @@ def get_img_paths(data_root: str) -> list[str]: return img_paths -def get_asset_files(data_root: str, img_path: str) -> list[str]: +def get_item_asset_files(data_root: str, img_path: str) -> list[str]: clc_name_elements = deconstruct_clc_name(img_path) id = clc_name_elements['id'] @@ -129,7 +130,7 @@ def create_item(img_path: str, data_root: str) -> pystac.Item: clc_name_elements = deconstruct_clc_name(img_path) - asset_files = get_asset_files(data_root, img_path) + asset_files = get_item_asset_files(data_root, img_path) asset_files = [f for f in asset_files if not f.endswith('aux')] year = clc_name_elements.get('reference_year') props = {'description': ITEM_DESCRIPTION.format(year=year), @@ -139,8 +140,10 @@ def create_item(img_path: str, data_root: str) -> pystac.Item: with rio.open(img_path) as img: - #bbox = project_bbox(img) - bbox = project_data_window_bbox(img) + if clc_name_elements['DOM_code']: + bbox = project_bbox(img) + else: + bbox = project_data_window_bbox(img) params = { 'id': clc_name_elements.get('id'), @@ -156,7 +159,7 @@ def create_item(img_path: str, data_root: str) -> pystac.Item: for asset_file in asset_files: try: - key, asset = create_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code')) + key, asset = create_item_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code')) item.add_asset( key=key, asset=asset, From 0253e83f07cca3b13003bf561ad588a0ac38e2b3 Mon Sep 17 
00:00:00 2001 From: Sebastian Boeck Date: Mon, 6 May 2024 21:56:13 +0200 Subject: [PATCH 65/80] dev notebooks prior to removal --- scripts/clc/clms_collection_generator.ipynb | 155 +- scripts/clc/clms_item_generator.ipynb | 1759 ++++--------------- scripts/clc/reproject_data_bounds.ipynb | 112 ++ 3 files changed, 477 insertions(+), 1549 deletions(-) create mode 100644 scripts/clc/reproject_data_bounds.ipynb diff --git a/scripts/clc/clms_collection_generator.ipynb b/scripts/clc/clms_collection_generator.ipynb index 5cd107e..46fd7a3 100644 --- a/scripts/clc/clms_collection_generator.ipynb +++ b/scripts/clc/clms_collection_generator.ipynb @@ -83,7 +83,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "data_root = '../CLC_100m'" ] }, @@ -93,127 +92,59 @@ "metadata": {}, "outputs": [], "source": [ + "def create_collection(data_root: str):\n", "\n", + " sp_extent = pystac.SpatialExtent([None, None, None, None])\n", + " tmp_extent = pystac.TemporalExtent([datetime(1990, 1, 1, microsecond=0, tzinfo=UTC), None])\n", + " extent = pystac.Extent(sp_extent, tmp_extent)\n", "\n", - "sp_extent = pystac.SpatialExtent([None, None, None, None])\n", - "tmp_extent = pystac.TemporalExtent([datetime(1990, 1, 1, microsecond=0, tzinfo=UTC), None])\n", - "extent = pystac.Extent(sp_extent, tmp_extent)\n", + " collection = pystac.Collection(id=COLLECTION_ID,\n", + " description=COLLECTION_DESCRIPTION,\n", + " title=COLLECTION_TITLE,\n", + " extent=extent,\n", + " keywords=COLLECTION_KEYWORDS,\n", + " license=COLLECTION_LICENSE,\n", + " stac_extensions=[]\n", + " )\n", "\n", - "collection = pystac.Collection(id=COLLECTION_ID,\n", - " description=COLLECTION_DESCRIPTION,\n", - " title=COLLECTION_TITLE,\n", - " extent=extent,\n", - " keywords=COLLECTION_KEYWORDS,\n", - " license=COLLECTION_LICENSE,\n", - " stac_extensions=[]\n", - " )\n", "\n", + " item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True)\n", + " item_assets.item_assets = {\n", + " key: 
AssetDefinition({\"title\": COLLITAS_TITLE_DICT[key].format(label='').strip(),\n", + " \"media_type\": COLLITAS_MEDIA_TYPE_DICT[key], \n", + " \"roles\": COLLITAS_ROLES_DICT[key]})\n", + " for key in COLLITAS_TITLE_DICT\n", + " }\n", "\n", - "item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True)\n", - "item_assets.item_assets = {\n", - " key: AssetDefinition({\"title\": COLLITAS_TITLE_DICT[key].format(label='').strip(),\n", - " \"media_type\": COLLITAS_MEDIA_TYPE_DICT[key], \n", - " \"roles\": COLLITAS_ROLES_DICT[key]})\n", - " for key in COLLITAS_TITLE_DICT\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "collection.add_link(CLMS_LICENSE)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "collection.set_self_href(os.path.join(WORKING_DIR, f\"{STAC_DIR}/{collection.id}/{collection.id}.json\"))\n", - "catalog = pystac.read_file(f\"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json\")\n", + " collection.add_link(CLMS_LICENSE)\n", + " collection.set_self_href(os.path.join(WORKING_DIR, f\"{STAC_DIR}/{collection.id}/{collection.id}.json\"))\n", + " catalog = pystac.read_file(f\"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json\")\n", "\n", - "collection.set_root(catalog)\n", - "collection.set_parent(catalog)\n", + " collection.set_root(catalog)\n", + " collection.set_parent(catalog)\n", "\n", - "collection.save_object()\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "img_paths = get_img_paths(path=data_root)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "An error occured: 'tif_vat_dbf_xml'\n", - "An error occured: 'tif_ovr_aux_xml'\n" - ] - } - ], - "source": [ - "proj_epsg = []\n", - "for img_path in img_paths:\n", - " item = create_item(img_path, 
data_root)\n", - " collection.add_item(item)\n", + " collection.save_object()\n", "\n", - " item_epsg = proj_epsg_from_item_asset(item)\n", - " proj_epsg.append(item_epsg)\n", + " img_paths = get_img_paths(path=data_root)\n", "\n", - " item.set_self_href(os.path.join(WORKING_DIR, f\"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json\"))\n", - " item.save_object()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "collection.make_all_asset_hrefs_relative()\n", - "collection.update_extent_from_items()\n", - "ProjectionExtension.add_to(collection)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "collection.summaries = pystac.Summaries({'proj:epsg': list(set(proj_epsg))})" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "collection.save_object()" + " proj_epsg = []\n", + " for img_path in img_paths:\n", + " item = create_item(img_path, data_root)\n", + " collection.add_item(item)\n", + "\n", + " item_epsg = proj_epsg_from_item_asset(item)\n", + " proj_epsg.append(item_epsg)\n", + "\n", + " item.set_self_href(os.path.join(WORKING_DIR, f\"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json\"))\n", + " item.save_object()\n", + "\n", + " collection.make_all_asset_hrefs_relative()\n", + " collection.update_extent_from_items()\n", + " ProjectionExtension.add_to(collection)\n", + " collection.summaries = pystac.Summaries({'proj:epsg': list(set(proj_epsg))})\n", + "\n", + " collection.save_object()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/scripts/clc/clms_item_generator.ipynb b/scripts/clc/clms_item_generator.ipynb index 0ea90e1..19d4382 100644 --- a/scripts/clc/clms_item_generator.ipynb +++ b/scripts/clc/clms_item_generator.ipynb @@ -21,6 +21,7 @@ "from datetime import datetime, 
UTC\n", "\n", "import rasterio as rio\n", + "from rasterio.warp import Resampling\n", "import rasterio.warp\n", "import rasterio.crs\n", "\n", @@ -33,6 +34,16 @@ "execution_count": 2, "metadata": {}, "outputs": [], + "source": [ + "from constants import *\n", + "# from .constants import STAC_DIR, COLLECTION_ID" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], "source": [ "# img_path = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif'\n", "# os.path.split(img_path)" @@ -40,108 +51,108 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ - "# This goes into constants.py ...\n", - "\n", - "STAC_DIR = 'stac_tests'\n", - "COLLECTION_ID = 'corine-land-cover-raster'\n", - "\n", - "DOM_DICT = {\n", - " 'GLP': 'Guadeloupe',\n", - " 'GUF': 'French Guyana',\n", - " 'MTQ': 'Martinique',\n", - " 'MYT': 'Mayotte',\n", - " 'REU': 'Réunion',\n", - " '': 'Europe',\n", - "}\n", - "\n", - "MEDIA_TYPE_DICT = {\n", - " 'tif': pystac.MediaType.COG,\n", - " 'tif_xml': pystac.MediaType.XML,\n", - " 'tif_aux_xml': pystac.MediaType.XML,\n", - " 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid',\n", - " 'tif_vat_cpg': pystac.MediaType.TEXT,\n", - " 'tif_vat_dbf': 'application/dbf',\n", - " 'txt': pystac.MediaType.TEXT,\n", - " 'tif_lyr': 'image/tiff; application=geotiff; profile=layer',\n", - " 'tfw': pystac.MediaType.TEXT,\n", - " 'xml': pystac.MediaType.XML,\n", - "}\n", - "\n", - "ROLES_DICT = {\n", - " 'tif': ['data', 'visual'],\n", - " 'tif_xml': ['metadata'],\n", - " 'tif_aux_xml': ['metadata'],\n", - " 'tif_ovr': ['metadata'],\n", - " 'tif_vat_cpg': ['metadata'],\n", - " 'tif_vat_dbf': ['metadata'],\n", - " 'txt': ['metadata'],\n", - " 'tif_lyr': ['metadata'],\n", - " 'tfw': ['metadata'],\n", - " 'xml': ['metadata'],\n", - "}\n", - "\n", - "TITLE_DICT = {\n", - " 'tif': 'Single Band Land Classification {label}',\n", - " 'tif_xml': 
'TIFF Metadata {label}',\n", - " 'tif_aux_xml': 'TIFF Statistics {label}',\n", - " 'tif_ovr': 'Pyramid {label}',\n", - " 'tif_vat_cpg': 'Encoding {label}',\n", - " 'tif_vat_dbf': 'Database {label}',\n", - " 'txt': 'Legends {label}',\n", - " 'tif_lyr': 'Legend Layer {label}',\n", - " 'tfw': 'World File {label}',\n", - " 'xml': 'Single Band Land Classification Metadata {label}',\n", - "}\n", - "\n", - "CLC_PROVIDER = pystac.provider.Provider(\n", - " name='Copernicus Land Monitoring Service',\n", - " description=('The Copernicus Land Monitoring Service provides '\n", - " 'geographical information on land cover and its '\n", - " 'changes, land use, ground motions, vegetation state, '\n", - " 'water cycle and Earth\\'s surface energy variables to '\n", - " 'a broad range of users in Europe and across the World '\n", - " 'in the field of environmental terrestrial applications.'),\n", - " roles=[ProviderRole.LICENSOR, ProviderRole.HOST],\n", - " url='https://land.copernicus.eu'\n", - ")\n", - "\n", - "\n", - "ITEM_DESCRIPTION = ('Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' \n", - " 'datasets produced within the frame the Copernicus Land Monitoring Service '\n", - " 'referring to land cover / land use status of year {year}. '\n", - " 'CLC service has a long-time heritage (formerly known as \\\"CORINE Land Cover Programme\\\"), '\n", - " 'coordinated by the European Environment Agency (EEA). It provides consistent '\n", - " 'and thematically detailed information on land cover and land cover changes across Europe. '\n", - " 'CLC datasets are based on the classification of satellite images produced by the national '\n", - " 'teams of the participating countries - the EEA members and cooperating countries (EEA39). '\n", - " 'National CLC inventories are then further integrated into a seamless land cover map of Europe. 
'\n", - " 'The resulting European database relies on standard methodology and nomenclature with following '\n", - " 'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; '\n", - " 'minimum mapping unit (MMU) for status layers is 25 hectares; '\n", - " 'minimum width of linear elements is 100 metres. '\n", - " 'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares '\n", - " 'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. '\n", - " 'The CLC service delivers important data sets supporting the implementation of key priority '\n", - " 'areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, '\n", - " 'halting the loss of biological diversity, tracking the impacts of climate change, '\n", - " 'monitoring urban land take, assessing developments in agriculture or dealing with '\n", - " 'water resources directives. CLC belongs to the Pan-European component of the '\n", - " 'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the '\n", - " 'European Copernicus Programme coordinated by the European Environment Agency, '\n", - " 'providing environmental information from a combination of air- and space-based observation '\n", - " 'systems and in-situ monitoring. Additional information about CLC product description including '\n", - " 'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. 
'\n", - " 'CLC class descriptions can be found at '\n", - " 'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.')\n" + "# # This goes into constants.py ...\n", + "\n", + "# STAC_DIR = 'stac_tests'\n", + "# COLLECTION_ID = 'corine-land-cover-raster'\n", + "\n", + "# DOM_DICT = {\n", + "# 'GLP': 'Guadeloupe',\n", + "# 'GUF': 'French Guyana',\n", + "# 'MTQ': 'Martinique',\n", + "# 'MYT': 'Mayotte',\n", + "# 'REU': 'Réunion',\n", + "# '': 'Europe',\n", + "# }\n", + "\n", + "# MEDIA_TYPE_DICT = {\n", + "# 'tif': pystac.MediaType.COG,\n", + "# 'tif_xml': pystac.MediaType.XML,\n", + "# 'tif_aux_xml': pystac.MediaType.XML,\n", + "# 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid',\n", + "# 'tif_vat_cpg': pystac.MediaType.TEXT,\n", + "# 'tif_vat_dbf': 'application/dbf',\n", + "# 'txt': pystac.MediaType.TEXT,\n", + "# 'tif_lyr': 'image/tiff; application=geotiff; profile=layer',\n", + "# 'tfw': pystac.MediaType.TEXT,\n", + "# 'xml': pystac.MediaType.XML,\n", + "# }\n", + "\n", + "# ROLES_DICT = {\n", + "# 'tif': ['data', 'visual'],\n", + "# 'tif_xml': ['metadata'],\n", + "# 'tif_aux_xml': ['metadata'],\n", + "# 'tif_ovr': ['metadata'],\n", + "# 'tif_vat_cpg': ['metadata'],\n", + "# 'tif_vat_dbf': ['metadata'],\n", + "# 'txt': ['metadata'],\n", + "# 'tif_lyr': ['metadata'],\n", + "# 'tfw': ['metadata'],\n", + "# 'xml': ['metadata'],\n", + "# }\n", + "\n", + "# TITLE_DICT = {\n", + "# 'tif': 'Single Band Land Classification {label}',\n", + "# 'tif_xml': 'TIFF Metadata {label}',\n", + "# 'tif_aux_xml': 'TIFF Statistics {label}',\n", + "# 'tif_ovr': 'Pyramid {label}',\n", + "# 'tif_vat_cpg': 'Encoding {label}',\n", + "# 'tif_vat_dbf': 'Database {label}',\n", + "# 'txt': 'Legends {label}',\n", + "# 'tif_lyr': 'Legend Layer {label}',\n", + "# 'tfw': 'World File {label}',\n", + "# 'xml': 'Single Band Land Classification Metadata {label}',\n", + "# }\n", + "\n", + "# CLC_PROVIDER = pystac.provider.Provider(\n", + "# 
name='Copernicus Land Monitoring Service',\n", + "# description=('The Copernicus Land Monitoring Service provides '\n", + "# 'geographical information on land cover and its '\n", + "# 'changes, land use, ground motions, vegetation state, '\n", + "# 'water cycle and Earth\\'s surface energy variables to '\n", + "# 'a broad range of users in Europe and across the World '\n", + "# 'in the field of environmental terrestrial applications.'),\n", + "# roles=[ProviderRole.LICENSOR, ProviderRole.HOST],\n", + "# url='https://land.copernicus.eu'\n", + "# )\n", + "\n", + "\n", + "# ITEM_DESCRIPTION = ('Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' \n", + "# 'datasets produced within the frame the Copernicus Land Monitoring Service '\n", + "# 'referring to land cover / land use status of year {year}. '\n", + "# 'CLC service has a long-time heritage (formerly known as \\\"CORINE Land Cover Programme\\\"), '\n", + "# 'coordinated by the European Environment Agency (EEA). It provides consistent '\n", + "# 'and thematically detailed information on land cover and land cover changes across Europe. '\n", + "# 'CLC datasets are based on the classification of satellite images produced by the national '\n", + "# 'teams of the participating countries - the EEA members and cooperating countries (EEA39). '\n", + "# 'National CLC inventories are then further integrated into a seamless land cover map of Europe. '\n", + "# 'The resulting European database relies on standard methodology and nomenclature with following '\n", + "# 'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; '\n", + "# 'minimum mapping unit (MMU) for status layers is 25 hectares; '\n", + "# 'minimum width of linear elements is 100 metres. '\n", + "# 'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares '\n", + "# 'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. 
'\n", + "# 'The CLC service delivers important data sets supporting the implementation of key priority '\n", + "# 'areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, '\n", + "# 'halting the loss of biological diversity, tracking the impacts of climate change, '\n", + "# 'monitoring urban land take, assessing developments in agriculture or dealing with '\n", + "# 'water resources directives. CLC belongs to the Pan-European component of the '\n", + "# 'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the '\n", + "# 'European Copernicus Programme coordinated by the European Environment Agency, '\n", + "# 'providing environmental information from a combination of air- and space-based observation '\n", + "# 'systems and in-situ monitoring. Additional information about CLC product description including '\n", + "# 'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. '\n", + "# 'CLC class descriptions can be found at '\n", + "# 'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.')\n" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -201,27 +212,81 @@ "# clc_name = os.path.basename(path).split('.')[0]\n", "# return(clc_name)\n", "\n", - "def get_asset_files(path, clc_name):\n", + "# def get_asset_files(path, clc_name):\n", + "\n", + "# clc_name_elements = deconstruct_clc_name(clc_name)\n", + "\n", + "# asset_files = []\n", + " \n", + "# for root, dirs, files in os.walk(path):\n", + "# if not clc_name_elements['DOM_code'] and 'French_DOMs' in root:\n", + "# continue\n", + " \n", + "# if clc_name_elements['DOM_code'] and ('Legend' in root and not 'French_DOMs' in root):\n", + "# continue\n", + " \n", + "# for file in files:\n", + "# if (file.startswith(clc_name + '.') or \n", + "# file.endswith((f'{clc_name_elements[\"DOM_code\"]}.tif.lyr', 
'QGIS.txt',)) and \n", + "# clc_name in file):\n", + "# asset_files.append(os.path.join(root, file))\n", + "\n", + "# return(asset_files)\n", + " \n", + "\n", + "# def get_asset_files(path: str, clc_name: str) -> list[str]:\n", + "\n", + "# clc_name_elements = deconstruct_clc_name(clc_name)\n", + "\n", + "# asset_files = []\n", + " \n", + "# for root, dirs, files in os.walk(path):\n", + "# if not clc_name_elements['DOM_code'] and 'French_DOMs' in root:\n", + "# continue\n", + " \n", + "# if clc_name_elements['DOM_code'] and ('Legend' in root and not 'French_DOMs' in root):\n", + "# continue\n", + " \n", + "# for file in files:\n", + "# # print(root, file)\n", + "# if (file.startswith(clc_name_elements['id'] + '.') or \n", + "# file.endswith((f'{clc_name_elements['DOM_code']}.tif.lyr', 'QGIS.txt',)) and clc_name_elements['id'].lower() in root or\n", + "# file == f'readme_{clc_name_elements['id']}.txt'):\n", + " \n", + "# asset_files.append(os.path.join(root, file)) \n", + "\n", + "# return asset_files\n", + "\n", + "def get_asset_files(data_root: str, img_path: str) -> list[str]:\n", "\n", - " clc_name_elements = deconstruct_clc_name(clc_name)\n", + " clc_name_elements = deconstruct_clc_name(img_path)\n", + " id = clc_name_elements['id']\n", + " dom_code = clc_name_elements['DOM_code']\n", "\n", " asset_files = []\n", " \n", - " for root, dirs, files in os.walk(path):\n", - " if not clc_name_elements['DOM_code'] and 'French_DOMs' in root:\n", + " for root, _, files in os.walk(data_root):\n", + " if not dom_code and 'French_DOMs' in root:\n", + " continue\n", + "\n", + " if dom_code and 'Legend' in root and not 'French_DOMs' in root:\n", " continue\n", " \n", - " if clc_name_elements['DOM_code'] and ('Legend' in root and not 'French_DOMs' in root):\n", + " if not 'U{update_campaign}_{theme}{reference_year}_V{release_year}'.format(**clc_name_elements).lower() in root:\n", " continue\n", - " \n", + " \n", " for file in files:\n", - " if (file.startswith(clc_name + '.') 
or \n", - " file.endswith((f'{clc_name_elements[\"DOM_code\"]}.tif.lyr', 'QGIS.txt',)) and \n", - " clc_name in file):\n", + "\n", + " if (file.startswith(id + '.') or \n", + " file.endswith(f'{dom_code}.tif.lyr') or \n", + " file.endswith('QGIS.txt',) or \n", + " file == f'readme_{id}.txt'):\n", + "\n", " asset_files.append(os.path.join(root, file))\n", "\n", - " return(asset_files)\n", - " \n", + " return asset_files\n", + "\n", + "\n", "def project_bbox(img, target_epsg=4326):\n", " target_crs = rio.crs.CRS.from_epsg(target_epsg)\n", " bbox_warped = rio.warp.transform_bounds(img.crs, target_crs, *img.bounds)\n", @@ -231,7 +296,7 @@ "\n", " clc_name_elements = deconstruct_clc_name(img_path)\n", "\n", - " asset_files = get_asset_files(root, clc_name=clc_name_elements['id'])\n", + " asset_files = get_asset_files(root, clc_name=os.path.basename(img_path))\n", " asset_files = [f for f in asset_files if not f.endswith('aux')]\n", " year = clc_name_elements.get('reference_year')\n", " props = {'description': ITEM_DESCRIPTION.format(year=year),\n", @@ -269,15 +334,18 @@ " transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0],\n", " )\n", "\n", - " license = pystac.link.Link(rel='LICENSE', target=\"https://land.copernicus.eu/en/data-policy\")\n", - " item.add_link(license)\n", + " # license = pystac.link.Link(rel='LICENSE', target=\"https://land.copernicus.eu/en/data-policy\")\n", + " # item.add_link(license)\n", + " links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK]\n", + " item.add_links(links)\n", "\n", - " return(item)" + " return item\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -286,7 +354,7 @@ "'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc'" ] }, - "execution_count": 45, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -297,7 +365,17 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 8, + 
"metadata": {}, + "outputs": [], + "source": [ + "if '':\n", + " print('buh')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -306,1342 +384,109 @@ "text": [ "['../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GLP.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GUF.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MYT.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_REU.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2018_V2020_20u1.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_GLP.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_GUF.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_MTQ.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_MYT.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_REU.tif']\n" ] + } + ], + "source": [ + "root = '../../../CLC_100m'\n", + "\n", + "img_paths = get_img_paths(path=root)\n", + "print(img_paths)\n", + "# for img_path in img_paths:\n", + "\n", + "img_path = img_paths[0]\n", + "\n", + "# get_asset_files(root, deconstruct_clc_name(img_path)['id'])" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "U2018_CLC2012_V2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif\n", + "U2018_CLC2012_V2020_20u1.tfw u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", + "U2018_CLC2012_V2020_20u1.tif u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", + "U2018_CLC2012_V2020_20u1.tif.aux.xml u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", + "U2018_CLC2012_V2020_20u1.tif.ovr u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", + "U2018_CLC2012_V2020_20u1.tif.vat.cpg u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", + "U2018_CLC2012_V2020_20u1.tif.vat.dbf u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", + "U2018_CLC2012_V2020_20u1.tif.xml u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", + "readme_U2018_CLC2012_V2020_20u1.txt u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\Documents\n", + "CLC2018_CLC2012_V2018_20.tif.lyr u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\Legend\n", + "CLC2018_CLC2012_V2018_20_QGIS.txt u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\Legend\n", + "U2018_CLC2012_V2020_20u1.xml u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\Metadata\n" + ] }, { "data": { "text/plain": [ - "['../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tfw',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif',\n", - " 
'../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif.aux.xml',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif.ovr',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif.vat.cpg',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif.vat.dbf',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif.xml',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\Metadata\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.xml']" + "['../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tfw',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.aux.xml',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.ovr',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.cpg',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.dbf',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.xml',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\Documents\\\\readme_U2018_CLC2012_V2020_20u1.txt',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20.tif.lyr',\n", + " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20_QGIS.txt',\n", + " 
'../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\Metadata\\\\U2018_CLC2012_V2020_20u1.xml']" ] }, - "execution_count": 48, + "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "root = '../../../CLC_100m'\n", - "\n", - "img_paths = get_img_paths(path=root)\n", - "print(img_paths)\n", - "# for img_path in img_paths:\n", - "\n", - "img_path = img_paths[3]\n", - "\n", - "get_asset_files(root, deconstruct_clc_name(img_path)['id'])" + "clc_name = deconstruct_clc_name(img_path)['id']\n", + "print(clc_name, img_path)\n", + "get_asset_files(root, img_path)\n", + "# deconstruct_clc_name(img_path)" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 68, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
                      \n", - "
                      \n", - "
                        \n", - " \n", - " \n", - " \n", - "
                      • \n", - " type\n", - " \"Feature\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " stac_version\n", - " \"1.0.0\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " id\n", - " \"U2018_CLC2012_V2020_20u1_FR_MTQ\"\n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " properties\n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " description\n", - " \"Corine Land Cover 2012 (CLC2012) is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2012. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. CLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. 
Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " created\n", - " None\n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " providers\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " name\n", - " \"Copernicus Land Monitoring Service\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " description\n", - " \"The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 2 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"licensor\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 1\n", - " \"host\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " url\n", - " \"https://land.copernicus.eu\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " start_datetime\n", - " \"2012-01-01T00:00:00Z\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " end_datetime\n", - " \"2012-12-31T00:00:00Z\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " datetime\n", - " None\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " geometry\n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " type\n", - " \"Polygon\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " coordinates[] 1 items\n", - " \n", - "
                            \n", - " \n", - " \n", - "
                          • \n", - " 0[] 5 items\n", - " \n", - "
                              \n", - " \n", - " \n", - "
                            • \n", - " 0[] 2 items\n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 0\n", - " -60.711279992199906\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 1\n", - " 14.296077071286975\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                            • \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - "
                            • \n", - " 1[] 2 items\n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 0\n", - " -60.711279992199906\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 1\n", - " 14.970827479517444\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                            • \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - "
                            • \n", - " 2[] 2 items\n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 0\n", - " -61.32702312276209\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 1\n", - " 14.970827479517444\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                            • \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - "
                            • \n", - " 3[] 2 items\n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 0\n", - " -61.32702312276209\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 1\n", - " 14.296077071286975\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                            • \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - "
                            • \n", - " 4[] 2 items\n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 0\n", - " -60.711279992199906\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                                \n", - " \n", - " \n", - " \n", - "
                              • \n", - " 1\n", - " 14.296077071286975\n", - "
                              • \n", - " \n", - " \n", - " \n", - "
                              \n", - " \n", - "
                            • \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - " \n", - "
                        • \n", - " \n", - " \n", - "
                        \n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " links[] 1 items\n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 0\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " rel\n", - " \"LICENSE\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " href\n", - " \"https://land.copernicus.eu/en/data-policy\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " assets\n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " U2018_CLC2012_V2020_20u1_FR_MTQ_tfw\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " href\n", - " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tfw\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " type\n", - " \"text/plain\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"World File Martinique\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " U2018_CLC2012_V2020_20u1_FR_MTQ_tif\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " href\n", - " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " type\n", - " \"image/tiff; application=geotiff; profile=cloud-optimized\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Single Band Land Classification Martinique\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " proj:epsg\n", - " 4559\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " proj:bbox[] 4 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " 680451.000324164\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 1\n", - " 1581682.1579604005\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 2\n", - " 746151.000324164\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 3\n", - " 1655782.1579604005\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - " \n", - "
                          • \n", - " proj:shape[] 2 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " 741\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 1\n", - " 657\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - " \n", - "
                          • \n", - " proj:transform[] 12 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " 100.0\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 1\n", - " 0.0\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 2\n", - " 680451.000324164\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 3\n", - " 0.0\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 4\n", - " -100.0\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 5\n", - " 1655782.1579604005\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 6\n", - " 0.0\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 7\n", - " 0.0\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 8\n", - " 1.0\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 9\n", - " 0.0\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 10\n", - " 0.0\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 11\n", - " 1.0\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 2 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"data\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 1\n", - " \"visual\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " U2018_CLC2012_V2020_20u1_FR_MTQ_tif_aux_xml\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " href\n", - " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif.aux.xml\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " type\n", - " \"application/xml\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"TIFF Statistics Martinique\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " U2018_CLC2012_V2020_20u1_FR_MTQ_tif_ovr\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " href\n", - " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif.ovr\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " type\n", - " \"image/tiff; application=geotiff; profile=pyramid\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Pyramid Martinique\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " U2018_CLC2012_V2020_20u1_FR_MTQ_tif_vat_cpg\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " href\n", - " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif.vat.cpg\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " type\n", - " \"text/plain\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Encoding Martinique\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " U2018_CLC2012_V2020_20u1_FR_MTQ_tif_vat_dbf\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " href\n", - " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif.vat.dbf\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " type\n", - " \"application/dbf\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Database Martinique\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " U2018_CLC2012_V2020_20u1_FR_MTQ_tif_xml\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " href\n", - " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/French_DOMs/U2018_CLC2012_V2020_20u1_FR_MTQ.tif.xml\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " type\n", - " \"application/xml\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"TIFF Metadata Martinique\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                        • \n", - " U2018_CLC2012_V2020_20u1_FR_MTQ_xml\n", - "
                            \n", - " \n", - " \n", - " \n", - "
                          • \n", - " href\n", - " \"../../../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/Metadata/U2018_CLC2012_V2020_20u1_FR_MTQ.xml\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " type\n", - " \"application/xml\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " title\n", - " \"Single Band Land Classification Metadata Martinique\"\n", - "
                          • \n", - " \n", - " \n", - " \n", - " \n", - "
                          • \n", - " roles[] 1 items\n", - " \n", - "
                              \n", - " \n", - " \n", - " \n", - "
                            • \n", - " 0\n", - " \"metadata\"\n", - "
                            • \n", - " \n", - " \n", - " \n", - "
                            \n", - " \n", - "
                          • \n", - " \n", - " \n", - "
                          \n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - "
                      • \n", - " \n", - " \n", - " \n", - " \n", - "
                      • \n", - " bbox[] 4 items\n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 0\n", - " -61.32702312276209\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 1\n", - " 14.296077071286975\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 2\n", - " -60.711279992199906\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 3\n", - " 14.970827479517444\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                      • \n", - " \n", - " \n", - " \n", - "
                      • \n", - " stac_extensions[] 1 items\n", - " \n", - "
                          \n", - " \n", - " \n", - " \n", - "
                        • \n", - " 0\n", - " \"https://stac-extensions.github.io/projection/v1.1.0/schema.json\"\n", - "
                        • \n", - " \n", - " \n", - " \n", - "
                        \n", - " \n", - "
                      • \n", - " \n", - " \n", - "
                      \n", - "
                      \n", - "
                      " - ], "text/plain": [ - "" + "{'id': 'CLC2018_CLC2012_V2018_20_QGIS', 'suffix': 'txt'}" ] }, - "execution_count": 50, + "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], + "source": [ + "deconstruct_clc_name('CLC2018_CLC2012_V2018_20_QGIS.txt')" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'CLMS_CATALOG_LINK' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[69], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m item \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_item\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mroot\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2\u001b[0m item\n", + "Cell \u001b[1;32mIn[65], line 152\u001b[0m, in \u001b[0;36mcreate_item\u001b[1;34m(img_path, root)\u001b[0m\n\u001b[0;32m 144\u001b[0m proj_ext\u001b[38;5;241m.\u001b[39mapply(epsg\u001b[38;5;241m=\u001b[39mrio\u001b[38;5;241m.\u001b[39mcrs\u001b[38;5;241m.\u001b[39mCRS(img\u001b[38;5;241m.\u001b[39mcrs)\u001b[38;5;241m.\u001b[39mto_epsg(),\n\u001b[0;32m 145\u001b[0m bbox\u001b[38;5;241m=\u001b[39mimg\u001b[38;5;241m.\u001b[39mbounds,\n\u001b[0;32m 146\u001b[0m shape\u001b[38;5;241m=\u001b[39m[_ \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m img\u001b[38;5;241m.\u001b[39mshape],\n\u001b[0;32m 147\u001b[0m transform\u001b[38;5;241m=\u001b[39m[_ \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m img\u001b[38;5;241m.\u001b[39mtransform] \u001b[38;5;241m+\u001b[39m [\u001b[38;5;241m0.0\u001b[39m, \u001b[38;5;241m0.0\u001b[39m, \u001b[38;5;241m1.0\u001b[39m],\n\u001b[0;32m 148\u001b[0m )\n\u001b[0;32m 150\u001b[0m \u001b[38;5;66;03m# license = 
pystac.link.Link(rel='LICENSE', target=\"https://land.copernicus.eu/en/data-policy\")\u001b[39;00m\n\u001b[0;32m 151\u001b[0m \u001b[38;5;66;03m# item.add_link(license)\u001b[39;00m\n\u001b[1;32m--> 152\u001b[0m links \u001b[38;5;241m=\u001b[39m [CLMS_LICENSE, \u001b[43mCLMS_CATALOG_LINK\u001b[49m, ITEM_PARENT_LINK, COLLECTION_LINK]\n\u001b[0;32m 153\u001b[0m item\u001b[38;5;241m.\u001b[39madd_links(links)\n\u001b[0;32m 155\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m item\n", + "\u001b[1;31mNameError\u001b[0m: name 'CLMS_CATALOG_LINK' is not defined" + ] + } + ], "source": [ "item = create_item(img_path, root)\n", "item" @@ -1649,47 +494,68 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 12, "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "AssetDefinition.__init__() missing 1 required positional argument: 'properties'", + "ename": "AttributeError", + "evalue": "'Item' object has no attribute 'normalize_hrefs'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[51], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m asset_def \u001b[38;5;241m=\u001b[39m \u001b[43mAssetDefinition\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2\u001b[0m asset_def\u001b[38;5;241m.\u001b[39mitem_assets(item)\n\u001b[0;32m 3\u001b[0m item\u001b[38;5;241m.\u001b[39mmake_asset_hrefs_relative()\n", - "\u001b[1;31mTypeError\u001b[0m: AssetDefinition.__init__() missing 1 required positional argument: 'properties'" + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[12], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# import pystac.extensions\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# import pystac.extensions.projection\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m 
\u001b[43mitem\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnormalize_hrefs\u001b[49m()\n", + "\u001b[1;31mAttributeError\u001b[0m: 'Item' object has no attribute 'normalize_hrefs'" ] } ], "source": [ - "asset_def = AssetDefinition()\n", - "asset_def.item_assets(item)\n", - "item.make_asset_hrefs_relative()" + "# import pystac.extensions\n", + "# import pystac.extensions.projection\n", + "\n", + "item.normalize_hrefs()" ] }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'bla': 'blu', 'blo': 42}" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_fun()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc\\\\stacs/clms_catalog.json'", + "evalue": "[Errno 2] No such file or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc\\\\stacs/corine-land-cover-raster/corine-land-cover-raster.json'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[52], line 6\u001b[0m\n\u001b[0;32m 2\u001b[0m STAC_DIR \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstac_tests\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 3\u001b[0m COLLECTION_ID \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorine-land-cover-raster\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 5\u001b[0m CLMS_CATALOG_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[1;32m----> 6\u001b[0m 
rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mROOT, target\u001b[38;5;241m=\u001b[39m\u001b[43mpystac\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSTACObject\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mWORKING_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstacs/clms_catalog.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 7\u001b[0m )\n\u001b[0;32m 8\u001b[0m COLLECTION_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 9\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mCOLLECTION,\n\u001b[0;32m 10\u001b[0m target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstacs/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[0;32m 11\u001b[0m )\n\u001b[0;32m 12\u001b[0m ITEM_PARENT_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 13\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mPARENT,\n\u001b[0;32m 14\u001b[0m 
target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstacs/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[0;32m 15\u001b[0m )\n", + "Cell \u001b[1;32mIn[17], line 9\u001b[0m\n\u001b[0;32m 1\u001b[0m WORKING_DIR \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mgetcwd()\n\u001b[0;32m 3\u001b[0m CLMS_CATALOG_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 4\u001b[0m \u001b[38;5;66;03m# rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, \"stacs/clms_catalog.json\"))\u001b[39;00m\n\u001b[0;32m 5\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mROOT, target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mx:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mprojects\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mETC-DI\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mTask_18\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mclms-stac\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mstacs\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mclms_catalog.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 6\u001b[0m )\n\u001b[0;32m 7\u001b[0m COLLECTION_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 8\u001b[0m 
rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mCOLLECTION,\n\u001b[1;32m----> 9\u001b[0m target\u001b[38;5;241m=\u001b[39m\u001b[43mpystac\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSTACObject\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mWORKING_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstacs/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mCOLLECTION_ID\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mCOLLECTION_ID\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[0;32m 10\u001b[0m )\n\u001b[0;32m 11\u001b[0m ITEM_PARENT_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 12\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mPARENT,\n\u001b[0;32m 13\u001b[0m target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstacs/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[0;32m 14\u001b[0m )\n\u001b[0;32m 16\u001b[0m 
item\u001b[38;5;241m.\u001b[39mset_self_href(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mSTAC_DIR\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_object.py:619\u001b[0m, in \u001b[0;36mSTACObject.from_file\u001b[1;34m(cls, href, stac_io)\u001b[0m\n\u001b[0;32m 607\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Reads a STACObject implementation from a file.\u001b[39;00m\n\u001b[0;32m 608\u001b[0m \n\u001b[0;32m 609\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 616\u001b[0m \u001b[38;5;124;03m by the JSON read from the file located at HREF.\u001b[39;00m\n\u001b[0;32m 617\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 618\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m==\u001b[39m STACObject:\n\u001b[1;32m--> 619\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(S, \u001b[43mpystac\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 621\u001b[0m href \u001b[38;5;241m=\u001b[39m make_posix_style(href)\n\u001b[0;32m 623\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stac_io \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "File 
\u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\__init__.py:165\u001b[0m, in \u001b[0;36mread_file\u001b[1;34m(href, stac_io)\u001b[0m\n\u001b[0;32m 163\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stac_io \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 164\u001b[0m stac_io \u001b[38;5;241m=\u001b[39m StacIO\u001b[38;5;241m.\u001b[39mdefault()\n\u001b[1;32m--> 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mstac_io\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_stac_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:234\u001b[0m, in \u001b[0;36mStacIO.read_stac_object\u001b[1;34m(self, source, root, *args, **kwargs)\u001b[0m\n\u001b[0;32m 208\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_stac_object\u001b[39m(\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 210\u001b[0m source: HREF,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[0;32m 214\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m STACObject:\n\u001b[0;32m 215\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Read a STACObject from a JSON file at the given source.\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \n\u001b[0;32m 217\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.read_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[38;5;124;03m contained in the file at the given uri.\u001b[39;00m\n\u001b[0;32m 233\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 234\u001b[0m d \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_json\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 235\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstac_object_from_dict(\n\u001b[0;32m 236\u001b[0m d, href\u001b[38;5;241m=\u001b[39msource, root\u001b[38;5;241m=\u001b[39mroot, preserve_dict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 237\u001b[0m )\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:205\u001b[0m, in \u001b[0;36mStacIO.read_json\u001b[1;34m(self, source, *args, **kwargs)\u001b[0m\n\u001b[0;32m 188\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_json\u001b[39m(\u001b[38;5;28mself\u001b[39m, source: HREF, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Any]:\n\u001b[0;32m 189\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Read a dict from the given source.\u001b[39;00m\n\u001b[0;32m 190\u001b[0m \n\u001b[0;32m 191\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.read_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[38;5;124;03m given source.\u001b[39;00m\n\u001b[0;32m 204\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 205\u001b[0m txt \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_text\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mjson_loads(txt)\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:282\u001b[0m, in \u001b[0;36mDefaultStacIO.read_text\u001b[1;34m(self, source, *_, **__)\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A concrete implementation of :meth:`StacIO.read_text\u001b[39;00m\n\u001b[0;32m 278\u001b[0m \u001b[38;5;124;03m`. Converts the ``source`` argument to a string (if it\u001b[39;00m\n\u001b[0;32m 279\u001b[0m \u001b[38;5;124;03mis not already) and delegates to :meth:`DefaultStacIO.read_text_from_href` for\u001b[39;00m\n\u001b[0;32m 280\u001b[0m \u001b[38;5;124;03mopening and reading the file.\"\"\"\u001b[39;00m\n\u001b[0;32m 281\u001b[0m href \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(os\u001b[38;5;241m.\u001b[39mfspath(source))\n\u001b[1;32m--> 282\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_text_from_href\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:305\u001b[0m, in \u001b[0;36mDefaultStacIO.read_text_from_href\u001b[1;34m(self, href)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not read uri \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhref\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 304\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 305\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mutf-8\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m 306\u001b[0m href_contents \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread()\n\u001b[0;32m 307\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m href_contents\n", - "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc\\\\stacs/clms_catalog.json'" + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc\\\\stacs/corine-land-cover-raster/corine-land-cover-raster.json'" ] } ], @@ -1697,7 +563,8 @@ "WORKING_DIR = os.getcwd()\n", "\n", "CLMS_CATALOG_LINK = pystac.link.Link(\n", - " rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, \"stacs/clms_catalog.json\"))\n", + " # rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, \"stacs/clms_catalog.json\"))\n", + " rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(\"x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\stacs\\\\clms_catalog.json\")\n", ")\n", "COLLECTION_LINK = pystac.link.Link(\n", " rel=pystac.RelType.COLLECTION,\n", @@ -1712,6 +579,24 @@ 
"item.save_object()" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, { "cell_type": "code", "execution_count": 62, diff --git a/scripts/clc/reproject_data_bounds.ipynb b/scripts/clc/reproject_data_bounds.ipynb new file mode 100644 index 0000000..19a307d --- /dev/null +++ b/scripts/clc/reproject_data_bounds.ipynb @@ -0,0 +1,112 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import rasterio as rio\n", + "from rasterio.warp import Resampling\n", + "from shapely.geometry import box, mapping\n", + "from shapely import envelope" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "img_path = 'X:/EO/u2018_clc2018_v2020_20u1_raster100m/DATA/U2018_CLC2018_V2020_20u1.tif'\n", + "\n", + "\n", + "dst_crs = rio.CRS.from_epsg(4326)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "def project_data_window_bbox(src: rio.open, dst_crs: rio.CRS = rio.CRS.from_epsg(4326)) -> tuple:\n", + " data, transform = rio.warp.reproject(source=src.read(),\n", + " src_transform=src.transform,\n", + " src_crs=src.crs,\n", + " dst_crs=dst_crs,\n", + " dst_nodata=src.nodata,\n", + " dst_resolution=(0.25, 0.25),\n", + " resampling=Resampling.max)\n", + " \n", + " data_window = rio.windows.get_data_window(data, nodata=src.nodata)\n", + " bbox = rio.windows.bounds(data_window, transform=transform)\n", + " print(data.shape)\n", + " # bbox_raw = rio.windows.bounds(data_window, transform=transform)\n", + " # bbox_buff = box(*bbox_raw).buffer(0.25 / 2)\n", + " # bbox = envelope(bbox_buff).bounds\n", + " return(bbox)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "with rio.open(img_path) as src:\n", + " print(type(src))\n", + " #bbox = project_data_window_bbox(src)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "## To export results for examination in QGIS\n", + "\n", + "# profile = src.profile\n", + "# profile.update(transform=transform, driver='GTiff', height=data.shape[1], width=data.shape[2], crs=rio.CRS.from_epsg(4326))\n", + "\n", + "# with rio.open(dst_path, 'w', **profile) as dst:\n", + "# dst.write(data)\n", + "\n", + "# import geopandas as gpd\n", + "\n", + "# x = gpd.GeoSeries(box(*bbox), crs=dst_crs)\n", + "# x.to_file('X:/EO/u2018_clc2018_v2020_20u1_rio4326_box025max.gpkg')\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From b0d77a973478746c93d78ffc2c2db1d1d308d214 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Mon, 6 May 2024 22:02:37 +0200 Subject: [PATCH 66/80] removes dev notebooks --- scripts/clc/clms_collection_generator.ipynb | 171 ----- scripts/clc/clms_item_generator.ipynb | 679 -------------------- scripts/clc/reproject_data_bounds.ipynb | 112 ---- 3 files changed, 962 deletions(-) delete mode 100644 scripts/clc/clms_collection_generator.ipynb delete mode 100644 scripts/clc/clms_item_generator.ipynb delete mode 100644 scripts/clc/reproject_data_bounds.ipynb diff --git a/scripts/clc/clms_collection_generator.ipynb b/scripts/clc/clms_collection_generator.ipynb deleted file mode 100644 index 
46fd7a3..0000000 --- a/scripts/clc/clms_collection_generator.ipynb +++ /dev/null @@ -1,171 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import re\n", - "\n", - "import pystac\n", - "import pystac.item\n", - "import pystac.link\n", - "from pystac.provider import ProviderRole\n", - "from pystac.extensions.projection import ProjectionExtension\n", - "\n", - "from pystac.extensions.item_assets import ItemAssetsExtension, AssetDefinition\n", - "\n", - "from datetime import datetime, UTC\n", - "\n", - "import rasterio.warp\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from constants import *\n", - "from item import create_item, get_img_paths" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "def proj_epsg_from_item_asset(item):\n", - " for asset_key in item.assets:\n", - " asset = item.assets[asset_key].to_dict()\n", - " if 'proj:epsg' in asset.keys():\n", - " return(asset.get('proj:epsg'))" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# WORKING_DIR = os.getcwd()\n", - "# STAC_DIR = 'stac_tests'\n", - "# COLLECTION_ID = 'corine-land-cover-raster'\n", - "# COLLECTION_TITLE = 'CORINE Land Cover Raster'\n", - "# COLLECTION_DESCRIPTION = (\"The European Commission launched the CORINE (Coordination of Information on the Environment) \"\n", - "# \"program in an effort to develop a standardized methodology for producing continent-scale land \"\n", - "# \"cover, biotope, and air quality maps. 
The CORINE Land Cover (CLC) product offers a pan-European \"\n", - "# \"land cover and land use inventory with 44 thematic classes, ranging from broad forested areas \"\n", - "# \"to individual vineyards.\")\n", - "# COLLECTION_KEYWORDS = [\"clms\", \"corine\", \"derived data\", \"land cover\", \"machine learning\", \"open data\"]\n", - "# COLLECTION_LICENSE = 'proprietary'\n", - "\n", - "\n", - "# CLMS_CATALOG_LINK = pystac.link.Link(\n", - "# rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, \"stacs/clms_catalog.json\"))\n", - "# )\n", - "# COLLECTION_LINK = pystac.link.Link(\n", - "# rel=pystac.RelType.COLLECTION,\n", - "# target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f\"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json\")),\n", - "# )\n", - "# ITEM_PARENT_LINK = pystac.link.Link(\n", - "# rel=pystac.RelType.PARENT,\n", - "# target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f\"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json\")),\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "data_root = '../CLC_100m'" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def create_collection(data_root: str):\n", - "\n", - " sp_extent = pystac.SpatialExtent([None, None, None, None])\n", - " tmp_extent = pystac.TemporalExtent([datetime(1990, 1, 1, microsecond=0, tzinfo=UTC), None])\n", - " extent = pystac.Extent(sp_extent, tmp_extent)\n", - "\n", - " collection = pystac.Collection(id=COLLECTION_ID,\n", - " description=COLLECTION_DESCRIPTION,\n", - " title=COLLECTION_TITLE,\n", - " extent=extent,\n", - " keywords=COLLECTION_KEYWORDS,\n", - " license=COLLECTION_LICENSE,\n", - " stac_extensions=[]\n", - " )\n", - "\n", - "\n", - " item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True)\n", - " item_assets.item_assets = {\n", - " key: AssetDefinition({\"title\": 
COLLITAS_TITLE_DICT[key].format(label='').strip(),\n", - " \"media_type\": COLLITAS_MEDIA_TYPE_DICT[key], \n", - " \"roles\": COLLITAS_ROLES_DICT[key]})\n", - " for key in COLLITAS_TITLE_DICT\n", - " }\n", - "\n", - " collection.add_link(CLMS_LICENSE)\n", - " collection.set_self_href(os.path.join(WORKING_DIR, f\"{STAC_DIR}/{collection.id}/{collection.id}.json\"))\n", - " catalog = pystac.read_file(f\"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json\")\n", - "\n", - " collection.set_root(catalog)\n", - " collection.set_parent(catalog)\n", - "\n", - " collection.save_object()\n", - "\n", - " img_paths = get_img_paths(path=data_root)\n", - "\n", - " proj_epsg = []\n", - " for img_path in img_paths:\n", - " item = create_item(img_path, data_root)\n", - " collection.add_item(item)\n", - "\n", - " item_epsg = proj_epsg_from_item_asset(item)\n", - " proj_epsg.append(item_epsg)\n", - "\n", - " item.set_self_href(os.path.join(WORKING_DIR, f\"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json\"))\n", - " item.save_object()\n", - "\n", - " collection.make_all_asset_hrefs_relative()\n", - " collection.update_extent_from_items()\n", - " ProjectionExtension.add_to(collection)\n", - " collection.summaries = pystac.Summaries({'proj:epsg': list(set(proj_epsg))})\n", - "\n", - " collection.save_object()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "stacdev", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/scripts/clc/clms_item_generator.ipynb b/scripts/clc/clms_item_generator.ipynb deleted file mode 100644 index 19d4382..0000000 --- a/scripts/clc/clms_item_generator.ipynb +++ /dev/null @@ -1,679 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - 
"execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import re\n", - "\n", - "import pystac\n", - "import pystac.item\n", - "import pystac.link\n", - "from pystac.provider import ProviderRole\n", - "from pystac.extensions.projection import ProjectionExtension\n", - "from pystac.extensions.item_assets import AssetDefinition\n", - "\n", - "from pyproj import Transformer\n", - "from shapely.geometry import GeometryCollection, box, shape, mapping\n", - "from datetime import datetime, UTC\n", - "\n", - "import rasterio as rio\n", - "from rasterio.warp import Resampling\n", - "import rasterio.warp\n", - "import rasterio.crs\n", - "\n", - "import xml.etree.cElementTree as ET\n", - "from xml.dom import minidom" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from constants import *\n", - "# from .constants import STAC_DIR, COLLECTION_ID" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "# img_path = 'X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif'\n", - "# os.path.split(img_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "# # This goes into constants.py ...\n", - "\n", - "# STAC_DIR = 'stac_tests'\n", - "# COLLECTION_ID = 'corine-land-cover-raster'\n", - "\n", - "# DOM_DICT = {\n", - "# 'GLP': 'Guadeloupe',\n", - "# 'GUF': 'French Guyana',\n", - "# 'MTQ': 'Martinique',\n", - "# 'MYT': 'Mayotte',\n", - "# 'REU': 'Réunion',\n", - "# '': 'Europe',\n", - "# }\n", - "\n", - "# MEDIA_TYPE_DICT = {\n", - "# 'tif': pystac.MediaType.COG,\n", - "# 'tif_xml': pystac.MediaType.XML,\n", - "# 'tif_aux_xml': pystac.MediaType.XML,\n", - "# 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid',\n", - "# 'tif_vat_cpg': pystac.MediaType.TEXT,\n", - "# 'tif_vat_dbf': 'application/dbf',\n", - "# 'txt': 
pystac.MediaType.TEXT,\n", - "# 'tif_lyr': 'image/tiff; application=geotiff; profile=layer',\n", - "# 'tfw': pystac.MediaType.TEXT,\n", - "# 'xml': pystac.MediaType.XML,\n", - "# }\n", - "\n", - "# ROLES_DICT = {\n", - "# 'tif': ['data', 'visual'],\n", - "# 'tif_xml': ['metadata'],\n", - "# 'tif_aux_xml': ['metadata'],\n", - "# 'tif_ovr': ['metadata'],\n", - "# 'tif_vat_cpg': ['metadata'],\n", - "# 'tif_vat_dbf': ['metadata'],\n", - "# 'txt': ['metadata'],\n", - "# 'tif_lyr': ['metadata'],\n", - "# 'tfw': ['metadata'],\n", - "# 'xml': ['metadata'],\n", - "# }\n", - "\n", - "# TITLE_DICT = {\n", - "# 'tif': 'Single Band Land Classification {label}',\n", - "# 'tif_xml': 'TIFF Metadata {label}',\n", - "# 'tif_aux_xml': 'TIFF Statistics {label}',\n", - "# 'tif_ovr': 'Pyramid {label}',\n", - "# 'tif_vat_cpg': 'Encoding {label}',\n", - "# 'tif_vat_dbf': 'Database {label}',\n", - "# 'txt': 'Legends {label}',\n", - "# 'tif_lyr': 'Legend Layer {label}',\n", - "# 'tfw': 'World File {label}',\n", - "# 'xml': 'Single Band Land Classification Metadata {label}',\n", - "# }\n", - "\n", - "# CLC_PROVIDER = pystac.provider.Provider(\n", - "# name='Copernicus Land Monitoring Service',\n", - "# description=('The Copernicus Land Monitoring Service provides '\n", - "# 'geographical information on land cover and its '\n", - "# 'changes, land use, ground motions, vegetation state, '\n", - "# 'water cycle and Earth\\'s surface energy variables to '\n", - "# 'a broad range of users in Europe and across the World '\n", - "# 'in the field of environmental terrestrial applications.'),\n", - "# roles=[ProviderRole.LICENSOR, ProviderRole.HOST],\n", - "# url='https://land.copernicus.eu'\n", - "# )\n", - "\n", - "\n", - "# ITEM_DESCRIPTION = ('Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' \n", - "# 'datasets produced within the frame the Copernicus Land Monitoring Service '\n", - "# 'referring to land cover / land use status of year {year}. 
'\n", - "# 'CLC service has a long-time heritage (formerly known as \\\"CORINE Land Cover Programme\\\"), '\n", - "# 'coordinated by the European Environment Agency (EEA). It provides consistent '\n", - "# 'and thematically detailed information on land cover and land cover changes across Europe. '\n", - "# 'CLC datasets are based on the classification of satellite images produced by the national '\n", - "# 'teams of the participating countries - the EEA members and cooperating countries (EEA39). '\n", - "# 'National CLC inventories are then further integrated into a seamless land cover map of Europe. '\n", - "# 'The resulting European database relies on standard methodology and nomenclature with following '\n", - "# 'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; '\n", - "# 'minimum mapping unit (MMU) for status layers is 25 hectares; '\n", - "# 'minimum width of linear elements is 100 metres. '\n", - "# 'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares '\n", - "# 'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. '\n", - "# 'The CLC service delivers important data sets supporting the implementation of key priority '\n", - "# 'areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, '\n", - "# 'halting the loss of biological diversity, tracking the impacts of climate change, '\n", - "# 'monitoring urban land take, assessing developments in agriculture or dealing with '\n", - "# 'water resources directives. CLC belongs to the Pan-European component of the '\n", - "# 'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the '\n", - "# 'European Copernicus Programme coordinated by the European Environment Agency, '\n", - "# 'providing environmental information from a combination of air- and space-based observation '\n", - "# 'systems and in-situ monitoring. 
Additional information about CLC product description including '\n", - "# 'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. '\n", - "# 'CLC class descriptions can be found at '\n", - "# 'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "'''\n", - "def deconstruct_clc_name(filename: str):\n", - " id = os.path.basename(filename).split('.')[0]\n", - " p = re.compile((\"U(?P[0-9]{4})_\"\n", - " \"(?PCLC|CHA)(?P[0-9]{4})_\"\n", - " \"V(?P[0-9]{4})_(?P[0-9a-z]*)\"\n", - " \"_?(?P[A-Z]*)?\"\n", - " \"_?(?P[A-Z]*)?\"))\n", - " m = p.search(id)\n", - "\n", - " return(m.groupdict())\n", - "'''\n", - "\n", - "def deconstruct_clc_name(filename: str):\n", - " p = re.compile('^(?P[A-Z0-9a-z_]*).(?P.*)$')\n", - " m = p.search(os.path.basename(filename))\n", - "\n", - " filename_split = m.groupdict()\n", - "\n", - " p = re.compile((\"U(?P[0-9]{4})_\"\n", - " \"(?PCLC|CHA)(?P[0-9]{4})_\"\n", - " \"V(?P[0-9]{4})_(?P[0-9a-z]*)\"\n", - " \"_?(?P[A-Z]*)?\"\n", - " \"_?(?P[A-Z]*)?\"))\n", - " m = p.search(filename_split['id'])\n", - " \n", - " if m:\n", - " return(m.groupdict() | filename_split)\n", - " else:\n", - " return(filename_split)\n", - "\n", - "\n", - "def create_asset(filename: str, DOM_code: str):\n", - " filename_elements = deconstruct_clc_name(filename)\n", - " suffix = filename_elements['suffix'].replace('.', '_')\n", - " \n", - " label = DOM_DICT[DOM_code]\n", - " \n", - " asset = pystac.Asset(href=filename, title=TITLE_DICT[suffix].format(label=label), media_type=MEDIA_TYPE_DICT[suffix], roles=ROLES_DICT[suffix])\n", - " return(f\"{filename_elements['id']}_{suffix}\", asset)\n", - "\n", - "def get_img_paths(path: str): \n", - " img_paths=[]\n", - " for root, dirs, files in os.walk(path):\n", - " if root.endswith(('DATA', 'French_DOMs')):\n", - " for file in 
files:\n", - " if file.endswith('.tif'):\n", - " img_paths.append(os.path.join(root, file))\n", - "\n", - " return(img_paths)\n", - "\n", - "# Not needed anymore, part of deconstruct_clc_name...\n", - "# def extract_clc_name(path: str):\n", - "# clc_name = os.path.basename(path).split('.')[0]\n", - "# return(clc_name)\n", - "\n", - "# def get_asset_files(path, clc_name):\n", - "\n", - "# clc_name_elements = deconstruct_clc_name(clc_name)\n", - "\n", - "# asset_files = []\n", - " \n", - "# for root, dirs, files in os.walk(path):\n", - "# if not clc_name_elements['DOM_code'] and 'French_DOMs' in root:\n", - "# continue\n", - " \n", - "# if clc_name_elements['DOM_code'] and ('Legend' in root and not 'French_DOMs' in root):\n", - "# continue\n", - " \n", - "# for file in files:\n", - "# if (file.startswith(clc_name + '.') or \n", - "# file.endswith((f'{clc_name_elements[\"DOM_code\"]}.tif.lyr', 'QGIS.txt',)) and \n", - "# clc_name in file):\n", - "# asset_files.append(os.path.join(root, file))\n", - "\n", - "# return(asset_files)\n", - " \n", - "\n", - "# def get_asset_files(path: str, clc_name: str) -> list[str]:\n", - "\n", - "# clc_name_elements = deconstruct_clc_name(clc_name)\n", - "\n", - "# asset_files = []\n", - " \n", - "# for root, dirs, files in os.walk(path):\n", - "# if not clc_name_elements['DOM_code'] and 'French_DOMs' in root:\n", - "# continue\n", - " \n", - "# if clc_name_elements['DOM_code'] and ('Legend' in root and not 'French_DOMs' in root):\n", - "# continue\n", - " \n", - "# for file in files:\n", - "# # print(root, file)\n", - "# if (file.startswith(clc_name_elements['id'] + '.') or \n", - "# file.endswith((f'{clc_name_elements['DOM_code']}.tif.lyr', 'QGIS.txt',)) and clc_name_elements['id'].lower() in root or\n", - "# file == f'readme_{clc_name_elements['id']}.txt'):\n", - " \n", - "# asset_files.append(os.path.join(root, file)) \n", - "\n", - "# return asset_files\n", - "\n", - "def get_asset_files(data_root: str, img_path: str) -> 
list[str]:\n", - "\n", - " clc_name_elements = deconstruct_clc_name(img_path)\n", - " id = clc_name_elements['id']\n", - " dom_code = clc_name_elements['DOM_code']\n", - "\n", - " asset_files = []\n", - " \n", - " for root, _, files in os.walk(data_root):\n", - " if not dom_code and 'French_DOMs' in root:\n", - " continue\n", - "\n", - " if dom_code and 'Legend' in root and not 'French_DOMs' in root:\n", - " continue\n", - " \n", - " if not 'U{update_campaign}_{theme}{reference_year}_V{release_year}'.format(**clc_name_elements).lower() in root:\n", - " continue\n", - " \n", - " for file in files:\n", - "\n", - " if (file.startswith(id + '.') or \n", - " file.endswith(f'{dom_code}.tif.lyr') or \n", - " file.endswith('QGIS.txt',) or \n", - " file == f'readme_{id}.txt'):\n", - "\n", - " asset_files.append(os.path.join(root, file))\n", - "\n", - " return asset_files\n", - "\n", - "\n", - "def project_bbox(img, target_epsg=4326):\n", - " target_crs = rio.crs.CRS.from_epsg(target_epsg)\n", - " bbox_warped = rio.warp.transform_bounds(img.crs, target_crs, *img.bounds)\n", - " return(bbox_warped)\n", - "\n", - "def create_item(img_path, root):\n", - "\n", - " clc_name_elements = deconstruct_clc_name(img_path)\n", - "\n", - " asset_files = get_asset_files(root, clc_name=os.path.basename(img_path))\n", - " asset_files = [f for f in asset_files if not f.endswith('aux')]\n", - " year = clc_name_elements.get('reference_year')\n", - " props = {'description': ITEM_DESCRIPTION.format(year=year),\n", - " 'created': None,\n", - " 'providers': CLC_PROVIDER.to_dict(),\n", - " }\n", - "\n", - " with rio.open(img_path) as img:\n", - "\n", - " bbox = project_bbox(img)\n", - " params = {\n", - " 'id': clc_name_elements.get('id'),\n", - " 'bbox': bbox,\n", - " 'geometry': mapping(box(*bbox)),\n", - " 'datetime': None,\n", - " 'start_datetime': datetime(int(year), 1, 1, microsecond=0, tzinfo=UTC),\n", - " 'end_datetime': datetime(int(year), 12, 31, microsecond=0, tzinfo=UTC),\n", - " 
'properties': props,\n", - " }\n", - "\n", - " item = pystac.Item(**params)\n", - " \n", - " for asset_file in asset_files:\n", - " # print(asset_file)\n", - " key, asset = create_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code'))\n", - " item.add_asset(\n", - " key=key,\n", - " asset=asset,\n", - " )\n", - "\n", - " proj_ext = ProjectionExtension.ext(item.assets[os.path.basename(img_path).replace('.', '_')], add_if_missing=True)\n", - " proj_ext.apply(epsg=rio.crs.CRS(img.crs).to_epsg(),\n", - " bbox=img.bounds,\n", - " shape=[_ for _ in img.shape],\n", - " transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0],\n", - " )\n", - "\n", - " # license = pystac.link.Link(rel='LICENSE', target=\"https://land.copernicus.eu/en/data-policy\")\n", - " # item.add_link(license)\n", - " links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK]\n", - " item.add_links(links)\n", - "\n", - " return item\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "os.getcwd()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "if '':\n", - " print('buh')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GLP.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_GUF.tif', 
'../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MTQ.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_MYT.tif', '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2012_V2020_20u1_FR_REU.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2018_V2020_20u1.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_GLP.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_GUF.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_MTQ.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_MYT.tif', '../../../CLC_100m\\\\u2018_clc2018_v2020_20u1_raster100m\\\\DATA\\\\French_DOMs\\\\U2018_CLC2018_V2020_20u1_FR_REU.tif']\n" - ] - } - ], - "source": [ - "root = '../../../CLC_100m'\n", - "\n", - "img_paths = get_img_paths(path=root)\n", - "print(img_paths)\n", - "# for img_path in img_paths:\n", - "\n", - "img_path = img_paths[0]\n", - "\n", - "# get_asset_files(root, deconstruct_clc_name(img_path)['id'])" - ] - }, - { - "cell_type": "code", - "execution_count": 103, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "U2018_CLC2012_V2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\\U2018_CLC2012_V2020_20u1.tif\n", - "U2018_CLC2012_V2020_20u1.tfw u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", - "U2018_CLC2012_V2020_20u1.tif u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", - "U2018_CLC2012_V2020_20u1.tif.aux.xml u2018_clc2012_v2020_20u1 
../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", - "U2018_CLC2012_V2020_20u1.tif.ovr u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", - "U2018_CLC2012_V2020_20u1.tif.vat.cpg u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", - "U2018_CLC2012_V2020_20u1.tif.vat.dbf u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", - "U2018_CLC2012_V2020_20u1.tif.xml u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\DATA\n", - "readme_U2018_CLC2012_V2020_20u1.txt u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\Documents\n", - "CLC2018_CLC2012_V2018_20.tif.lyr u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\Legend\n", - "CLC2018_CLC2012_V2018_20_QGIS.txt u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\Legend\n", - "U2018_CLC2012_V2020_20u1.xml u2018_clc2012_v2020_20u1 ../../../CLC_100m\\u2018_clc2012_v2020_20u1_raster100m\\Metadata\n" - ] - }, - { - "data": { - "text/plain": [ - "['../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tfw',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.aux.xml',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.ovr',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.cpg',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.vat.dbf',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\DATA\\\\U2018_CLC2012_V2020_20u1.tif.xml',\n", - " 
'../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\Documents\\\\readme_U2018_CLC2012_V2020_20u1.txt',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20.tif.lyr',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\Legend\\\\CLC2018_CLC2012_V2018_20_QGIS.txt',\n", - " '../../../CLC_100m\\\\u2018_clc2012_v2020_20u1_raster100m\\\\Metadata\\\\U2018_CLC2012_V2020_20u1.xml']" - ] - }, - "execution_count": 103, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clc_name = deconstruct_clc_name(img_path)['id']\n", - "print(clc_name, img_path)\n", - "get_asset_files(root, img_path)\n", - "# deconstruct_clc_name(img_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'id': 'CLC2018_CLC2012_V2018_20_QGIS', 'suffix': 'txt'}" - ] - }, - "execution_count": 68, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "deconstruct_clc_name('CLC2018_CLC2012_V2018_20_QGIS.txt')" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'CLMS_CATALOG_LINK' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[69], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m item \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_item\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mroot\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2\u001b[0m item\n", - "Cell \u001b[1;32mIn[65], line 152\u001b[0m, in \u001b[0;36mcreate_item\u001b[1;34m(img_path, root)\u001b[0m\n\u001b[0;32m 144\u001b[0m 
proj_ext\u001b[38;5;241m.\u001b[39mapply(epsg\u001b[38;5;241m=\u001b[39mrio\u001b[38;5;241m.\u001b[39mcrs\u001b[38;5;241m.\u001b[39mCRS(img\u001b[38;5;241m.\u001b[39mcrs)\u001b[38;5;241m.\u001b[39mto_epsg(),\n\u001b[0;32m 145\u001b[0m bbox\u001b[38;5;241m=\u001b[39mimg\u001b[38;5;241m.\u001b[39mbounds,\n\u001b[0;32m 146\u001b[0m shape\u001b[38;5;241m=\u001b[39m[_ \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m img\u001b[38;5;241m.\u001b[39mshape],\n\u001b[0;32m 147\u001b[0m transform\u001b[38;5;241m=\u001b[39m[_ \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m img\u001b[38;5;241m.\u001b[39mtransform] \u001b[38;5;241m+\u001b[39m [\u001b[38;5;241m0.0\u001b[39m, \u001b[38;5;241m0.0\u001b[39m, \u001b[38;5;241m1.0\u001b[39m],\n\u001b[0;32m 148\u001b[0m )\n\u001b[0;32m 150\u001b[0m \u001b[38;5;66;03m# license = pystac.link.Link(rel='LICENSE', target=\"https://land.copernicus.eu/en/data-policy\")\u001b[39;00m\n\u001b[0;32m 151\u001b[0m \u001b[38;5;66;03m# item.add_link(license)\u001b[39;00m\n\u001b[1;32m--> 152\u001b[0m links \u001b[38;5;241m=\u001b[39m [CLMS_LICENSE, \u001b[43mCLMS_CATALOG_LINK\u001b[49m, ITEM_PARENT_LINK, COLLECTION_LINK]\n\u001b[0;32m 153\u001b[0m item\u001b[38;5;241m.\u001b[39madd_links(links)\n\u001b[0;32m 155\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m item\n", - "\u001b[1;31mNameError\u001b[0m: name 'CLMS_CATALOG_LINK' is not defined" - ] - } - ], - "source": [ - "item = create_item(img_path, root)\n", - "item" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'Item' object has no attribute 'normalize_hrefs'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[12], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m 
\u001b[38;5;66;03m# import pystac.extensions\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# import pystac.extensions.projection\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[43mitem\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnormalize_hrefs\u001b[49m()\n", - "\u001b[1;31mAttributeError\u001b[0m: 'Item' object has no attribute 'normalize_hrefs'" - ] - } - ], - "source": [ - "# import pystac.extensions\n", - "# import pystac.extensions.projection\n", - "\n", - "item.normalize_hrefs()" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'bla': 'blu', 'blo': 42}" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_fun()" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc\\\\stacs/corine-land-cover-raster/corine-land-cover-raster.json'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[17], line 9\u001b[0m\n\u001b[0;32m 1\u001b[0m WORKING_DIR \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mgetcwd()\n\u001b[0;32m 3\u001b[0m CLMS_CATALOG_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 4\u001b[0m \u001b[38;5;66;03m# rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, \"stacs/clms_catalog.json\"))\u001b[39;00m\n\u001b[0;32m 5\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mROOT, 
target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mx:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mprojects\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mETC-DI\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mTask_18\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mclms-stac\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mstacs\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mclms_catalog.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 6\u001b[0m )\n\u001b[0;32m 7\u001b[0m COLLECTION_LINK \u001b[38;5;241m=\u001b[39m pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 8\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mCOLLECTION,\n\u001b[1;32m----> 9\u001b[0m target\u001b[38;5;241m=\u001b[39m\u001b[43mpystac\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSTACObject\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mWORKING_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstacs/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mCOLLECTION_ID\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mCOLLECTION_ID\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[0;32m 10\u001b[0m )\n\u001b[0;32m 11\u001b[0m ITEM_PARENT_LINK \u001b[38;5;241m=\u001b[39m 
pystac\u001b[38;5;241m.\u001b[39mlink\u001b[38;5;241m.\u001b[39mLink(\n\u001b[0;32m 12\u001b[0m rel\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mRelType\u001b[38;5;241m.\u001b[39mPARENT,\n\u001b[0;32m 13\u001b[0m target\u001b[38;5;241m=\u001b[39mpystac\u001b[38;5;241m.\u001b[39mSTACObject\u001b[38;5;241m.\u001b[39mfrom_file(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstacs/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m)),\n\u001b[0;32m 14\u001b[0m )\n\u001b[0;32m 16\u001b[0m item\u001b[38;5;241m.\u001b[39mset_self_href(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(WORKING_DIR, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mSTAC_DIR\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCOLLECTION_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.json\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_object.py:619\u001b[0m, in \u001b[0;36mSTACObject.from_file\u001b[1;34m(cls, href, stac_io)\u001b[0m\n\u001b[0;32m 607\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Reads a STACObject implementation from a file.\u001b[39;00m\n\u001b[0;32m 608\u001b[0m \n\u001b[0;32m 609\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 616\u001b[0m 
\u001b[38;5;124;03m by the JSON read from the file located at HREF.\u001b[39;00m\n\u001b[0;32m 617\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 618\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m==\u001b[39m STACObject:\n\u001b[1;32m--> 619\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(S, \u001b[43mpystac\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 621\u001b[0m href \u001b[38;5;241m=\u001b[39m make_posix_style(href)\n\u001b[0;32m 623\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stac_io \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\__init__.py:165\u001b[0m, in \u001b[0;36mread_file\u001b[1;34m(href, stac_io)\u001b[0m\n\u001b[0;32m 163\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stac_io \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 164\u001b[0m stac_io \u001b[38;5;241m=\u001b[39m StacIO\u001b[38;5;241m.\u001b[39mdefault()\n\u001b[1;32m--> 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mstac_io\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_stac_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:234\u001b[0m, in \u001b[0;36mStacIO.read_stac_object\u001b[1;34m(self, source, root, *args, **kwargs)\u001b[0m\n\u001b[0;32m 208\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_stac_object\u001b[39m(\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 210\u001b[0m source: HREF,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[0;32m 
214\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m STACObject:\n\u001b[0;32m 215\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Read a STACObject from a JSON file at the given source.\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \n\u001b[0;32m 217\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.read_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[38;5;124;03m contained in the file at the given uri.\u001b[39;00m\n\u001b[0;32m 233\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 234\u001b[0m d \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_json\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 235\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstac_object_from_dict(\n\u001b[0;32m 236\u001b[0m d, href\u001b[38;5;241m=\u001b[39msource, root\u001b[38;5;241m=\u001b[39mroot, preserve_dict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 237\u001b[0m )\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:205\u001b[0m, in \u001b[0;36mStacIO.read_json\u001b[1;34m(self, source, *args, **kwargs)\u001b[0m\n\u001b[0;32m 188\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_json\u001b[39m(\u001b[38;5;28mself\u001b[39m, source: HREF, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Any]:\n\u001b[0;32m 189\u001b[0m 
\u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Read a dict from the given source.\u001b[39;00m\n\u001b[0;32m 190\u001b[0m \n\u001b[0;32m 191\u001b[0m \u001b[38;5;124;03m See :func:`StacIO.read_text ` for usage of\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[38;5;124;03m given source.\u001b[39;00m\n\u001b[0;32m 204\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 205\u001b[0m txt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_text\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mjson_loads(txt)\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:282\u001b[0m, in \u001b[0;36mDefaultStacIO.read_text\u001b[1;34m(self, source, *_, **__)\u001b[0m\n\u001b[0;32m 277\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A concrete implementation of :meth:`StacIO.read_text\u001b[39;00m\n\u001b[0;32m 278\u001b[0m \u001b[38;5;124;03m`. 
Converts the ``source`` argument to a string (if it\u001b[39;00m\n\u001b[0;32m 279\u001b[0m \u001b[38;5;124;03mis not already) and delegates to :meth:`DefaultStacIO.read_text_from_href` for\u001b[39;00m\n\u001b[0;32m 280\u001b[0m \u001b[38;5;124;03mopening and reading the file.\"\"\"\u001b[39;00m\n\u001b[0;32m 281\u001b[0m href \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(os\u001b[38;5;241m.\u001b[39mfspath(source))\n\u001b[1;32m--> 282\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_text_from_href\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\boeck\\AppData\\Local\\miniforge3\\envs\\stacdev\\Lib\\site-packages\\pystac\\stac_io.py:305\u001b[0m, in \u001b[0;36mDefaultStacIO.read_text_from_href\u001b[1;34m(self, href)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not read uri \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhref\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 304\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 305\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mhref\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mutf-8\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m 306\u001b[0m href_contents \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread()\n\u001b[0;32m 307\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m href_contents\n", - "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file 
or directory: 'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc\\\\stacs/corine-land-cover-raster/corine-land-cover-raster.json'" - ] - } - ], - "source": [ - "WORKING_DIR = os.getcwd()\n", - "\n", - "CLMS_CATALOG_LINK = pystac.link.Link(\n", - " # rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, \"stacs/clms_catalog.json\"))\n", - " rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(\"x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\stacs\\\\clms_catalog.json\")\n", - ")\n", - "COLLECTION_LINK = pystac.link.Link(\n", - " rel=pystac.RelType.COLLECTION,\n", - " target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f\"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json\")),\n", - ")\n", - "ITEM_PARENT_LINK = pystac.link.Link(\n", - " rel=pystac.RelType.PARENT,\n", - " target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f\"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json\")),\n", - ")\n", - "\n", - "item.set_self_href(os.path.join(WORKING_DIR, f\"{STAC_DIR}/{COLLECTION_ID}/{item.id}/{item.id}.json\"))\n", - "item.save_object()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'x:\\\\projects\\\\ETC-DI\\\\Task_18\\\\clms-stac\\\\scripts\\\\clc'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [], - "source": [ - "# # Taken from https://stackoverflow.com/questions/2148119/how-to-convert-an-xml-string-to-a-dictionary\n", - "# from xml.etree import cElementTree as ElementTree\n", - "\n", - "\n", - "# class XmlListConfig(list):\n", - "# def __init__(self, aList):\n", - "# for element in aList:\n", - "# if element:\n", - "# if len(element) == 1 or element[0].tag != element[1].tag:\n", - "# self.append(XmlDictConfig(element))\n", - "# elif element[0].tag == element[1].tag:\n", - "# 
self.append(XmlListConfig(element))\n", - "# elif element.text:\n", - "# text = element.text.strip()\n", - "# if text:\n", - "# self.append(text)\n", - "\n", - "\n", - "# class XmlDictConfig(dict):\n", - "# def __init__(self, parent_element):\n", - "# if parent_element.items():\n", - "# self.update(dict(parent_element.items()))\n", - "# for element in parent_element:\n", - "# if element:\n", - "# if len(element) == 1 or element[0].tag != element[1].tag:\n", - "# aDict = XmlDictConfig(element)\n", - "# else:\n", - "# aDict = {element[0].tag: XmlListConfig(element)}\n", - "# if element.items():\n", - "# aDict.update(dict(element.items()))\n", - "# self.update({element.tag: aDict})\n", - "# elif element.items():\n", - "# self.update({element.tag: dict(element.items())})\n", - "# else:\n", - "# self.update({element.tag: element.text})\n", - "\n", - "# stac_io = pystac.StacIO.default()\n", - "\n", - "# def get_metadata(xml: str):\n", - "# result = XmlDictConfig(ElementTree.XML(stac_io.read_text(xml)))\n", - "# result[\n", - "# \"ORIGINAL_URL\"\n", - "# ] = xml # Include the original URL in the metadata for use later\n", - "# return result\n", - "\n", - "\n", - "# xml_path = '../CLC_samples/U2018_CLC2018_V2020_20u1.xml'\n", - "\n", - "# get_metadata(xml_path)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "stacdev", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/scripts/clc/reproject_data_bounds.ipynb b/scripts/clc/reproject_data_bounds.ipynb deleted file mode 100644 index 19a307d..0000000 --- a/scripts/clc/reproject_data_bounds.ipynb +++ /dev/null @@ -1,112 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - 
"metadata": {}, - "outputs": [], - "source": [ - "import rasterio as rio\n", - "from rasterio.warp import Resampling\n", - "from shapely.geometry import box, mapping\n", - "from shapely import envelope" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "img_path = 'X:/EO/u2018_clc2018_v2020_20u1_raster100m/DATA/U2018_CLC2018_V2020_20u1.tif'\n", - "\n", - "\n", - "dst_crs = rio.CRS.from_epsg(4326)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [], - "source": [ - "def project_data_window_bbox(src: rio.open, dst_crs: rio.CRS = rio.CRS.from_epsg(4326)) -> tuple:\n", - " data, transform = rio.warp.reproject(source=src.read(),\n", - " src_transform=src.transform,\n", - " src_crs=src.crs,\n", - " dst_crs=dst_crs,\n", - " dst_nodata=src.nodata,\n", - " dst_resolution=(0.25, 0.25),\n", - " resampling=Resampling.max)\n", - " \n", - " data_window = rio.windows.get_data_window(data, nodata=src.nodata)\n", - " bbox = rio.windows.bounds(data_window, transform=transform)\n", - " print(data.shape)\n", - " # bbox_raw = rio.windows.bounds(data_window, transform=transform)\n", - " # bbox_buff = box(*bbox_raw).buffer(0.25 / 2)\n", - " # bbox = envelope(bbox_buff).bounds\n", - " return(bbox)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "with rio.open(img_path) as src:\n", - " print(type(src))\n", - " #bbox = project_data_window_bbox(src)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "## To export results for examination in QGIS\n", - "\n", - "# profile = src.profile\n", - "# profile.update(transform=transform, driver='GTiff', height=data.shape[1], width=data.shape[2], crs=rio.CRS.from_epsg(4326))\n", - "\n", - "# with rio.open(dst_path, 'w', **profile) as dst:\n", - 
"# dst.write(data)\n", - "\n", - "# import geopandas as gpd\n", - "\n", - "# x = gpd.GeoSeries(box(*bbox), crs=dst_crs)\n", - "# x.to_file('X:/EO/u2018_clc2018_v2020_20u1_rio4326_box025max.gpkg')\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 3192d5ce815e0649bc765b27046b021e880e579e Mon Sep 17 00:00:00 2001 From: chorng Date: Tue, 7 May 2024 10:32:16 +0200 Subject: [PATCH 67/80] Remove .DS_Store * remove .DS_Store * add .DS_Store to gitignore --- .DS_Store | Bin 6148 -> 0 bytes .gitignore | 3 +++ schema/.DS_Store | Bin 6148 -> 0 bytes scripts/.DS_Store | Bin 8196 -> 0 bytes 4 files changed, 3 insertions(+) delete mode 100644 .DS_Store delete mode 100644 schema/.DS_Store delete mode 100644 scripts/.DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index a73d78f03ed74e4910ec06c8f245e9062651c22e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5T32orijpkLh-cVwPI_fAYMY%7cim+m73V1!I&*gY7e21vp?@6K8Vlb z%98|(W8HX=~_DScYU@b*Nq0hJjKNuzb zAX34cHyi#U1N82eAb~5mfgVi1zn7pNg-Kekzlp*^adD~Sl$>Sf!n;u;FZI)5()I^u zv^rKQ3MO+uI0<{BPGxmp#i<|0y@3u0yFGL{KMmuq8nx9h?j}0cGaF9XDR(N>ENiq{ z&6;d9$2FPlZr5var?oR4mz}kZt-YhpZFCo_2ScR5Z%)gK#R)v(WGbh7ZxBZ+zD7I6 zlytrW2%!%tB%i+6*I&N^|07UwY)Pi&BB`8K(bS>rvaR)`1P(%|dY>OdG zINGK4b1miuO*jbKdPx#XzUO4LhK67tIpeuUsemtyp#Qd~vVf_6z0MAu?&5G^SDBcN&E Kff@K!2HpW6|BD9z diff --git a/.gitignore b/.gitignore index 4b381b7..deabeb3 100644 --- a/.gitignore +++ b/.gitignore @@ -100,3 +100,6 @@ target/ # Pycopy __pycache__/ + +# macOS +.DS_Store diff --git a/schema/.DS_Store b/schema/.DS_Store deleted file mode 100644 
index 795a8847d734afe9e31c3119ffe63b4876fb75d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5T4blTSUk~p~nTU1zVMZcnP(>fDt{Y)W#GI#%yU)dnkpR^@V&ApU0Wq z4Op6kClNaXyWi~m>}Ed5{s4gJPNFt|2LKWqp&+F~$Xw}KbHRkd%rSun$RP`-*{EWo zzi6UwmoS1cXo%q3_lJoN(f1LKk~qs-txvJGUfbB*l3Q|H-UUy38svj~l642;8ya0| zorI-62(RMdwD0Yn=`

                      2Pck;%JB=x7TqR>1kI_(kL^zfpy5btoOafY}W2{j+&}{ zG;gZe$ziLhjyuQmd0p=9ADmwFpOcqVzgaQ`@@{0uVF~Z3EY;D-#*{-CfC`VI?=X6wL?uFvGJ5t5)zZwW%>(05o^ zL=TFvsfadJxF?3N>1bCj&UaW?wCNzs$~cc(xqQ3`vpU+92?ya@k^@U3S4rm`~7^n3s b(lO3=SXiW4$ZpaR`66Hl;f@*j1qQwVu*gkQ diff --git a/scripts/.DS_Store b/scripts/.DS_Store deleted file mode 100644 index 64e4227841b26927f89261dee143e2eb51ae82b8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8196 zcmeHMU2GIp6u#fIzziMe6e)E1+l9)1g)I~UB4qmmw56nkUFfzH>+a4#2c|Q1XLc88 zNJ@-R6nrr1n?a*KiYAIi5{dCA(FaW=4Mw6y6OAUu2NM&+i+JwbS!{von<44k8Q|qp$qFpPvTSht{XIKunXVU)UxQRpIc@q3d4^miA5IQBrKFef z3T{g#e~5D_$FZ{Ic_w?%G)e>7+*Zf-GNx_heFIZBNTt8Wv~{P{;uLJ%^=*@4K~`jC zKwB^}vSH(fcth-g#`xyb4Y84}oN3zJbo#U+*VeD!wtwKbHDo*IL=Xrz0ygWmpw5qv z$a0IV;)_JZu5v`z%MmFvtJGPMbZ@#(?N6II$4;83i~+6MrLa1P-Iq6t6i!dhu?j=B zJD}C%GFGl&Wjv$VvbZwIs@+`Pv8;WLYkH<*_ohALgjaSnVJY3`I(e_vcQ&AD1=rYb zkT0`nw2?=0HHSZ~dzia%4;p!wXni6s!c#8Jp1)}6@-^$5w(e-%-JKk()@tV5r`D={ zltbGykC;ZjH{<3ETQ}{a2TG>y4fYgG*UVZ*%FYZKN{tj(e5<<1f=IfMH6JaQ-bq=j zm&fMRXw~YHNKDZ#$^AP0n7xk9J?j2SWJ%&QYdWs@KA>5{Jmut|Vq;t;D5XIvPaU#+p zYoE#w^JjNuT(5iBu<7wU_BQ%iyxM4NW7X{)oqM_>5fS@8xhQ9$&mGLQizXqSUWNd^EX(y}$kiuSiRpx_DH=b`7PA#> z3!R%Oc7!?X3_Hgzu(#R!>4S(XMG*eQgS<-B2zO-6eBdwL1q}@`x)FE|BNvU5lrDGxfA(#(dq@HuA*F?7xBiudx1- zU1#63U)Ue)2BxD58lqT*b!Z|G??OBFpbL8m$Z7Q9kPqgg1m$Bm4i80w^GO2qV|W}- z;7Odtvv>~A`=EZ=2lZ>Xh&S*i-ohnZ!3X#dAK_zsf-mqDe!LB|T@yizpT>i>n9SRj zV;?118RQeyrwmsiBCX^1|DBV6|DQ7X2m=i>FcmX^%C=-%3q?NJe#r0IG3t*{=Z))) y3hJBC;9tjy`qy!yxBg*B^BAden?yiI1tkf!|NTS2UH`fc&;RiJ_u)8YhQ9&2;bzhR From e6997d7d5b0af65e3528bef209dbf1d080ba9235 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 7 May 2024 13:48:36 +0200 Subject: [PATCH 68/80] adds title to clms license --- scripts/clc/constants.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/clc/constants.py b/scripts/clc/constants.py index f82b3a2..c13d0ea 100644 --- a/scripts/clc/constants.py +++ b/scripts/clc/constants.py 
@@ -75,7 +75,9 @@ # Items -CLMS_LICENSE = pystac.link.Link(rel='LICENSE', target="https://land.copernicus.eu/en/data-policy") +CLMS_LICENSE = pystac.link.Link(rel=pystac.RelType.LICENSE, + target="https://land.copernicus.eu/en/data-policy", + title='Legal notice on the use of CLMS data') DOM_MAP = { 'GLP': 'Guadeloupe', From 944b4695bb10e365c29bc6faa1fae8a4dbd3039c Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 7 May 2024 16:21:36 +0200 Subject: [PATCH 69/80] adds dummy preview to item assets --- scripts/clc/item.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index 905f278..ceb0be9 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -34,10 +34,15 @@ def deconstruct_clc_name(filename: str) -> dict[str]: + filename_split = { + 'dirname': os.path.dirname(filename), + 'basename': os.path.basename(filename) + } p = re.compile('^(?P[A-Z0-9a-z_-]*)\\.(?P.*)$') - m = p.search(os.path.basename(filename)) - - filename_split = m.groupdict() + m = p.search(filename_split['basename']) + + if m: + filename_split |= m.groupdict() p = re.compile(("U(?P[0-9]{4})_" "(?PCLC|CHA)(?P[0-9]{4})_" @@ -47,9 +52,9 @@ def deconstruct_clc_name(filename: str) -> dict[str]: m = p.search(filename_split['id']) if m: - return m.groupdict() | filename_split - else: - return filename_split + filename_split |= m.groupdict() + + return filename_split def create_item_asset(asset_file: str, DOM_code: str) -> pystac.Asset: @@ -127,7 +132,6 @@ def project_data_window_bbox(src: rio.io.DatasetReader, dst_crs: rio.CRS = rio.C return bbox def create_item(img_path: str, data_root: str) -> pystac.Item: - clc_name_elements = deconstruct_clc_name(img_path) asset_files = get_item_asset_files(data_root, img_path) @@ -167,6 +171,16 @@ def create_item(img_path: str, data_root: str) -> pystac.Item: except KeyError as e: print("An error occured:", e) + # TODO: "Thumbnail" was originally put at collection 
level in the template, while it should perhaps be at item level? Individual previews should be added to each item + key = 'preview' + asset = pystac.Asset( + href='https://sdi.eea.europa.eu/public/catalogue-graphic-overview/960998c1-1870-4e82-8051-6485205ebbac.png', + title=ITEM_TITLE_MAP['preview'].format(label=clc_name_elements['DOM_code']), + media_type=ITEM_MEDIA_TYPE_MAP[key], + roles=ITEM_ROLES_MAP[key] + ) + + item.add_asset(key=key, asset=asset) proj_ext = ProjectionExtension.ext(item.assets[os.path.basename(img_path).replace('.', '_')], add_if_missing=True) proj_ext.apply(epsg=rio.crs.CRS(img.crs).to_epsg(), From 1c44f51f0a29a27b196219e58577a7ef2bcd85c0 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 7 May 2024 16:22:27 +0200 Subject: [PATCH 70/80] adds entries for (dummy) previews --- scripts/clc/constants.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/clc/constants.py b/scripts/clc/constants.py index c13d0ea..ec31aad 100644 --- a/scripts/clc/constants.py +++ b/scripts/clc/constants.py @@ -100,6 +100,7 @@ 'tfw': pystac.MediaType.TEXT, 'xml': pystac.MediaType.XML, 'readme_txt': pystac.MediaType.TEXT, + 'preview': pystac.MediaType.PNG, } ITEM_ROLES_MAP = { @@ -114,6 +115,7 @@ 'tfw': ['metadata'], 'xml': ['metadata'], 'readme_txt': ['metadata'], + 'preview': ['thumbnail'], } ITEM_TITLE_MAP = { @@ -128,6 +130,7 @@ 'tfw': 'World File {label}', 'xml': 'Single Band Land Classification Metadata {label}', 'readme_txt': 'Description {label}', + 'preview': 'Single Band Land Classification Thumbnail {label}' } CLC_PROVIDER = pystac.provider.Provider( From 83cd87a6df26a70d945c36406dab5b0fe0da1b4d Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 7 May 2024 16:22:39 +0200 Subject: [PATCH 71/80] adds dev notebook --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 26bc096..1f65787 100644 --- a/.gitignore +++ b/.gitignore @@ -102,4 +102,5 @@ target/ __pycache__/ 
clms-stac.code-workspace -stac_tests/* \ No newline at end of file +stac_tests/* +scripts/clc/dev.ipynb \ No newline at end of file From 4872adb571e2ef805e885125391752c92edef982 Mon Sep 17 00:00:00 2001 From: chorng Date: Tue, 7 May 2024 16:42:50 +0200 Subject: [PATCH 72/80] Refactor scripts * package scripts.uabh * move constants to constants.py * break create_collection function into small functions * add create_uabh_collection.py to create items * update schema * update sample --- create_uabh_collection.py | 18 ++ schema/products/uabh.json | 8 +- scripts/uabh/collection.py | 143 ++++++++++++++ scripts/uabh/constants.py | 33 ++-- scripts/uabh/uabh_collection.py | 176 ------------------ .../urban-atlas-building-height.json | 10 +- 6 files changed, 189 insertions(+), 199 deletions(-) create mode 100644 create_uabh_collection.py create mode 100644 scripts/uabh/collection.py delete mode 100644 scripts/uabh/uabh_collection.py diff --git a/create_uabh_collection.py b/create_uabh_collection.py new file mode 100644 index 0000000..18bc67d --- /dev/null +++ b/create_uabh_collection.py @@ -0,0 +1,18 @@ +import logging +from glob import glob + +from scripts.uabh.collection import create_uabh_collection, get_stac_validator +from scripts.uabh.constants import COLLECTION_ID, STAC_DIR, WORKING_DIR + +LOGGER = logging.getLogger(__name__) + + +def main(): + logging.basicConfig(filename="create_uabh_collection.log") + item_list = glob(f"{WORKING_DIR}/{STAC_DIR}/{COLLECTION_ID}/**/*.json") + validator = get_stac_validator("schema/products/uabh.json") + create_uabh_collection(item_list, validator) + + +if __name__ == "__main__": + main() diff --git a/schema/products/uabh.json b/schema/products/uabh.json index f14b366..4ac3fea 100644 --- a/schema/products/uabh.json +++ b/schema/products/uabh.json @@ -170,8 +170,8 @@ "stac_version": { "const": "1.0.0" }, "stac_extensions": { "const": [ - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - 
"https://stac-extensions.github.io/projection/v1.1.0/schema.json" + "https://stac-extensions.github.io/projection/v1.1.0/schema.json", + "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json" ] }, "type": { "const": "Collection" }, @@ -192,7 +192,7 @@ "name": "Copernicus Land Monitoring Service", "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": ["licensor", "host"], - "url": "https://land.copernicus.eu/en" + "url": "https://land.copernicus.eu" } ] }, @@ -202,7 +202,7 @@ "bbox": [[-22.13, 35.07, 33.48, 64.38]] }, "temporal": { - "interval": [["2012-01-01T00:00:00.000Z", null]] + "interval": [["2012-01-01T00:00:00Z", null]] } } }, diff --git a/scripts/uabh/collection.py b/scripts/uabh/collection.py new file mode 100644 index 0000000..80b1199 --- /dev/null +++ b/scripts/uabh/collection.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import json +import logging +import os +from enum import Enum + +import pystac +from jsonschema import Draft7Validator +from jsonschema.exceptions import best_match +from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension +from pystac.extensions.projection import ProjectionExtension +from pystac.link import Link +from pystac.media_type import MediaType +from referencing import Registry, Resource + +from .constants import ( + CLMS_LICENSE, + COLLECTION_DESCRIPTION, + COLLECTION_EXTENT, + COLLECTION_ID, + COLLECTION_KEYWORD, + COLLECTION_TITLE, + HOST_AND_LICENSOR, + STAC_DIR, + WORKING_DIR, +) + +LOGGER = logging.getLogger(__name__) + + +class CollectionCreationError(Exception): + pass + + +class UABHItemAssets(Enum): + dataset = AssetDefinition({"title": "Building height raster", "media_type": 
MediaType.GEOTIFF, "roles": ["data"]}) + metadata = AssetDefinition( + {"title": "Building height metadata", "media_type": MediaType.XML, "roles": ["metadata"]} + ) + quality_check_report = AssetDefinition( + {"title": "Quality check report", "media_type": MediaType.PDF, "roles": ["metadata"]} + ) + quality_control_report = AssetDefinition( + {"title": "Quality control report", "media_type": MediaType.PDF, "roles": ["metadata"]} + ) + pixel_based_info_shp = AssetDefinition( + {"title": "Pixel based info shape format", "media_type": "application/octet-stream", "roles": ["metadata"]} + ) + pixel_based_info_shx = AssetDefinition( + {"title": "Pixel based info shape index", "media_type": "application/octet-stream", "roles": ["metadata"]} + ) + pixel_based_info_dbf = AssetDefinition( + {"title": "Pixel based info attribute", "media_type": "application/x-dbf", "roles": ["metadata"]} + ) + pixel_based_info_prj = AssetDefinition( + {"title": "Pixel based info projection description", "media_type": "text/plain", "roles": ["metadata"]} + ) + pixel_based_info_cpg = AssetDefinition( + {"title": "Pixel based info character encoding", "media_type": "text/plain", "roles": ["metadata"]} + ) + compressed_dataset = AssetDefinition( + {"title": "Compressed building height raster", "media_type": "application/zip", "roles": ["data"]} + ) + + +def get_stac_validator(product_schema: str) -> Draft7Validator: + with open(product_schema, encoding="utf-8") as f: + schema = json.load(f) + registry = Registry().with_resources( + [("http://example.com/schema.json", Resource.from_contents(schema))], + ) + return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) + + +def create_core_collection() -> pystac.Collection: + return pystac.Collection( + id=COLLECTION_ID, + description=COLLECTION_DESCRIPTION, + extent=COLLECTION_EXTENT, + title=COLLECTION_TITLE, + keywords=COLLECTION_KEYWORD, + providers=[HOST_AND_LICENSOR], + ) + + +def 
add_summaries_to_collection(collection: pystac.Collection, epsg_list: list[int]) -> None: + summaries = ProjectionExtension.summaries(collection, add_if_missing=True) + summaries.epsg = epsg_list + + +def add_item_assets_to_collection(collection: pystac.Collection, item_asset_class: Enum) -> None: + item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True) + item_assets.item_assets = {asset.name: asset.value for asset in item_asset_class} + + +def add_links_to_collection(collection: pystac.Collection, link_list: list[Link]) -> None: + for link in link_list: + collection.links.append(link) + + +def add_items_to_collection(collection: pystac.Collection, item_list: list[str]) -> None: + for item in item_list: + stac_object = pystac.read_file(item) + collection.add_item(stac_object, title=stac_object.id) + + +def create_collection(item_list: list[str]) -> None: + try: + collection = create_core_collection() + + # summaries + epsg_list = [3035] + add_summaries_to_collection(collection, epsg_list) + + # extensions + add_item_assets_to_collection(collection, UABHItemAssets) + + # links + link_list = [CLMS_LICENSE] + add_links_to_collection(collection, link_list) + + # add items + add_items_to_collection(collection, item_list) + + # add self, root and parent links + collection.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{collection.id}/{collection.id}.json")) + catalog = pystac.read_file(f"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json") + collection.set_root(catalog) + collection.set_parent(catalog) + except Exception as error: + raise CollectionCreationError(f"Failed to create Urban Atlas Building Height collection. Reason: {error}.") + return collection + + +def create_uabh_collection(item_list: list[str], validator: Draft7Validator) -> None: + try: + collection = create_collection(item_list) + error_msg = best_match(validator.iter_errors(collection.to_dict())) + assert error_msg is None, f"Failed to create {collection.id} collection. 
Reason: {error_msg}." + collection.save_object() + except (AssertionError, CollectionCreationError) as error: + LOGGER.error(error) diff --git a/scripts/uabh/constants.py b/scripts/uabh/constants.py index e750b94..d01b2dc 100644 --- a/scripts/uabh/constants.py +++ b/scripts/uabh/constants.py @@ -1,10 +1,29 @@ import os +from datetime import datetime from typing import Final import pystac from pystac.link import Link from pystac.provider import ProviderRole +STAC_DIR = "stac_tests" +WORKING_DIR = os.getcwd() +CLMS_CATALOG_LINK: Final[Link] = Link( + rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) +) +CLMS_LICENSE: Final[Link] = Link(rel="license", target="https://land.copernicus.eu/en/data-policy") +COLLECTION_DESCRIPTION = "Urban Atlas building height over capital cities." +COLLECTION_EXTENT = pystac.Extent( + spatial=pystac.SpatialExtent([[-22.13, 35.07, 33.48, 64.38]]), + temporal=pystac.TemporalExtent([[datetime(year=2012, month=1, day=1), None]]), +) +COLLECTION_ID = "urban-atlas-building-height" +COLLECTION_KEYWORD = ["Buildings", "Building height", "Elevation"] +COLLECTION_LINK: Final[Link] = Link( + rel=pystac.RelType.COLLECTION, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), +) +COLLECTION_TITLE = "Urban Atlas Building Height 10m" HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( name="Copernicus Land Monitoring Service", description=( @@ -19,20 +38,6 @@ roles=[ProviderRole.LICENSOR, ProviderRole.HOST], url="https://land.copernicus.eu", ) - -COLLECTION_ID = "urban-atlas-building-height" - -CLMS_LICENSE: Final[Link] = Link(rel="license", target="https://land.copernicus.eu/en/data-policy") - -WORKING_DIR = os.getcwd() -STAC_DIR = "stac_tests" -CLMS_CATALOG_LINK: Final[Link] = Link( - rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")) -) -COLLECTION_LINK: 
Final[Link] = Link( - rel=pystac.RelType.COLLECTION, - target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), -) ITEM_PARENT_LINK: Final[Link] = Link( rel=pystac.RelType.PARENT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"stacs/{COLLECTION_ID}/{COLLECTION_ID}.json")), diff --git a/scripts/uabh/uabh_collection.py b/scripts/uabh/uabh_collection.py deleted file mode 100644 index 38d6505..0000000 --- a/scripts/uabh/uabh_collection.py +++ /dev/null @@ -1,176 +0,0 @@ -from __future__ import annotations - -import json -import os -from datetime import datetime -from typing import Final - -import pystac -import rasterio as rio -from jsonschema import Draft7Validator -from jsonschema.exceptions import best_match -from pyproj import Transformer -from pystac import Extent, SpatialExtent, TemporalExtent -from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension -from pystac.link import Link -from pystac.provider import ProviderRole -from rasterio.coords import BoundingBox -from rasterio.crs import CRS -from referencing import Registry, Resource -from shapely.geometry import Polygon, box - -KEY = "/Users/xiaomanhuang/pl/ETCDI_STAC/uabh_samples/AT001_WIEN_UA2012_DHM_v020" -head, tail = os.path.split(KEY) -(product_id, product_version) = tail.rsplit("_", 1) -PATH_Dataset = os.path.join(KEY, "Dataset/" + tail + ".tif") - -HOST_AND_LICENSOR: Final[pystac.Provider] = pystac.Provider( - name="Copernicus Land Monitoring Service", - description=( - "The Copernicus Land Monitoring Service provides " - "geographical information on land cover and its " - "changes, land use, ground motions, vegetation state, " - "water cycle and Earth's surface energy variables to " - "a broad range of users in Europe and across the " - "World in the field of environmental terrestrial " - "applications." 
- ), - roles=[ProviderRole.LICENSOR, ProviderRole.HOST], - url="https://land.copernicus.eu", -) - -COLLECTION_id = "urban-atlas-building-height" -COLLECTION_title = "Urban Atlas Building Height 10m" -COLLECTION_description = "Urban Atlas building height over capital cities." -COLLECTION_keywords = ["Buildings", "Building height", "Elevation"] - -# links -CLMS_LICENSE: Final[Link] = Link( - rel="license", - target="https://land.copernicus.eu/en/data-policy", - title="Legal notice on the use of CLMS data", -) - -WORKING_DIR = os.getcwd() -CLMS_CATALOG_LINK: Final[Link] = Link( - rel=pystac.RelType.ROOT, - target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")), - title="CLMS Catalog", -) - -CLMS_PARENT_LINK: Final[Link] = Link( - rel=pystac.RelType.PARENT, - target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, "stacs/clms_catalog.json")), - title="CLMS Catalog", -) - - -def get_metadata_from_tif(key: str) -> tuple[BoundingBox, CRS, int, int]: - with rio.open(key) as tif: - bounds = tif.bounds - crs = tif.crs - height = tif.height - width = tif.width - tif.close() - return (bounds, crs, height, width) - - -def get_geom_wgs84(bounds: BoundingBox, crs: CRS) -> Polygon: - transformer = Transformer.from_crs(crs.to_epsg(), 4326) - miny, minx = transformer.transform(bounds.left, bounds.bottom) - maxy, maxx = transformer.transform(bounds.right, bounds.top) - bbox = (minx, miny, maxx, maxy) - return box(*bbox) - - -def get_datetime(product_id: str) -> tuple[datetime, datetime]: - year = int(product_id.split("_")[2][2:]) - return (datetime(year=year, month=1, day=1), datetime(year=year, month=12, day=31)) - - -def get_collection_extent(bbox, start_datetime) -> Extent: - spatial_extent = SpatialExtent(bboxes=bbox) - temporal_extent = TemporalExtent(intervals=[[start_datetime, None]]) - return Extent(spatial=spatial_extent, temporal=temporal_extent) - - -def get_stac_validator(product_schema: str) -> Draft7Validator: - with 
open(product_schema, encoding="utf-8") as f: - schema = json.load(f) - registry = Registry().with_resources( - [("http://example.com/schema.json", Resource.from_contents(schema))], - ) - return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) - - -if __name__ == "__main__": - head, tail = os.path.split(KEY) - (product_id,) = tail.split(".")[0].rsplit("_", 0) - bounds, crs, height, width = get_metadata_from_tif(PATH_Dataset) - geom_wgs84 = get_geom_wgs84(bounds, crs) - start_datetime, end_datetime = get_datetime(product_id) - COLLECTION_extent = get_collection_extent(list(geom_wgs84.bounds), start_datetime) - COLLECTION_summaries = pystac.Summaries({"proj:epsg": [crs.to_epsg()]}) - - collection = pystac.Collection( - stac_extensions=[ - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/projection/v1.1.0/schema.json", - ], - id=COLLECTION_id, - title=COLLECTION_title, - description=COLLECTION_description, - keywords=COLLECTION_keywords, - extent=COLLECTION_extent, - summaries=COLLECTION_summaries, - providers=[HOST_AND_LICENSOR], - ) - - # add item assets - add_item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True) - add_item_assets.item_assets = { - "dataset": AssetDefinition( - {"title": "Building height raster", "media_type": pystac.MediaType.GEOTIFF, "roles": ["data"]} - ), - "quality_check_report": AssetDefinition( - {"title": "Quality check report", "media_type": pystac.MediaType.PDF, "roles": ["metadata"]} - ), - "metadata": AssetDefinition({"title": "Metadata", "media_type": pystac.MediaType.XML, "roles": ["metadata"]}), - "quality_control_report": AssetDefinition( - {"title": "Quality control report", "media_type": pystac.MediaType.PDF, "roles": ["metadata"]} - ), - "pixel_based_info_shp": AssetDefinition( - {"title": "Pixel based info shape format", "media_type": "application/octet-stream", "roles": ["metadata"]} - ), - "pixel_based_info_shx": 
AssetDefinition( - {"title": "Pixel based info shape index", "media_type": "application/octet-stream", "roles": ["metadata"]} - ), - "pixel_based_info_dbf": AssetDefinition( - {"title": "Pixel based info attribute", "media_type": "application/x-dbf", "roles": ["metadata"]} - ), - "pixel_based_info_prj": AssetDefinition( - { - "title": "Pixel based info projection description", - "media_type": pystac.MediaType.TEXT, - "roles": ["metadata"], - } - ), - "pixel_based_info_cpg": AssetDefinition( - {"title": "Pixel based info character encoding", "media_type": pystac.MediaType.TEXT, "roles": ["metadata"]} - ), - "compressed_dataset": AssetDefinition( - {"title": "Compressed building height raster", "media_type": "application/zip", "roles": ["data"]} - ), - } - - # add links - collection.links.append(CLMS_LICENSE) - collection.links.append(CLMS_CATALOG_LINK) - collection.links.append(CLMS_PARENT_LINK) - - collection.set_self_href("scripts/vabh/test_collection.json") - collection.save_object() - - # validate - validator = get_stac_validator("./schema/products/uabh.json") - error_msg = best_match(validator.iter_errors(collection.to_dict())) diff --git a/stacs/urban-atlas-building-height/urban-atlas-building-height.json b/stacs/urban-atlas-building-height/urban-atlas-building-height.json index 4b7125a..1c01afd 100644 --- a/stacs/urban-atlas-building-height/urban-atlas-building-height.json +++ b/stacs/urban-atlas-building-height/urban-atlas-building-height.json @@ -2,8 +2,8 @@ "type": "Collection", "stac_version": "1.0.0", "stac_extensions": [ - "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", - "https://stac-extensions.github.io/projection/v1.1.0/schema.json" + "https://stac-extensions.github.io/projection/v1.1.0/schema.json", + "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json" ], "id": "urban-atlas-building-height", "title": "Urban Atlas Building Height 10m", @@ -15,7 +15,7 @@ "name": "Copernicus Land Monitoring Service", "description": 
"The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", "roles": ["licensor", "host"], - "url": "https://land.copernicus.eu/en" + "url": "https://land.copernicus.eu" } ], "extent": { @@ -23,7 +23,7 @@ "bbox": [[-22.13, 35.07, 33.48, 64.38]] }, "temporal": { - "interval": [["2012-01-01T00:00:00.000Z", null]] + "interval": [["2012-01-01T00:00:00Z", null]] } }, "item_assets": { @@ -38,7 +38,7 @@ "roles": ["metadata"] }, "metadata": { - "title": "Metadata", + "title": "Building height metadata", "type": "application/xml", "roles": ["metadata"] }, From 37d94eaaae4902337a1f4a820dde94ce1e6467ef Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 7 May 2024 16:47:03 +0200 Subject: [PATCH 73/80] Adds logging, validation --- .gitignore | 3 +- scripts/clc/collection.py | 46 +++++++++- scripts/clc/item.py | 2 +- .../U2018_CLC2012_V2020_20u1.json | 92 +++++++++++++------ 4 files changed, 109 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index 1f65787..d61c51e 100644 --- a/.gitignore +++ b/.gitignore @@ -103,4 +103,5 @@ __pycache__/ clms-stac.code-workspace stac_tests/* -scripts/clc/dev.ipynb \ No newline at end of file +scripts/clc/dev.ipynb +stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json diff --git a/scripts/clc/collection.py b/scripts/clc/collection.py index 24368d3..9928b6d 100644 --- a/scripts/clc/collection.py +++ b/scripts/clc/collection.py @@ -1,18 +1,24 @@ import os import re +import json +import logging import pystac import pystac.item import pystac.link from pystac.provider import ProviderRole from pystac.extensions.projection import ProjectionExtension - from pystac.extensions.item_assets import ItemAssetsExtension, AssetDefinition from 
datetime import datetime, UTC import rasterio.warp +#Taken 'as is' from other scripts +from jsonschema import Draft7Validator +from jsonschema.exceptions import best_match +from referencing import Registry, Resource + from .constants import ( COLLECTION_DESCRIPTION, COLLECTION_ID, @@ -32,6 +38,19 @@ from .item import create_item, get_img_paths, deconstruct_clc_name + +LOGGER = logging.getLogger(__name__) + + +#Taken 'as is' from other scripts +def get_stac_validator(product_schema: str) -> Draft7Validator: + with open(product_schema, encoding="utf-8") as f: + schema = json.load(f) + registry = Registry().with_resources( + [("http://example.com/schema.json", Resource.from_contents(schema))], + ) + return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) + def proj_epsg_from_item_asset(item: pystac.Item) -> int: for asset_key in item.assets: asset = item.assets[asset_key].to_dict() @@ -54,7 +73,7 @@ def get_collection_asset_files(data_root: str) -> list[str]: return asset_files -def create_collection_asset(asset_file: str) -> pystac.Asset: +def create_collection_asset(asset_file: str) -> tuple[str, pystac.Asset]: filename_elements = deconstruct_clc_name(asset_file) id = filename_elements['id'] @@ -67,6 +86,7 @@ def create_collection_asset(asset_file: str) -> pystac.Asset: key = 'readme' asset = pystac.Asset(href=asset_file, title=COLLECTION_TITLE_MAP[key], media_type=COLLECTION_MEDIA_TYPE_MAP[key], roles=COLLECTION_ROLES_MAP[key]) + return id, asset @@ -100,10 +120,13 @@ def create_collection() -> pystac.Collection: collection.set_root(catalog) collection.set_parent(catalog) + collection.save_object() return collection + + def populate_collection(collection: pystac.Collection, data_root: str) -> pystac.Collection: img_paths = get_img_paths(data_root) @@ -118,6 +141,14 @@ def populate_collection(collection: pystac.Collection, data_root: str) -> pystac DOM_code = deconstruct_clc_name(img_path).get('DOM_code') href = 
os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id.removesuffix(f'_FR_{DOM_code}')}/{item.id}.json") item.set_self_href(href) + + validator = get_stac_validator("schema/products/clc.json") + error_msg = best_match(validator.iter_errors(item.to_dict())) + try: + assert error_msg is None, f"Failed to create {item.id} item. Reason: {error_msg}." + except AssertionError as error: + LOGGER.error(error) + item.save_object() asset_files = get_collection_asset_files(data_root) @@ -125,13 +156,20 @@ def populate_collection(collection: pystac.Collection, data_root: str) -> pystac for asset_file in asset_files: key, asset = create_collection_asset(asset_file) collection.assets |= {key: asset} - # if not key in collection.assets.keys(): - # collection.add_asset(key, asset) + + collection.make_all_asset_hrefs_relative() collection.update_extent_from_items() ProjectionExtension.add_to(collection) collection.summaries = pystac.Summaries({'proj:epsg': list(set(proj_epsg))}) + + try: + error_msg = best_match(validator.iter_errors(collection.to_dict())) + assert error_msg is None, f"Failed to create {collection.id} collection. Reason: {error_msg}." 
+ except AssertionError as error: + LOGGER.error(error) + collection.save_object() return collection \ No newline at end of file diff --git a/scripts/clc/item.py b/scripts/clc/item.py index ceb0be9..3a9da96 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -17,7 +17,6 @@ import rasterio.warp from rasterio.warp import Resampling -# from .constants import * from .constants import ( DOM_MAP, @@ -33,6 +32,7 @@ ) + def deconstruct_clc_name(filename: str) -> dict[str]: filename_split = { 'dirname': os.path.dirname(filename), diff --git a/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json b/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json index 54f0a81..0852546 100644 --- a/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json +++ b/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json @@ -23,42 +23,61 @@ "coordinates": [ [ [ - 72.90613675900903, - 24.28417701147754 + 44.99485809829563, + 27.163269668344356 ], [ - 72.90613675900903, - 72.63376966542347 + 44.99485809829563, + 71.41326966834436 ], [ - -56.50514190170437, - 72.63376966542347 + -31.75514190170437, + 71.41326966834436 ], [ - -56.50514190170437, - 24.28417701147754 + -31.75514190170437, + 27.163269668344356 ], [ - 72.90613675900903, - 24.28417701147754 + 44.99485809829563, + 27.163269668344356 ] ] ] }, "links": [ { - "rel": "LICENSE", - "href": "https://land.copernicus.eu/en/data-policy" + "rel": "license", + "href": "https://land.copernicus.eu/en/data-policy", + "title": "Legal notice on the use of CLMS data" + }, + { + "rel": "root", + "href": "../../clms_catalog.json", + "type": "application/json", + "title": "CLMS Catalog" + }, + { + "rel": "parent", + "href": "../corine-land-cover-raster.json", + "type": "application/json", + "title": "CORINE Land Cover Raster" + }, + { + "rel": "collection", + "href": "../corine-land-cover-raster.json", + 
"type": "application/json", + "title": "CORINE Land Cover Raster" }, { "rel": "self", - "href": "x:/projects/ETC-DI/Task_18/clms-stac/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json", + "href": "X:/projects/ETC-DI/Task_18/clms-stac/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json", "type": "application/json" } ], "assets": { "U2018_CLC2012_V2020_20u1_tfw": { - "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tfw", + "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tfw", "type": "text/plain", "title": "World File Europe", "roles": [ @@ -66,7 +85,7 @@ ] }, "U2018_CLC2012_V2020_20u1_tif": { - "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif", + "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "title": "Single Band Land Classification Europe", "proj:epsg": 3035, @@ -100,7 +119,7 @@ ] }, "U2018_CLC2012_V2020_20u1_tif_aux_xml": { - "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.aux.xml", + "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.aux.xml", "type": "application/xml", "title": "TIFF Statistics Europe", "roles": [ @@ -108,7 +127,7 @@ ] }, "U2018_CLC2012_V2020_20u1_tif_ovr": { - "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.ovr", + "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.ovr", "type": "image/tiff; application=geotiff; profile=pyramid", "title": "Pyramid Europe", "roles": [ @@ -116,7 +135,7 @@ ] }, "U2018_CLC2012_V2020_20u1_tif_vat_cpg": { - "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.cpg", + "href": 
"../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.cpg", "type": "text/plain", "title": "Encoding Europe", "roles": [ @@ -124,7 +143,7 @@ ] }, "U2018_CLC2012_V2020_20u1_tif_vat_dbf": { - "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.dbf", + "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.dbf", "type": "application/dbf", "title": "Database Europe", "roles": [ @@ -132,15 +151,23 @@ ] }, "U2018_CLC2012_V2020_20u1_tif_xml": { - "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.xml", + "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.xml", "type": "application/xml", "title": "TIFF Metadata Europe", "roles": [ "metadata" ] }, + "readme_U2018_CLC2012_V2020_20u1_txt": { + "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/Documents/readme_U2018_CLC2012_V2020_20u1.txt", + "type": "text/plain", + "title": "Description Europe", + "roles": [ + "metadata" + ] + }, "CLC2018_CLC2012_V2018_20_tif_lyr": { - "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20.tif.lyr", + "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20.tif.lyr", "type": "image/tiff; application=geotiff; profile=layer", "title": "Legend Layer Europe", "roles": [ @@ -148,7 +175,7 @@ ] }, "CLC2018_CLC2012_V2018_20_QGIS_txt": { - "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20_QGIS.txt", + "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20_QGIS.txt", "type": "text/plain", "title": "Legends Europe", "roles": [ @@ -156,21 +183,30 @@ ] }, "U2018_CLC2012_V2020_20u1_xml": { - "href": "X:/EO/u2018_clc2012_v2020_20u1_raster100m/Metadata/U2018_CLC2012_V2020_20u1.xml", + "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/Metadata/U2018_CLC2012_V2020_20u1.xml", "type": 
"application/xml", "title": "Single Band Land Classification Metadata Europe", "roles": [ "metadata" ] + }, + "preview": { + "href": "https://sdi.eea.europa.eu/public/catalogue-graphic-overview/960998c1-1870-4e82-8051-6485205ebbac.png", + "type": "image/png", + "title": "Single Band Land Classification Thumbnail ", + "roles": [ + "thumbnail" + ] } }, "bbox": [ - -56.50514190170437, - 24.28417701147754, - 72.90613675900903, - 72.63376966542347 + -31.75514190170437, + 27.163269668344356, + 44.99485809829563, + 71.41326966834436 ], "stac_extensions": [ "https://stac-extensions.github.io/projection/v1.1.0/schema.json" - ] + ], + "collection": "corine-land-cover-raster" } \ No newline at end of file From a6e92c653abeea501c673b941700a8e056c0b1ee Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 7 May 2024 17:07:03 +0200 Subject: [PATCH 74/80] fixes item_asset creation in collection --- scripts/clc/collection.py | 14 +++++++------- scripts/clc/constants.py | 33 --------------------------------- 2 files changed, 7 insertions(+), 40 deletions(-) diff --git a/scripts/clc/collection.py b/scripts/clc/collection.py index 9928b6d..b2d0304 100644 --- a/scripts/clc/collection.py +++ b/scripts/clc/collection.py @@ -28,9 +28,9 @@ COLLECTION_TITLE_MAP, COLLECTION_MEDIA_TYPE_MAP, COLLECTION_ROLES_MAP, - COLLITAS_MEDIA_TYPE_MAP, - COLLITAS_ROLES_MAP, - COLLITAS_TITLE_MAP, + ITEM_TITLE_MAP, + ITEM_MEDIA_TYPE_MAP, + ITEM_ROLES_MAP, CLMS_LICENSE, WORKING_DIR, STAC_DIR @@ -108,10 +108,10 @@ def create_collection() -> pystac.Collection: item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True) item_assets.item_assets = { - key: AssetDefinition({"title": COLLITAS_TITLE_MAP[key].format(label='').strip(), - "media_type": COLLITAS_MEDIA_TYPE_MAP[key], - "roles": COLLITAS_ROLES_MAP[key]}) - for key in COLLITAS_TITLE_MAP + f'clc_map_{key}': AssetDefinition({"title": ITEM_TITLE_MAP[key].format(label='').strip(), + "media_type": ITEM_MEDIA_TYPE_MAP[key], + "roles": 
ITEM_ROLES_MAP[key]}) + for key in ITEM_TITLE_MAP } collection.add_link(CLMS_LICENSE) diff --git a/scripts/clc/constants.py b/scripts/clc/constants.py index ec31aad..a2a7e14 100644 --- a/scripts/clc/constants.py +++ b/scripts/clc/constants.py @@ -40,39 +40,6 @@ 'readme': ['metadata'], } -COLLITAS_TITLE_MAP = { - 'clc_map': 'Corine Land Cover Map', - 'clc_map_statistics': 'Corine Land Cover Map Statistics', - 'clc_map_pyramid': 'Pyramid', - 'clc_map_encoding': 'Encoding', - 'clc_map_database': 'Database', - 'clc_map_database_metadata': 'Database Metadata', - 'clc_map_tif_metadata': 'TIFF Metadata', - 'clc_map_metadata': 'Corine Land Cover Map Metadata', -} - -COLLITAS_MEDIA_TYPE_MAP = { - 'clc_map': pystac.MediaType.COG, - 'clc_map_statistics': pystac.MediaType.XML, - 'clc_map_pyramid': 'image/tiff; application=geotiff; profile=pyramid', - 'clc_map_encoding': pystac.MediaType.TEXT, - 'clc_map_database': 'application/dbf', - 'clc_map_database_metadata': pystac.MediaType.TEXT, - 'clc_map_tif_metadata': 'image/tiff; application=geotiff; profile=layer', - 'clc_map_metadata': pystac.MediaType.XML, -} - -COLLITAS_ROLES_MAP = { - 'clc_map': ['data'], - 'clc_map_statistics': ['metadata'], - 'clc_map_pyramid': ['metadata'], - 'clc_map_encoding': ['metadata'], - 'clc_map_database': ['metadata'], - 'clc_map_database_metadata': ['metadata'], - 'clc_map_tif_metadata': ['metadata'], - 'clc_map_metadata': ['metadata'], -} - # Items CLMS_LICENSE = pystac.link.Link(rel=pystac.RelType.LICENSE, From 2a4858c75c13032ba2fcf288707afd453fa45441 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 7 May 2024 17:14:27 +0200 Subject: [PATCH 75/80] adds logging --- scripts/clc/item.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index 3a9da96..a393d03 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -1,6 +1,7 @@ import os import re +import logging import pystac import pystac.item import pystac.link @@ -31,7 
+32,7 @@ COLLECTION_ID ) - +LOGGER = logging.getLogger(__name__) def deconstruct_clc_name(filename: str) -> dict[str]: filename_split = { @@ -168,8 +169,8 @@ def create_item(img_path: str, data_root: str) -> pystac.Item: key=key, asset=asset, ) - except KeyError as e: - print("An error occured:", e) + except KeyError as error: + LOGGER.error("An error occured:", error) # TODO: "Thumbnail" was originally put at collection level in the template, while it should perhaps be at item level? Individual previews should be added to each item key = 'preview' From f01a8f7b3e1a88db40539e50e6a975ea9a3be5b9 Mon Sep 17 00:00:00 2001 From: chorng Date: Tue, 7 May 2024 18:58:21 +0200 Subject: [PATCH 76/80] Refactor collection scripts * break create collection into small functions * fix typing of the create collection function * add ItemCreationError and CollectionCreationError to wrap all possible errors for logging purpose --- create_vpp_collection.py | 15 ++++- scripts/vpp/collection.py | 114 +++++++++++++++++++++++++------------- scripts/vpp/item.py | 69 ++++++++++++----------- 3 files changed, 124 insertions(+), 74 deletions(-) diff --git a/create_vpp_collection.py b/create_vpp_collection.py index 28779d3..9b8cd74 100644 --- a/create_vpp_collection.py +++ b/create_vpp_collection.py @@ -1,9 +1,18 @@ import logging +from glob import glob -from scripts.vpp.collection import create_collection +from scripts.vpp.collection import create_vpp_collection, get_stac_validator from scripts.vpp.constants import COLLECTION_ID, STAC_DIR, WORKING_DIR LOGGER = logging.getLogger(__name__) -if __name__ == "__main__": + + +def main(): logging.basicConfig(filename="create_vpp_collection.log") - create_collection(f"{WORKING_DIR}/{STAC_DIR}/{COLLECTION_ID}/**/VPP*.json") + item_list = glob(f"{WORKING_DIR}/{STAC_DIR}/{COLLECTION_ID}/**/*.json") + validator = get_stac_validator("schema/products/vpp.json") + create_vpp_collection(item_list, validator) + + +if __name__ == "__main__": + main() diff --git 
a/scripts/vpp/collection.py b/scripts/vpp/collection.py index 80640e0..859d776 100644 --- a/scripts/vpp/collection.py +++ b/scripts/vpp/collection.py @@ -3,7 +3,6 @@ import json import logging import os -from glob import glob import pystac import pystac.extensions @@ -12,6 +11,7 @@ from jsonschema.exceptions import best_match from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension from pystac.extensions.projection import ProjectionExtension +from pystac.link import Link from referencing import Registry, Resource from .constants import ( @@ -31,6 +31,10 @@ LOGGER = logging.getLogger(__name__) +class CollectionCreationError(Exception): + pass + + def get_stac_validator(product_schema: str) -> Draft7Validator: with open(product_schema, encoding="utf-8") as f: schema = json.load(f) @@ -40,8 +44,8 @@ def get_stac_validator(product_schema: str) -> Draft7Validator: return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) -def create_collection(item_list: list[str]) -> pystac.Collection: - collection = pystac.Collection( +def create_core_collection() -> pystac.Collection: + return pystac.Collection( id=COLLECTION_ID, description=COLLECTION_DESCRIPTION, extent=COLLECTION_EXTENT, @@ -50,54 +54,84 @@ def create_collection(item_list: list[str]) -> pystac.Collection: providers=[VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR], ) - # summaries + +def add_summaries_to_collection(collection: pystac.Collection, epsg_list: list[int]) -> None: summaries = ProjectionExtension.summaries(collection, add_if_missing=True) - summaries.epsg = [ - 32620, - 32621, - 32622, - 32625, - 32626, - 32627, - 32628, - 32629, - 32630, - 32631, - 32632, - 32633, - 32634, - 32635, - 32636, - 32637, - 32638, - 32738, - 32740, - ] - - # extensions + summaries.epsg = epsg_list + + +def add_item_assets_to_collection(collection: pystac.Collection, asset_title_map: dict[str, str]) -> None: item_assets = ItemAssetsExtension.ext(collection, 
add_if_missing=True) item_assets.item_assets = { - key: AssetDefinition({"title": TITLE_MAP[key], "media_type": pystac.MediaType.GEOTIFF, "roles": ["data"]}) - for key in TITLE_MAP + key: AssetDefinition({"title": asset_title_map[key], "media_type": pystac.MediaType.GEOTIFF, "roles": ["data"]}) + for key in asset_title_map } - # links - collection.links.append(CLMS_LICENSE) - # add items - items = glob(item_list) - for item in items: +def add_links_to_collection(collection: pystac.Collection, link_list: list[Link]) -> None: + for link in link_list: + collection.links.append(link) + + +def add_items_to_collection(collection: pystac.Collection, item_list: list[str]) -> None: + for item in item_list: stac_object = pystac.read_file(item) collection.add_item(stac_object, title=stac_object.id) - collection.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{collection.id}/{collection.id}.json")) - catalog = pystac.read_file(f"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json") - collection.set_root(catalog) - collection.set_parent(catalog) - validator = get_stac_validator("schema/products/vpp.json") + +def create_collection(item_list: list[str]) -> pystac.Collection: + try: + collection = create_core_collection() + + # summaries + epsg_list = [ + 32620, + 32621, + 32622, + 32625, + 32626, + 32627, + 32628, + 32629, + 32630, + 32631, + 32632, + 32633, + 32634, + 32635, + 32636, + 32637, + 32638, + 32738, + 32740, + ] + add_summaries_to_collection(collection, epsg_list) + + # extensions + add_item_assets_to_collection(collection, TITLE_MAP) + + # links + link_list = [CLMS_LICENSE] + add_links_to_collection(collection, link_list) + + # add items + add_items_to_collection(collection, item_list) + + # add self, root, and parent links + collection.set_self_href(os.path.join(WORKING_DIR, f"{STAC_DIR}/{collection.id}/{collection.id}.json")) + catalog = pystac.read_file(f"{WORKING_DIR}/{STAC_DIR}/clms_catalog.json") + collection.set_root(catalog) + collection.set_parent(catalog) + 
except Exception as error: + raise CollectionCreationError(error) + return collection + + +def create_vpp_collection(item_list: list[str], validator: Draft7Validator) -> None: try: + collection = create_collection(item_list) error_msg = best_match(validator.iter_errors(collection.to_dict())) assert error_msg is None, f"Failed to create {collection.id} collection. Reason: {error_msg}." collection.save_object() - except AssertionError as error: + except (AssertionError, CollectionCreationError) as error: LOGGER.error(error) diff --git a/scripts/vpp/item.py b/scripts/vpp/item.py index 04604f8..140620d 100644 --- a/scripts/vpp/item.py +++ b/scripts/vpp/item.py @@ -9,14 +9,12 @@ import boto3 import pystac import rasterio as rio -from botocore.exceptions import BotoCoreError from botocore.paginate import PageIterator from jsonschema import Draft7Validator from jsonschema.exceptions import best_match from pystac.extensions.projection import ProjectionExtension from rasterio.coords import BoundingBox from rasterio.crs import CRS -from rasterio.errors import RasterioIOError from rasterio.warp import transform_bounds from referencing import Registry, Resource from shapely.geometry import Polygon, box, mapping @@ -38,6 +36,10 @@ LOGGER = logging.getLogger(__name__) +class ItemCreationError(Exception): + pass + + def create_product_list(start_year: int, end_year: int) -> list[str]: product_list = [] for year in range(start_year, end_year + 1): @@ -49,7 +51,7 @@ def create_product_list(start_year: int, end_year: int) -> list[str]: def create_page_iterator(aws_session: boto3.Session, bucket: str, prefix: str) -> PageIterator: client = aws_session.client("s3") paginator = client.get_paginator("list_objects_v2") - return paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="-", MaxKeys=10) + return paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="-") def read_metadata_from_s3(bucket: str, key: str, aws_session: boto3.Session) -> tuple[BoundingBox, CRS, int, int]: 
@@ -140,33 +142,38 @@ def add_assets_to_item(item: pystac.Item, asset_dict: dict[str, pystac.Asset]) - def create_item(aws_session: boto3.Session, bucket: str, tile: str) -> pystac.Item: - client = aws_session.client("s3") - parameters = client.list_objects(Bucket=bucket, Prefix=tile, Delimiter=".")["CommonPrefixes"] - asset_keys = [parameter["Prefix"] + "tif" for parameter in parameters] - _, tail = os.path.split(asset_keys[0]) - product_id = "_".join((tail[:23], tail[29:31])) - bounds, crs, height, width, created = read_metadata_from_s3(bucket, asset_keys[0], aws_session) - geom_wgs84 = get_geom_wgs84(bounds, crs) - description = get_description(product_id) - start_datetime, end_datetime = get_datetime(product_id) - - # core metadata - item = create_core_item(product_id, geom_wgs84, start_datetime, end_datetime, created, description, COLLECTION_ID) - - # common metadata - provider_list = [VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR] - add_providers_to_item(item, provider_list) - - # extensions - add_projection_extension_to_item(item, crs, bounds, height, width) - - # links - link_list = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] - add_links_to_item(item, link_list) - - # assets - assets = {os.path.split(key)[-1][:-4].lower(): create_asset(key) for key in asset_keys} - add_assets_to_item(item, assets) + try: + client = aws_session.client("s3") + parameters = client.list_objects(Bucket=bucket, Prefix=tile, Delimiter=".")["CommonPrefixes"] + asset_keys = [parameter["Prefix"] + "tif" for parameter in parameters] + _, tail = os.path.split(asset_keys[0]) + product_id = "_".join((tail[:23], tail[29:31])) + bounds, crs, height, width, created = read_metadata_from_s3(bucket, asset_keys[0], aws_session) + geom_wgs84 = get_geom_wgs84(bounds, crs) + description = get_description(product_id) + start_datetime, end_datetime = get_datetime(product_id) + + # core metadata + item = create_core_item( + product_id, geom_wgs84, start_datetime, 
end_datetime, created, description, COLLECTION_ID + ) + + # common metadata + provider_list = [VPP_HOST_AND_LICENSOR, VPP_PRODUCER_AND_PROCESSOR] + add_providers_to_item(item, provider_list) + + # extensions + add_projection_extension_to_item(item, crs, bounds, height, width) + + # links + link_list = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] + add_links_to_item(item, link_list) + + # assets + assets = {os.path.split(key)[-1][:-4].lower(): create_asset(key) for key in asset_keys} + add_assets_to_item(item, assets) + except Exception as error: + raise ItemCreationError(error) return item @@ -186,5 +193,5 @@ def create_vpp_item(aws_session: boto3.Session, bucket: str, validator: Draft7Va error_msg = best_match(validator.iter_errors(item.to_dict())) assert error_msg is None, f"Failed to create {item.id} item. Reason: {error_msg}." item.save_object() - except (AssertionError, BotoCoreError, RasterioIOError) as error: + except (AssertionError, ItemCreationError) as error: LOGGER.error(error) From 997bd2446f3eeed6fa9e4ca958d6236335501d91 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Fri, 10 May 2024 09:30:01 +0200 Subject: [PATCH 77/80] clean up --- .gitignore | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index d61c51e..6a1dc7e 100644 --- a/.gitignore +++ b/.gitignore @@ -99,9 +99,4 @@ target/ *.pot # Pycopy -__pycache__/ -clms-stac.code-workspace - -stac_tests/* -scripts/clc/dev.ipynb -stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json +__pycache__/ \ No newline at end of file From 62cc262c95947d9ddf536f3116d96ff1bf6ccb1b Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Fri, 10 May 2024 09:31:32 +0200 Subject: [PATCH 78/80] del --- .../U2018_CLC2012_V2020_20u1.json | 212 ------------------ 1 file changed, 212 deletions(-) delete mode 100644 stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json diff --git 
a/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json b/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json deleted file mode 100644 index 0852546..0000000 --- a/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json +++ /dev/null @@ -1,212 +0,0 @@ -{ - "type": "Feature", - "stac_version": "1.0.0", - "id": "U2018_CLC2012_V2020_20u1", - "properties": { - "description": "Corine Land Cover 2012 (CLC2012) is one of the Corine Land Cover (CLC) datasets produced within the frame the Copernicus Land Monitoring Service referring to land cover / land use status of year 2012. CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), coordinated by the European Environment Agency (EEA). It provides consistent and thematically detailed information on land cover and land cover changes across Europe. CLC datasets are based on the classification of satellite images produced by the national teams of the participating countries - the EEA members and cooperating countries (EEA39). National CLC inventories are then further integrated into a seamless land cover map of Europe. The resulting European database relies on standard methodology and nomenclature with following base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; minimum mapping unit (MMU) for status layers is 25 hectares; minimum width of linear elements is 100 metres. Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. The CLC service delivers important data sets supporting the implementation of key priority areas of the Environment Action Programmes of the European Union as e.g. 
protecting ecosystems, halting the loss of biological diversity, tracking the impacts of climate change, monitoring urban land take, assessing developments in agriculture or dealing with water resources directives. CLC belongs to the Pan-European component of the Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the European Copernicus Programme coordinated by the European Environment Agency, providing environmental information from a combination of air- and space-based observation systems and in-situ monitoring. Additional information about CLC product description including mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. CLC class descriptions can be found at https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.", - "created": null, - "providers": { - "name": "Copernicus Land Monitoring Service", - "description": "The Copernicus Land Monitoring Service provides geographical information on land cover and its changes, land use, ground motions, vegetation state, water cycle and Earth's surface energy variables to a broad range of users in Europe and across the World in the field of environmental terrestrial applications.", - "roles": [ - "licensor", - "host" - ], - "url": "https://land.copernicus.eu" - }, - "start_datetime": "2012-01-01T00:00:00Z", - "end_datetime": "2012-12-31T00:00:00Z", - "datetime": null - }, - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [ - 44.99485809829563, - 27.163269668344356 - ], - [ - 44.99485809829563, - 71.41326966834436 - ], - [ - -31.75514190170437, - 71.41326966834436 - ], - [ - -31.75514190170437, - 27.163269668344356 - ], - [ - 44.99485809829563, - 27.163269668344356 - ] - ] - ] - }, - "links": [ - { - "rel": "license", - "href": "https://land.copernicus.eu/en/data-policy", - "title": "Legal notice on the use of CLMS data" - }, - { - "rel": "root", - "href": "../../clms_catalog.json", - "type": 
"application/json", - "title": "CLMS Catalog" - }, - { - "rel": "parent", - "href": "../corine-land-cover-raster.json", - "type": "application/json", - "title": "CORINE Land Cover Raster" - }, - { - "rel": "collection", - "href": "../corine-land-cover-raster.json", - "type": "application/json", - "title": "CORINE Land Cover Raster" - }, - { - "rel": "self", - "href": "X:/projects/ETC-DI/Task_18/clms-stac/stac_tests/corine-land-cover-raster/U2018_CLC2012_V2020_20u1/U2018_CLC2012_V2020_20u1.json", - "type": "application/json" - } - ], - "assets": { - "U2018_CLC2012_V2020_20u1_tfw": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tfw", - "type": "text/plain", - "title": "World File Europe", - "roles": [ - "metadata" - ] - }, - "U2018_CLC2012_V2020_20u1_tif": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "title": "Single Band Land Classification Europe", - "proj:epsg": 3035, - "proj:bbox": [ - 900000.0, - 900000.0, - 7400000.0, - 5500000.0 - ], - "proj:shape": [ - 46000, - 65000 - ], - "proj:transform": [ - 100.0, - 0.0, - 900000.0, - 0.0, - -100.0, - 5500000.0, - 0.0, - 0.0, - 1.0, - 0.0, - 0.0, - 1.0 - ], - "roles": [ - "data", - "visual" - ] - }, - "U2018_CLC2012_V2020_20u1_tif_aux_xml": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.aux.xml", - "type": "application/xml", - "title": "TIFF Statistics Europe", - "roles": [ - "metadata" - ] - }, - "U2018_CLC2012_V2020_20u1_tif_ovr": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.ovr", - "type": "image/tiff; application=geotiff; profile=pyramid", - "title": "Pyramid Europe", - "roles": [ - "metadata" - ] - }, - "U2018_CLC2012_V2020_20u1_tif_vat_cpg": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.cpg", - "type": 
"text/plain", - "title": "Encoding Europe", - "roles": [ - "metadata" - ] - }, - "U2018_CLC2012_V2020_20u1_tif_vat_dbf": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.vat.dbf", - "type": "application/dbf", - "title": "Database Europe", - "roles": [ - "metadata" - ] - }, - "U2018_CLC2012_V2020_20u1_tif_xml": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/DATA/U2018_CLC2012_V2020_20u1.tif.xml", - "type": "application/xml", - "title": "TIFF Metadata Europe", - "roles": [ - "metadata" - ] - }, - "readme_U2018_CLC2012_V2020_20u1_txt": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/Documents/readme_U2018_CLC2012_V2020_20u1.txt", - "type": "text/plain", - "title": "Description Europe", - "roles": [ - "metadata" - ] - }, - "CLC2018_CLC2012_V2018_20_tif_lyr": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20.tif.lyr", - "type": "image/tiff; application=geotiff; profile=layer", - "title": "Legend Layer Europe", - "roles": [ - "metadata" - ] - }, - "CLC2018_CLC2012_V2018_20_QGIS_txt": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/Legend/CLC2018_CLC2012_V2018_20_QGIS.txt", - "type": "text/plain", - "title": "Legends Europe", - "roles": [ - "metadata" - ] - }, - "U2018_CLC2012_V2020_20u1_xml": { - "href": "../CLC_100m/u2018_clc2012_v2020_20u1_raster100m/Metadata/U2018_CLC2012_V2020_20u1.xml", - "type": "application/xml", - "title": "Single Band Land Classification Metadata Europe", - "roles": [ - "metadata" - ] - }, - "preview": { - "href": "https://sdi.eea.europa.eu/public/catalogue-graphic-overview/960998c1-1870-4e82-8051-6485205ebbac.png", - "type": "image/png", - "title": "Single Band Land Classification Thumbnail ", - "roles": [ - "thumbnail" - ] - } - }, - "bbox": [ - -31.75514190170437, - 27.163269668344356, - 44.99485809829563, - 71.41326966834436 - ], - "stac_extensions": [ - 
"https://stac-extensions.github.io/projection/v1.1.0/schema.json" - ], - "collection": "corine-land-cover-raster" -} \ No newline at end of file From 487d7004d37d35c001c29ba0186929f61978fa90 Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Fri, 10 May 2024 09:43:02 +0200 Subject: [PATCH 79/80] after pre-commit hooks --- .gitignore | 2 +- create_clc_collection.py | 3 +- scripts/clc/collection.py | 134 +++++++++++----------- scripts/clc/constants.py | 211 +++++++++++++++++----------------- scripts/clc/item.py | 231 ++++++++++++++++++++------------------ 5 files changed, 298 insertions(+), 283 deletions(-) diff --git a/.gitignore b/.gitignore index 6a1dc7e..4b381b7 100644 --- a/.gitignore +++ b/.gitignore @@ -99,4 +99,4 @@ target/ *.pot # Pycopy -__pycache__/ \ No newline at end of file +__pycache__/ diff --git a/create_clc_collection.py b/create_clc_collection.py index 47a2bcd..60e594b 100644 --- a/create_clc_collection.py +++ b/create_clc_collection.py @@ -4,6 +4,7 @@ LOGGER = logging.getLogger(__name__) + def main(): logging.basicConfig(filename="create_clc_collection.log") collection = create_collection() @@ -11,4 +12,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/clc/collection.py b/scripts/clc/collection.py index b2d0304..7a718e7 100644 --- a/scripts/clc/collection.py +++ b/scripts/clc/collection.py @@ -1,48 +1,41 @@ -import os -import re - import json import logging +import os +from datetime import UTC, datetime + import pystac import pystac.item import pystac.link -from pystac.provider import ProviderRole -from pystac.extensions.projection import ProjectionExtension -from pystac.extensions.item_assets import ItemAssetsExtension, AssetDefinition -from datetime import datetime, UTC - -import rasterio.warp - -#Taken 'as is' from other scripts +# Taken 'as is' from other scripts from jsonschema import Draft7Validator from jsonschema.exceptions import best_match +from pystac.extensions.item_assets 
import AssetDefinition, ItemAssetsExtension +from pystac.extensions.projection import ProjectionExtension from referencing import Registry, Resource from .constants import ( + CLMS_LICENSE, COLLECTION_DESCRIPTION, COLLECTION_ID, COLLECTION_KEYWORDS, - COLLECTION_TITLE, COLLECTION_LICENSE, - COLLECTION_TITLE_MAP, COLLECTION_MEDIA_TYPE_MAP, COLLECTION_ROLES_MAP, - ITEM_TITLE_MAP, + COLLECTION_TITLE, + COLLECTION_TITLE_MAP, ITEM_MEDIA_TYPE_MAP, ITEM_ROLES_MAP, - CLMS_LICENSE, + ITEM_TITLE_MAP, + STAC_DIR, WORKING_DIR, - STAC_DIR ) - -from .item import create_item, get_img_paths, deconstruct_clc_name - +from .item import create_item, deconstruct_clc_name, get_img_paths LOGGER = logging.getLogger(__name__) -#Taken 'as is' from other scripts +# Taken 'as is' from other scripts def get_stac_validator(product_schema: str) -> Draft7Validator: with open(product_schema, encoding="utf-8") as f: schema = json.load(f) @@ -51,66 +44,74 @@ def get_stac_validator(product_schema: str) -> Draft7Validator: ) return Draft7Validator({"$ref": "http://example.com/schema.json"}, registry=registry) + def proj_epsg_from_item_asset(item: pystac.Item) -> int: for asset_key in item.assets: asset = item.assets[asset_key].to_dict() - if 'proj:epsg' in asset.keys(): - return asset.get('proj:epsg') + if "proj:epsg" in asset.keys(): + return asset.get("proj:epsg") -def get_collection_asset_files(data_root: str) -> list[str]: +def get_collection_asset_files(data_root: str) -> list[str]: asset_files = [] - - for root, _, files in os.walk(data_root): + for root, _, files in os.walk(data_root): for file in files: - - if ((file.startswith('clc-country-coverage') and file.endswith('pdf')) or - file.startswith('clc-file-naming-convention') or - (file.startswith('readme') and file.endswith('raster.txt'))): - + if ( + (file.startswith("clc-country-coverage") and file.endswith("pdf")) + or file.startswith("clc-file-naming-convention") + or (file.startswith("readme") and file.endswith("raster.txt")) + ): 
asset_files.append(os.path.join(root, file)) return asset_files -def create_collection_asset(asset_file: str) -> tuple[str, pystac.Asset]: +def create_collection_asset(asset_file: str) -> tuple[str, pystac.Asset]: filename_elements = deconstruct_clc_name(asset_file) - id = filename_elements['id'] - - if id.startswith('clc-file-naming'): - key = 'clc_file_naming' - elif id.startswith('clc-country-coverage'): - key = 'clc_country_coverage' - elif id.startswith('readme'): - key = 'readme' - - asset = pystac.Asset(href=asset_file, title=COLLECTION_TITLE_MAP[key], media_type=COLLECTION_MEDIA_TYPE_MAP[key], roles=COLLECTION_ROLES_MAP[key]) - + id = filename_elements["id"] + + if id.startswith("clc-file-naming"): + key = "clc_file_naming" + elif id.startswith("clc-country-coverage"): + key = "clc_country_coverage" + elif id.startswith("readme"): + key = "readme" + + asset = pystac.Asset( + href=asset_file, + title=COLLECTION_TITLE_MAP[key], + media_type=COLLECTION_MEDIA_TYPE_MAP[key], + roles=COLLECTION_ROLES_MAP[key], + ) + return id, asset def create_collection() -> pystac.Collection: - sp_extent = pystac.SpatialExtent([None, None, None, None]) tmp_extent = pystac.TemporalExtent([datetime(1990, 1, 1, microsecond=0, tzinfo=UTC), None]) - extent = pystac.Extent(sp_extent, tmp_extent) - - collection = pystac.Collection(id=COLLECTION_ID, - description=COLLECTION_DESCRIPTION, - title=COLLECTION_TITLE, - extent=extent, - keywords=COLLECTION_KEYWORDS, - license=COLLECTION_LICENSE, - stac_extensions=[] - ) - + extent = pystac.Extent(sp_extent, tmp_extent) + + collection = pystac.Collection( + id=COLLECTION_ID, + description=COLLECTION_DESCRIPTION, + title=COLLECTION_TITLE, + extent=extent, + keywords=COLLECTION_KEYWORDS, + license=COLLECTION_LICENSE, + stac_extensions=[], + ) item_assets = ItemAssetsExtension.ext(collection, add_if_missing=True) item_assets.item_assets = { - f'clc_map_{key}': AssetDefinition({"title": ITEM_TITLE_MAP[key].format(label='').strip(), - 
"media_type": ITEM_MEDIA_TYPE_MAP[key], - "roles": ITEM_ROLES_MAP[key]}) + f"clc_map_{key}": AssetDefinition( + { + "title": ITEM_TITLE_MAP[key].format(label="").strip(), + "media_type": ITEM_MEDIA_TYPE_MAP[key], + "roles": ITEM_ROLES_MAP[key], + } + ) for key in ITEM_TITLE_MAP } @@ -126,7 +127,6 @@ def create_collection() -> pystac.Collection: return collection - def populate_collection(collection: pystac.Collection, data_root: str) -> pystac.Collection: img_paths = get_img_paths(data_root) @@ -138,10 +138,12 @@ def populate_collection(collection: pystac.Collection, data_root: str) -> pystac item_epsg = proj_epsg_from_item_asset(item) proj_epsg.append(item_epsg) - DOM_code = deconstruct_clc_name(img_path).get('DOM_code') - href = os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id.removesuffix(f'_FR_{DOM_code}')}/{item.id}.json") + DOM_code = deconstruct_clc_name(img_path).get("DOM_code") + href = os.path.join( + WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id.removesuffix(f'_FR_{DOM_code}')}/{item.id}.json" + ) item.set_self_href(href) - + validator = get_stac_validator("schema/products/clc.json") error_msg = best_match(validator.iter_errors(item.to_dict())) try: @@ -157,19 +159,17 @@ def populate_collection(collection: pystac.Collection, data_root: str) -> pystac key, asset = create_collection_asset(asset_file) collection.assets |= {key: asset} - - collection.make_all_asset_hrefs_relative() collection.update_extent_from_items() ProjectionExtension.add_to(collection) - collection.summaries = pystac.Summaries({'proj:epsg': list(set(proj_epsg))}) - + collection.summaries = pystac.Summaries({"proj:epsg": list(set(proj_epsg))}) + try: error_msg = best_match(validator.iter_errors(collection.to_dict())) assert error_msg is None, f"Failed to create {collection.id} collection. Reason: {error_msg}." 
except AssertionError as error: LOGGER.error(error) - + collection.save_object() - return collection \ No newline at end of file + return collection diff --git a/scripts/clc/constants.py b/scripts/clc/constants.py index a2a7e14..d309219 100644 --- a/scripts/clc/constants.py +++ b/scripts/clc/constants.py @@ -1,145 +1,148 @@ import os -from datetime import datetime, UTC import pystac from pystac.provider import ProviderRole - # os.chdir('x:\\projects\\ETC-DI\\Task_18\\clms-stac') WORKING_DIR = os.getcwd() -STAC_DIR = 'stac_tests' +STAC_DIR = "stac_tests" # Collection -COLLECTION_ID = 'corine-land-cover-raster' -COLLECTION_TITLE = 'CORINE Land Cover Raster' -COLLECTION_DESCRIPTION = ("The European Commission launched the CORINE (Coordination of Information on the Environment) " - "program in an effort to develop a standardized methodology for producing continent-scale land " - "cover, biotope, and air quality maps. The CORINE Land Cover (CLC) product offers a pan-European " - "land cover and land use inventory with 44 thematic classes, ranging from broad forested areas " - "to individual vineyards.") -COLLECTION_KEYWORDS = ["clms", "corine", "derived data", "land cover", "machine learning", "open data"] -COLLECTION_LICENSE = 'proprietary' +COLLECTION_ID = "corine-land-cover-raster" +COLLECTION_TITLE = "CORINE Land Cover Raster" +COLLECTION_DESCRIPTION = ( + "The European Commission launched the CORINE (Coordination of Information on the Environment) " + "program in an effort to develop a standardized methodology for producing continent-scale land " + "cover, biotope, and air quality maps. The CORINE Land Cover (CLC) product offers a pan-European " + "land cover and land use inventory with 44 thematic classes, ranging from broad forested areas " + "to individual vineyards." 
+) +COLLECTION_KEYWORDS = ["clms", "corine", "derived data", "land cover", "machine learning", "open data"] +COLLECTION_LICENSE = "proprietary" COLLECTION_TITLE_MAP = { - 'clc_country_coverage': 'Coverage', - 'clc_file_naming': 'Naming Convention Description', - 'readme': 'Description', + "clc_country_coverage": "Coverage", + "clc_file_naming": "Naming Convention Description", + "readme": "Description", } COLLECTION_MEDIA_TYPE_MAP = { - 'clc_country_coverage': pystac.MediaType.PDF, - 'clc_file_naming': pystac.MediaType.TEXT, - 'readme': pystac.MediaType.TEXT, + "clc_country_coverage": pystac.MediaType.PDF, + "clc_file_naming": pystac.MediaType.TEXT, + "readme": pystac.MediaType.TEXT, } COLLECTION_ROLES_MAP = { - 'clc_country_coverage': ['metadata'], - 'clc_file_naming': ['metadata'], - 'readme': ['metadata'], + "clc_country_coverage": ["metadata"], + "clc_file_naming": ["metadata"], + "readme": ["metadata"], } # Items -CLMS_LICENSE = pystac.link.Link(rel=pystac.RelType.LICENSE, - target="https://land.copernicus.eu/en/data-policy", - title='Legal notice on the use of CLMS data') +CLMS_LICENSE = pystac.link.Link( + rel=pystac.RelType.LICENSE, + target="https://land.copernicus.eu/en/data-policy", + title="Legal notice on the use of CLMS data", +) DOM_MAP = { - 'GLP': 'Guadeloupe', - 'GUF': 'French Guyana', - 'MTQ': 'Martinique', - 'MYT': 'Mayotte', - 'REU': 'Réunion', - '': 'Europe', + "GLP": "Guadeloupe", + "GUF": "French Guyana", + "MTQ": "Martinique", + "MYT": "Mayotte", + "REU": "Réunion", + "": "Europe", } ITEM_MEDIA_TYPE_MAP = { - 'tif': pystac.MediaType.COG, - 'tif_xml': pystac.MediaType.XML, - 'tif_aux_xml': pystac.MediaType.XML, - 'tif_ovr': 'image/tiff; application=geotiff; profile=pyramid', - 'tif_vat_cpg': pystac.MediaType.TEXT, - 'tif_vat_dbf': 'application/dbf', - 'legend_txt': pystac.MediaType.TEXT, - 'tif_lyr': 'image/tiff; application=geotiff; profile=layer', - 'tfw': pystac.MediaType.TEXT, - 'xml': pystac.MediaType.XML, - 'readme_txt': 
pystac.MediaType.TEXT, - 'preview': pystac.MediaType.PNG, + "tif": pystac.MediaType.COG, + "tif_xml": pystac.MediaType.XML, + "tif_aux_xml": pystac.MediaType.XML, + "tif_ovr": "image/tiff; application=geotiff; profile=pyramid", + "tif_vat_cpg": pystac.MediaType.TEXT, + "tif_vat_dbf": "application/dbf", + "legend_txt": pystac.MediaType.TEXT, + "tif_lyr": "image/tiff; application=geotiff; profile=layer", + "tfw": pystac.MediaType.TEXT, + "xml": pystac.MediaType.XML, + "readme_txt": pystac.MediaType.TEXT, + "preview": pystac.MediaType.PNG, } ITEM_ROLES_MAP = { - 'tif': ['data', 'visual'], - 'tif_xml': ['metadata'], - 'tif_aux_xml': ['metadata'], - 'tif_ovr': ['metadata'], - 'tif_vat_cpg': ['metadata'], - 'tif_vat_dbf': ['metadata'], - 'legend_txt': ['metadata'], - 'tif_lyr': ['metadata'], - 'tfw': ['metadata'], - 'xml': ['metadata'], - 'readme_txt': ['metadata'], - 'preview': ['thumbnail'], + "tif": ["data", "visual"], + "tif_xml": ["metadata"], + "tif_aux_xml": ["metadata"], + "tif_ovr": ["metadata"], + "tif_vat_cpg": ["metadata"], + "tif_vat_dbf": ["metadata"], + "legend_txt": ["metadata"], + "tif_lyr": ["metadata"], + "tfw": ["metadata"], + "xml": ["metadata"], + "readme_txt": ["metadata"], + "preview": ["thumbnail"], } ITEM_TITLE_MAP = { - 'tif': 'Single Band Land Classification {label}', - 'tif_xml': 'TIFF Metadata {label}', - 'tif_aux_xml': 'TIFF Statistics {label}', - 'tif_ovr': 'Pyramid {label}', - 'tif_vat_cpg': 'Encoding {label}', - 'tif_vat_dbf': 'Database {label}', - 'legend_txt': 'Legends {label}', - 'tif_lyr': 'Legend Layer {label}', - 'tfw': 'World File {label}', - 'xml': 'Single Band Land Classification Metadata {label}', - 'readme_txt': 'Description {label}', - 'preview': 'Single Band Land Classification Thumbnail {label}' + "tif": "Single Band Land Classification {label}", + "tif_xml": "TIFF Metadata {label}", + "tif_aux_xml": "TIFF Statistics {label}", + "tif_ovr": "Pyramid {label}", + "tif_vat_cpg": "Encoding {label}", + "tif_vat_dbf": "Database 
{label}", + "legend_txt": "Legends {label}", + "tif_lyr": "Legend Layer {label}", + "tfw": "World File {label}", + "xml": "Single Band Land Classification Metadata {label}", + "readme_txt": "Description {label}", + "preview": "Single Band Land Classification Thumbnail {label}", } CLC_PROVIDER = pystac.provider.Provider( - name='Copernicus Land Monitoring Service', - description=('The Copernicus Land Monitoring Service provides ' - 'geographical information on land cover and its ' - 'changes, land use, ground motions, vegetation state, ' - 'water cycle and Earth\'s surface energy variables to ' - 'a broad range of users in Europe and across the World ' - 'in the field of environmental terrestrial applications.'), + name="Copernicus Land Monitoring Service", + description=( + "The Copernicus Land Monitoring Service provides " + "geographical information on land cover and its " + "changes, land use, ground motions, vegetation state, " + "water cycle and Earth's surface energy variables to " + "a broad range of users in Europe and across the World " + "in the field of environmental terrestrial applications." + ), roles=[ProviderRole.LICENSOR, ProviderRole.HOST], - url='https://land.copernicus.eu' + url="https://land.copernicus.eu", ) -ITEM_DESCRIPTION = ('Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) ' - 'datasets produced within the frame the Copernicus Land Monitoring Service ' - 'referring to land cover / land use status of year {year}. ' - 'CLC service has a long-time heritage (formerly known as \"CORINE Land Cover Programme\"), ' - 'coordinated by the European Environment Agency (EEA). It provides consistent ' - 'and thematically detailed information on land cover and land cover changes across Europe. ' - 'CLC datasets are based on the classification of satellite images produced by the national ' - 'teams of the participating countries - the EEA members and cooperating countries (EEA39). 
' - 'National CLC inventories are then further integrated into a seamless land cover map of Europe. ' - 'The resulting European database relies on standard methodology and nomenclature with following ' - 'base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; ' - 'minimum mapping unit (MMU) for status layers is 25 hectares; ' - 'minimum width of linear elements is 100 metres. ' - 'Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares ' - 'for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. ' - 'The CLC service delivers important data sets supporting the implementation of key priority ' - 'areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, ' - 'halting the loss of biological diversity, tracking the impacts of climate change, ' - 'monitoring urban land take, assessing developments in agriculture or dealing with ' - 'water resources directives. CLC belongs to the Pan-European component of the ' - 'Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the ' - 'European Copernicus Programme coordinated by the European Environment Agency, ' - 'providing environmental information from a combination of air- and space-based observation ' - 'systems and in-situ monitoring. Additional information about CLC product description including ' - 'mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. ' - 'CLC class descriptions can be found at ' - 'https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/.') - - - +ITEM_DESCRIPTION = ( + "Corine Land Cover {year} (CLC{year}) is one of the Corine Land Cover (CLC) " + "datasets produced within the frame the Copernicus Land Monitoring Service " + "referring to land cover / land use status of year {year}. 
" + 'CLC service has a long-time heritage (formerly known as "CORINE Land Cover Programme"), ' + "coordinated by the European Environment Agency (EEA). It provides consistent " + "and thematically detailed information on land cover and land cover changes across Europe. " + "CLC datasets are based on the classification of satellite images produced by the national " + "teams of the participating countries - the EEA members and cooperating countries (EEA39). " + "National CLC inventories are then further integrated into a seamless land cover map of Europe. " + "The resulting European database relies on standard methodology and nomenclature with following " + "base parameters: 44 classes in the hierarchical 3-level CLC nomenclature; " + "minimum mapping unit (MMU) for status layers is 25 hectares; " + "minimum width of linear elements is 100 metres. " + "Change layers have higher resolution, i.e. minimum mapping unit (MMU) is 5 hectares " + "for Land Cover Changes (LCC), and the minimum width of linear elements is 100 metres. " + "The CLC service delivers important data sets supporting the implementation of key priority " + "areas of the Environment Action Programmes of the European Union as e.g. protecting ecosystems, " + "halting the loss of biological diversity, tracking the impacts of climate change, " + "monitoring urban land take, assessing developments in agriculture or dealing with " + "water resources directives. CLC belongs to the Pan-European component of the " + "Copernicus Land Monitoring Service (https://land.copernicus.eu/), part of the " + "European Copernicus Programme coordinated by the European Environment Agency, " + "providing environmental information from a combination of air- and space-based observation " + "systems and in-situ monitoring. Additional information about CLC product description including " + "mapping guides can be found at https://land.copernicus.eu/user-corner/technical-library/. 
" + "CLC class descriptions can be found at " + "https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html/." +) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index a393d03..516aa07 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -1,170 +1,176 @@ +import logging import os import re +from datetime import UTC, datetime -import logging import pystac import pystac.item import pystac.link -from pystac.provider import ProviderRole -from pystac.extensions.projection import ProjectionExtension -from pystac.extensions.item_assets import AssetDefinition - -from pyproj import Transformer -from shapely.geometry import GeometryCollection, box, shape, mapping - -from datetime import datetime, UTC - import rasterio as rio import rasterio.warp -from rasterio.warp import Resampling - +from pystac.extensions.projection import ProjectionExtension +from shapely.geometry import box, mapping from .constants import ( + CLC_PROVIDER, + CLMS_LICENSE, + COLLECTION_ID, DOM_MAP, - ITEM_TITLE_MAP, + ITEM_DESCRIPTION, ITEM_MEDIA_TYPE_MAP, ITEM_ROLES_MAP, - ITEM_DESCRIPTION, - CLC_PROVIDER, - CLMS_LICENSE, - WORKING_DIR, + ITEM_TITLE_MAP, STAC_DIR, - COLLECTION_ID + WORKING_DIR, ) LOGGER = logging.getLogger(__name__) + def deconstruct_clc_name(filename: str) -> dict[str]: - filename_split = { - 'dirname': os.path.dirname(filename), - 'basename': os.path.basename(filename) - } - p = re.compile('^(?P[A-Z0-9a-z_-]*)\\.(?P.*)$') - m = p.search(filename_split['basename']) - + filename_split = {"dirname": os.path.dirname(filename), "basename": os.path.basename(filename)} + p = re.compile("^(?P[A-Z0-9a-z_-]*)\\.(?P.*)$") + m = p.search(filename_split["basename"]) + if m: - filename_split |= m.groupdict() - - p = re.compile(("U(?P[0-9]{4})_" - "(?PCLC|CHA)(?P[0-9]{4})_" - "V(?P[0-9]{4})_(?P[0-9a-z]*)" - "_?(?P[A-Z]*)?" 
- "_?(?P[A-Z]*)?")) - m = p.search(filename_split['id']) - + filename_split |= m.groupdict() + + p = re.compile( + "U(?P[0-9]{4})_" + "(?PCLC|CHA)(?P[0-9]{4})_" + "V(?P[0-9]{4})_(?P[0-9a-z]*)" + "_?(?P[A-Z]*)?" + "_?(?P[A-Z]*)?" + ) + m = p.search(filename_split["id"]) + if m: filename_split |= m.groupdict() - + return filename_split def create_item_asset(asset_file: str, DOM_code: str) -> pystac.Asset: filename_elements = deconstruct_clc_name(asset_file) - id = filename_elements['id'] - suffix = filename_elements['suffix'].replace('.', '_') - - if id.startswith('readme'): - key = 'readme_' + suffix - elif id.endswith('QGIS'): - key = 'legend_' + suffix + id = filename_elements["id"] + suffix = filename_elements["suffix"].replace(".", "_") + + if id.startswith("readme"): + key = "readme_" + suffix + elif id.endswith("QGIS"): + key = "legend_" + suffix else: key = suffix label = DOM_MAP[DOM_code] - - asset = pystac.Asset(href=asset_file, title=ITEM_TITLE_MAP[key].format(label=label), media_type=ITEM_MEDIA_TYPE_MAP[key], roles=ITEM_ROLES_MAP[key]) + + asset = pystac.Asset( + href=asset_file, + title=ITEM_TITLE_MAP[key].format(label=label), + media_type=ITEM_MEDIA_TYPE_MAP[key], + roles=ITEM_ROLES_MAP[key], + ) return f"{filename_elements['id']}_{suffix}", asset -def get_img_paths(data_root: str) -> list[str]: - img_paths=[] + +def get_img_paths(data_root: str) -> list[str]: + img_paths = [] for root, _, files in os.walk(data_root): - if root.endswith(('DATA', 'French_DOMs')): + if root.endswith(("DATA", "French_DOMs")): for file in files: - if file.endswith('.tif'): + if file.endswith(".tif"): img_paths.append(os.path.join(root, file)) return img_paths def get_item_asset_files(data_root: str, img_path: str) -> list[str]: - clc_name_elements = deconstruct_clc_name(img_path) - id = clc_name_elements['id'] - dom_code = clc_name_elements['DOM_code'] + id = clc_name_elements["id"] + dom_code = clc_name_elements["DOM_code"] asset_files = [] - - for root, _, files in 
os.walk(data_root): - if not dom_code and 'French_DOMs' in root: - continue - if dom_code and 'Legend' in root and not 'French_DOMs' in root: + for root, _, files in os.walk(data_root): + if not dom_code and "French_DOMs" in root: continue - - if not 'U{update_campaign}_{theme}{reference_year}_V{release_year}'.format(**clc_name_elements).lower() in root: + + if dom_code and "Legend" in root and not "French_DOMs" in root: continue - - for file in files: - if (file.startswith(id + '.') or - file.endswith(f'{dom_code}.tif.lyr') or - file.endswith('QGIS.txt',) or - file == f'readme_{id}.txt'): + if not "U{update_campaign}_{theme}{reference_year}_V{release_year}".format(**clc_name_elements).lower() in root: + continue + for file in files: + if ( + file.startswith(id + ".") + or file.endswith(f"{dom_code}.tif.lyr") + or file.endswith( + "QGIS.txt", + ) + or file == f"readme_{id}.txt" + ): asset_files.append(os.path.join(root, file)) return asset_files - + + def project_bbox(src: rio.io.DatasetReader, dst_crs: rio.CRS = rio.CRS.from_epsg(4326)) -> tuple[float]: bbox = rio.warp.transform_bounds(src.crs, dst_crs, *src.bounds) - return(bbox) - -def project_data_window_bbox(src: rio.io.DatasetReader, dst_crs: rio.CRS = rio.CRS.from_epsg(4326), dst_resolution: tuple = (0.25, 0.25)) -> tuple[float]: - data, transform = rio.warp.reproject(source=src.read(), - src_transform=src.transform, - src_crs=src.crs, - dst_crs=dst_crs, - dst_nodata=src.nodata, - dst_resolution=dst_resolution, - resampling=rio.warp.Resampling.max) - - data_window = rio.windows.get_data_window(data, nodata=src.nodata) - bbox = rio.windows.bounds(data_window, transform=transform) - return bbox + return bbox + + +def project_data_window_bbox( + src: rio.io.DatasetReader, dst_crs: rio.CRS = rio.CRS.from_epsg(4326), dst_resolution: tuple = (0.25, 0.25) +) -> tuple[float]: + data, transform = rio.warp.reproject( + source=src.read(), + src_transform=src.transform, + src_crs=src.crs, + dst_crs=dst_crs, + 
dst_nodata=src.nodata, + dst_resolution=dst_resolution, + resampling=rio.warp.Resampling.max, + ) + + data_window = rio.windows.get_data_window(data, nodata=src.nodata) + bbox = rio.windows.bounds(data_window, transform=transform) + return bbox + def create_item(img_path: str, data_root: str) -> pystac.Item: clc_name_elements = deconstruct_clc_name(img_path) asset_files = get_item_asset_files(data_root, img_path) - asset_files = [f for f in asset_files if not f.endswith('aux')] - year = clc_name_elements.get('reference_year') - props = {'description': ITEM_DESCRIPTION.format(year=year), - 'created': None, - 'providers': CLC_PROVIDER.to_dict(), + asset_files = [f for f in asset_files if not f.endswith("aux")] + year = clc_name_elements.get("reference_year") + props = { + "description": ITEM_DESCRIPTION.format(year=year), + "created": None, + "providers": CLC_PROVIDER.to_dict(), } with rio.open(img_path) as img: - - if clc_name_elements['DOM_code']: + if clc_name_elements["DOM_code"]: bbox = project_bbox(img) else: bbox = project_data_window_bbox(img) params = { - 'id': clc_name_elements.get('id'), - 'bbox': bbox, - 'geometry': mapping(box(*bbox)), - 'datetime': None, - 'start_datetime': datetime(int(year), 1, 1, microsecond=0, tzinfo=UTC), - 'end_datetime': datetime(int(year), 12, 31, microsecond=0, tzinfo=UTC), - 'properties': props, + "id": clc_name_elements.get("id"), + "bbox": bbox, + "geometry": mapping(box(*bbox)), + "datetime": None, + "start_datetime": datetime(int(year), 1, 1, microsecond=0, tzinfo=UTC), + "end_datetime": datetime(int(year), 12, 31, microsecond=0, tzinfo=UTC), + "properties": props, } item = pystac.Item(**params) - + for asset_file in asset_files: try: - key, asset = create_item_asset(asset_file, DOM_code=clc_name_elements.get('DOM_code')) + key, asset = create_item_asset(asset_file, DOM_code=clc_name_elements.get("DOM_code")) item.add_asset( key=key, asset=asset, @@ -173,37 +179,42 @@ def create_item(img_path: str, data_root: str) -> 
pystac.Item: LOGGER.error("An error occured:", error) # TODO: "Thumbnail" was originally put at collection level in the template, while it should perhaps be at item level? Individual previews should be added to each item - key = 'preview' + key = "preview" asset = pystac.Asset( - href='https://sdi.eea.europa.eu/public/catalogue-graphic-overview/960998c1-1870-4e82-8051-6485205ebbac.png', - title=ITEM_TITLE_MAP['preview'].format(label=clc_name_elements['DOM_code']), + href="https://sdi.eea.europa.eu/public/catalogue-graphic-overview/960998c1-1870-4e82-8051-6485205ebbac.png", + title=ITEM_TITLE_MAP["preview"].format(label=clc_name_elements["DOM_code"]), media_type=ITEM_MEDIA_TYPE_MAP[key], - roles=ITEM_ROLES_MAP[key] + roles=ITEM_ROLES_MAP[key], ) item.add_asset(key=key, asset=asset) - proj_ext = ProjectionExtension.ext(item.assets[os.path.basename(img_path).replace('.', '_')], add_if_missing=True) - proj_ext.apply(epsg=rio.crs.CRS(img.crs).to_epsg(), - bbox=img.bounds, - shape=[_ for _ in img.shape], - transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0], - ) + proj_ext = ProjectionExtension.ext(item.assets[os.path.basename(img_path).replace(".", "_")], add_if_missing=True) + proj_ext.apply( + epsg=rio.crs.CRS(img.crs).to_epsg(), + bbox=img.bounds, + shape=[_ for _ in img.shape], + transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0], + ) CLMS_CATALOG_LINK = pystac.link.Link( - rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/clms_catalog.json")) + rel=pystac.RelType.ROOT, + target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/clms_catalog.json")), ) COLLECTION_LINK = pystac.link.Link( rel=pystac.RelType.COLLECTION, - target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json")), + target=pystac.STACObject.from_file( + os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json") + ), ) ITEM_PARENT_LINK = pystac.link.Link( 
rel=pystac.RelType.PARENT, - target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json")), + target=pystac.STACObject.from_file( + os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json") + ), ) - + links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] item.add_links(links) return item - From 927443e30e4ad1e5fa5a5d95f6047be6f6cd8b1c Mon Sep 17 00:00:00 2001 From: Sebastian Boeck Date: Tue, 14 May 2024 12:04:04 +0200 Subject: [PATCH 80/80] sorts out ruff errors --- scripts/clc/collection.py | 18 ++++++++------ scripts/clc/item.py | 51 ++++++++++++++++++++------------------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/scripts/clc/collection.py b/scripts/clc/collection.py index 7a718e7..4b456b7 100644 --- a/scripts/clc/collection.py +++ b/scripts/clc/collection.py @@ -48,9 +48,11 @@ def get_stac_validator(product_schema: str) -> Draft7Validator: def proj_epsg_from_item_asset(item: pystac.Item) -> int: for asset_key in item.assets: asset = item.assets[asset_key].to_dict() - if "proj:epsg" in asset.keys(): + if "proj:epsg" in asset: return asset.get("proj:epsg") + return None + def get_collection_asset_files(data_root: str) -> list[str]: asset_files = [] @@ -69,13 +71,13 @@ def get_collection_asset_files(data_root: str) -> list[str]: def create_collection_asset(asset_file: str) -> tuple[str, pystac.Asset]: filename_elements = deconstruct_clc_name(asset_file) - id = filename_elements["id"] + clc_id = filename_elements["id"] - if id.startswith("clc-file-naming"): + if clc_id.startswith("clc-file-naming"): key = "clc_file_naming" - elif id.startswith("clc-country-coverage"): + elif clc_id.startswith("clc-country-coverage"): key = "clc_country_coverage" - elif id.startswith("readme"): + elif clc_id.startswith("readme"): key = "readme" asset = pystac.Asset( @@ -85,7 +87,7 @@ def create_collection_asset(asset_file: str) -> tuple[str, pystac.Asset]: 
roles=COLLECTION_ROLES_MAP[key], ) - return id, asset + return clc_id, asset def create_collection() -> pystac.Collection: @@ -138,9 +140,9 @@ def populate_collection(collection: pystac.Collection, data_root: str) -> pystac item_epsg = proj_epsg_from_item_asset(item) proj_epsg.append(item_epsg) - DOM_code = deconstruct_clc_name(img_path).get("DOM_code") + dom_code = deconstruct_clc_name(img_path).get("DOM_code") href = os.path.join( - WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id.removesuffix(f'_FR_{DOM_code}')}/{item.id}.json" + WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{item.id.removesuffix(f'_FR_{dom_code}')}/{item.id}.json" ) item.set_self_href(href) diff --git a/scripts/clc/item.py b/scripts/clc/item.py index 516aa07..c510520 100644 --- a/scripts/clc/item.py +++ b/scripts/clc/item.py @@ -50,9 +50,9 @@ def deconstruct_clc_name(filename: str) -> dict[str]: return filename_split -def create_item_asset(asset_file: str, DOM_code: str) -> pystac.Asset: +def create_item_asset(asset_file: str, dom_code: str) -> pystac.Asset: filename_elements = deconstruct_clc_name(asset_file) - id = filename_elements["id"] + suffix = filename_elements["suffix"].replace(".", "_") if id.startswith("readme"): @@ -62,7 +62,7 @@ def create_item_asset(asset_file: str, DOM_code: str) -> pystac.Asset: else: key = suffix - label = DOM_MAP[DOM_code] + label = DOM_MAP[dom_code] asset = pystac.Asset( href=asset_file, @@ -86,7 +86,7 @@ def get_img_paths(data_root: str) -> list[str]: def get_item_asset_files(data_root: str, img_path: str) -> list[str]: clc_name_elements = deconstruct_clc_name(img_path) - id = clc_name_elements["id"] + clc_id = clc_name_elements["id"] dom_code = clc_name_elements["DOM_code"] asset_files = [] @@ -95,33 +95,34 @@ def get_item_asset_files(data_root: str, img_path: str) -> list[str]: if not dom_code and "French_DOMs" in root: continue - if dom_code and "Legend" in root and not "French_DOMs" in root: + if dom_code and "Legend" in root and "French_DOMs" not in root: 
continue - if not "U{update_campaign}_{theme}{reference_year}_V{release_year}".format(**clc_name_elements).lower() in root: + if "U{update_campaign}_{theme}{reference_year}_V{release_year}".format(**clc_name_elements).lower() not in root: continue for file in files: if ( - file.startswith(id + ".") - or file.endswith(f"{dom_code}.tif.lyr") + file.startswith(f"{clc_id}.") or file.endswith( - "QGIS.txt", + ( + f"{dom_code}.tif.lyr", + "QGIS.txt", + ) ) - or file == f"readme_{id}.txt" + or file == f"readme_{clc_id}.txt" ): asset_files.append(os.path.join(root, file)) return asset_files -def project_bbox(src: rio.io.DatasetReader, dst_crs: rio.CRS = rio.CRS.from_epsg(4326)) -> tuple[float]: - bbox = rio.warp.transform_bounds(src.crs, dst_crs, *src.bounds) - return bbox +def project_bbox(src: rio.io.DatasetReader, dst_crs: rio.CRS) -> tuple[float]: + return rio.warp.transform_bounds(src.crs, dst_crs, *src.bounds) def project_data_window_bbox( - src: rio.io.DatasetReader, dst_crs: rio.CRS = rio.CRS.from_epsg(4326), dst_resolution: tuple = (0.25, 0.25) + src: rio.io.DatasetReader, dst_crs: rio.CRS, dst_resolution: tuple = (0.25, 0.25) ) -> tuple[float]: data, transform = rio.warp.reproject( source=src.read(), @@ -134,8 +135,7 @@ def project_data_window_bbox( ) data_window = rio.windows.get_data_window(data, nodata=src.nodata) - bbox = rio.windows.bounds(data_window, transform=transform) - return bbox + return rio.windows.bounds(data_window, transform=transform) def create_item(img_path: str, data_root: str) -> pystac.Item: @@ -152,9 +152,9 @@ def create_item(img_path: str, data_root: str) -> pystac.Item: with rio.open(img_path) as img: if clc_name_elements["DOM_code"]: - bbox = project_bbox(img) + bbox = project_bbox(img, dst_crs=rio.CRS.from_epsg(4326)) else: - bbox = project_data_window_bbox(img) + bbox = project_data_window_bbox(img, dst_crs=rio.CRS.from_epsg(4326)) params = { "id": clc_name_elements.get("id"), @@ -178,7 +178,8 @@ def create_item(img_path: str, 
data_root: str) -> pystac.Item: except KeyError as error: LOGGER.error("An error occured:", error) - # TODO: "Thumbnail" was originally put at collection level in the template, while it should perhaps be at item level? Individual previews should be added to each item + # TODO: "Thumbnail" was originally put at collection level in the template, + # while it should perhaps be at item level? Individual previews should be added to each item key = "preview" asset = pystac.Asset( href="https://sdi.eea.europa.eu/public/catalogue-graphic-overview/960998c1-1870-4e82-8051-6485205ebbac.png", @@ -193,28 +194,28 @@ def create_item(img_path: str, data_root: str) -> pystac.Item: proj_ext.apply( epsg=rio.crs.CRS(img.crs).to_epsg(), bbox=img.bounds, - shape=[_ for _ in img.shape], - transform=[_ for _ in img.transform] + [0.0, 0.0, 1.0], + shape=list(img.shape), + transform=[*list(img.transform), 0.0, 0.0, 1.0], ) - CLMS_CATALOG_LINK = pystac.link.Link( + clms_catalog_link = pystac.link.Link( rel=pystac.RelType.ROOT, target=pystac.STACObject.from_file(os.path.join(WORKING_DIR, f"{STAC_DIR}/clms_catalog.json")), ) - COLLECTION_LINK = pystac.link.Link( + collection_link = pystac.link.Link( rel=pystac.RelType.COLLECTION, target=pystac.STACObject.from_file( os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json") ), ) - ITEM_PARENT_LINK = pystac.link.Link( + item_parent_link = pystac.link.Link( rel=pystac.RelType.PARENT, target=pystac.STACObject.from_file( os.path.join(WORKING_DIR, f"{STAC_DIR}/{COLLECTION_ID}/{COLLECTION_ID}.json") ), ) - links = [CLMS_LICENSE, CLMS_CATALOG_LINK, ITEM_PARENT_LINK, COLLECTION_LINK] + links = [CLMS_LICENSE, clms_catalog_link, item_parent_link, collection_link] item.add_links(links) return item