-
Notifications
You must be signed in to change notification settings - Fork 405
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
147 changed files
with
809 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
# datasets | ||
h5py==3.12.1 | ||
h5py==3.13.0 | ||
laspy==2.5.4 | ||
netcdf4==1.7.2 | ||
opencv-python==4.11.0.86 | ||
pandas[parquet]==2.2.3 | ||
pycocotools==2.0.8 | ||
scikit-image==0.25.1 | ||
scikit-image==0.25.2 | ||
scipy==1.15.2 | ||
xarray==2024.11.0 | ||
xarray==2025.1.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+28.5 KB
...GRDH_1SDV_20170613T165043_33UUP_61_39/S1A_IW_GRDH_1SDV_20170613T165043_33UUP_61_39_VH.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...GRDH_1SDV_20170613T165043_33UUP_61_39/S1A_IW_GRDH_1SDV_20170613T165043_33UUP_61_39_VV.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...GRDH_1SDV_20170614T165154_32TQT_71_84/S1A_IW_GRDH_1SDV_20170614T165154_32TQT_71_84_VH.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...GRDH_1SDV_20170614T165154_32TQT_71_84/S1A_IW_GRDH_1SDV_20170614T165154_32TQT_71_84_VV.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...GRDH_1SDV_20170615T170156_32TNS_77_12/S1A_IW_GRDH_1SDV_20170615T170156_32TNS_77_12_VH.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...GRDH_1SDV_20170615T170156_32TNS_77_12/S1A_IW_GRDH_1SDV_20170615T170156_32TNS_77_12_VV.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...GRDH_1SDV_20170618T165722_32TQR_92_45/S1A_IW_GRDH_1SDV_20170618T165722_32TQR_92_45_VH.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...GRDH_1SDV_20170618T165722_32TQR_92_45/S1A_IW_GRDH_1SDV_20170618T165722_32TQR_92_45_VV.tif
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B01.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B02.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B03.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B04.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B05.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B06.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B07.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B08.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B09.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B11.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B12.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101031_N9999_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_B8A.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B01.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B02.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B03.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B04.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B05.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B06.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B07.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B08.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B09.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B11.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B12.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102021_N9999_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_B8A.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B01.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B02.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B03.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B04.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B05.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B06.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B07.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B08.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B09.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B11.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B12.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...101021_N9999_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_B8A.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B01.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B02.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B03.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B04.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B05.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B06.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B07.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B08.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B09.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B11.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B12.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...102019_N9999_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_B8A.tif
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+28.5 KB
...99_R022_T33UUP_26_57/S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57_reference_map.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...99_R122_T32TQT_45_38/S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38_reference_map.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...99_R022_T32TQR_89_34/S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34_reference_map.tif
Binary file not shown.
Binary file added
BIN
+28.5 KB
...99_R122_T32TNS_45_23/S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23_reference_map.tif
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,238 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
import hashlib | ||
import os | ||
import shutil | ||
from pathlib import Path | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import rasterio | ||
|
||
# Constants
# Side length in pixels of every generated raster; used as (IMG_SIZE, IMG_SIZE).
IMG_SIZE = 120
# Base directory for the generated dataset folders and the metadata file.
ROOT_DIR = '.'
# 2**12 = 4096 bytes.
# NOTE(review): presumably the read size for chunked file hashing -- confirm.
CHUNK_SIZE = 2**12
|
||
# Sample patch definitions
# One dict per generated patch. Keys as consumed by the functions below:
#   s2_name / s1_name: patch identifiers, used as leaf directory names and
#                      as file-name prefixes.
#   s2_base / s1_base: parent directory names that group the patches.
#   split:             split tag stored in the metadata table.
#   labels:            class names; each must be a key of LABEL_TO_CLC.
SAMPLE_PATCHES = [
    {
        's2_name': 'S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP_26_57',
        's2_base': 'S2A_MSIL2A_20170613T101031_N9999_R022_T33UUP',
        's1_name': 'S1A_IW_GRDH_1SDV_20170613T165043_33UUP_61_39',
        's1_base': 'S1A_IW_GRDH_1SDV_20170613T165043',
        'split': 'train',
        'labels': [
            'Urban fabric',
            'Industrial or commercial units',
            'Complex cultivation patterns',
        ],
    },
    {
        's2_name': 'S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT_45_38',
        's2_base': 'S2A_MSIL2A_20170614T102021_N9999_R122_T32TQT',
        's1_name': 'S1A_IW_GRDH_1SDV_20170614T165154_32TQT_71_84',
        's1_base': 'S1A_IW_GRDH_1SDV_20170614T165154',
        'split': 'train',
        'labels': [
            'Broad-leaved forest',
            'Mixed forest',
            'Transitional woodland, shrub',
        ],
    },
    {
        's2_name': 'S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS_45_23',
        's2_base': 'S2B_MSIL2A_20170615T102019_N9999_R122_T32TNS',
        's1_name': 'S1A_IW_GRDH_1SDV_20170615T170156_32TNS_77_12',
        's1_base': 'S1A_IW_GRDH_1SDV_20170615T170156',
        'split': 'val',
        'labels': ['Arable land', 'Pastures', 'Inland waters'],
    },
    {
        's2_name': 'S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR_89_34',
        's2_base': 'S2A_MSIL2A_20170618T101021_N9999_R022_T32TQR',
        's1_name': 'S1A_IW_GRDH_1SDV_20170618T165722_32TQR_92_45',
        's1_base': 'S1A_IW_GRDH_1SDV_20170618T165722',
        'split': 'test',
        'labels': [
            'Coniferous forest',
            'Natural grassland and sparsely vegetated areas',
        ],
    },
]
|
||
# Maps each class name to the integer code that create_dummy_image writes
# into reference-map pixels.
# NOTE(review): "CLC" presumably stands for CORINE Land Cover -- confirm.
LABEL_TO_CLC = {
    'Urban fabric': 111,
    'Industrial or commercial units': 121,
    'Arable land': 211,
    'Permanent crops': 221,
    'Pastures': 231,
    'Complex cultivation patterns': 242,
    'Land principally occupied by agriculture, with significant areas of natural vegetation': 243,
    'Agro-forestry areas': 244,
    'Broad-leaved forest': 311,
    'Coniferous forest': 312,
    'Mixed forest': 313,
    'Natural grassland and sparsely vegetated areas': 321,
    'Moors, heathland and sclerophyllous vegetation': 322,
    'Transitional woodland, shrub': 324,
    'Beaches, dunes, sands': 331,
    'Inland wetlands': 411,
    'Coastal wetlands': 421,
    'Inland waters': 511,
    'Marine waters': 523,
}
|
||
# Band suffixes: one '<patch name>_<band>.tif' file is written per entry.
S1_BANDS = ['VV', 'VH']
S2_BANDS = [
    'B01',
    'B02',
    'B03',
    'B04',
    'B05',
    'B06',
    'B07',
    'B08',
    'B8A',
    'B09',
    'B11',
    'B12',
]
|
||
|
||
def create_directory_structure() -> None:
    """Recreate the three top-level dataset directories under ``ROOT_DIR``.

    A directory left over from an earlier run is deleted before it is
    created again, so the result always starts out empty.
    """
    for dir_name in ('BigEarthNet-S1', 'BigEarthNet-S2', 'Reference_Maps'):
        target = os.path.join(ROOT_DIR, dir_name)
        if os.path.exists(target):
            shutil.rmtree(target)
        Path(target).mkdir(parents=True, exist_ok=True)
|
||
|
||
def create_dummy_image( | ||
path: str, shape: tuple[int, int], dtype: str, labels: list[str] | None = None | ||
) -> None: | ||
"""Create a dummy GeoTIFF file""" | ||
if dtype == 's1': | ||
data = np.random.randint(-25, 0, shape).astype(np.int16) | ||
elif dtype == 's2': | ||
data = np.random.randint(0, 10000, shape).astype(np.int16) | ||
else: # reference map | ||
clc_codes = [LABEL_TO_CLC[label] for label in labels] | ||
data = np.random.choice(clc_codes, size=shape).astype(np.uint16) | ||
|
||
with rasterio.open( | ||
path, | ||
'w', | ||
driver='GTiff', | ||
height=shape[0], | ||
width=shape[1], | ||
count=1, | ||
dtype=data.dtype, | ||
crs='+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs', | ||
transform=rasterio.transform.from_origin(0, 0, 10, 10), | ||
) as dst: | ||
dst.write(data, 1) | ||
|
||
|
||
def generate_sample(patch_info: dict) -> None:
    """Write all files of one patch: the S1 bands, the S2 bands and the reference map.

    Each group is stored under
    ``ROOT_DIR/<collection>/<base name>/<patch name>/``.
    """
    size = (IMG_SIZE, IMG_SIZE)

    # S1 bands
    s1_dir = os.path.join(
        ROOT_DIR, 'BigEarthNet-S1', patch_info['s1_base'], patch_info['s1_name']
    )
    os.makedirs(s1_dir, exist_ok=True)
    for band in S1_BANDS:
        s1_file = os.path.join(s1_dir, f'{patch_info["s1_name"]}_{band}.tif')
        create_dummy_image(s1_file, size, 's1')

    # S2 bands
    s2_dir = os.path.join(
        ROOT_DIR, 'BigEarthNet-S2', patch_info['s2_base'], patch_info['s2_name']
    )
    os.makedirs(s2_dir, exist_ok=True)
    for band in S2_BANDS:
        s2_file = os.path.join(s2_dir, f'{patch_info["s2_name"]}_{band}.tif')
        create_dummy_image(s2_file, size, 's2')

    # Reference map, stored under the S2 patch name
    ref_dir = os.path.join(
        ROOT_DIR, 'Reference_Maps', patch_info['s2_base'], patch_info['s2_name']
    )
    os.makedirs(ref_dir, exist_ok=True)
    ref_file = os.path.join(ref_dir, f'{patch_info["s2_name"]}_reference_map.tif')
    create_dummy_image(ref_file, size, 'reference', labels=patch_info['labels'])
|
||
|
||
def create_metadata() -> None:
    """Write ``metadata.parquet`` with one row per entry of ``SAMPLE_PATCHES``."""
    rows = [
        {
            'patch_id': entry['s2_name'],
            's1_name': entry['s1_name'],
            'split': entry['split'],
            'labels': entry['labels'],
        }
        for entry in SAMPLE_PATCHES
    ]

    out_path = os.path.join(ROOT_DIR, 'metadata.parquet')
    pd.DataFrame.from_records(rows).to_parquet(out_path)
|
||
|
||
def _split_archive(tar_path: str, suffixes: list[str]) -> list[str]:
    """Cut the file at *tar_path* into ``len(suffixes)`` contiguous pieces.

    Piece ``i`` is written to ``<tar_path><suffixes[i]>``. All pieces have
    equal size except the last, which also takes the remainder.

    Returns:
        Paths of the written pieces, in order.
    """
    with open(tar_path, 'rb') as f:
        content = f.read()

    part_size = len(content) // len(suffixes)
    last = len(suffixes) - 1
    part_paths = []
    for index, suffix in enumerate(suffixes):
        start = index * part_size
        end = None if index == last else start + part_size
        part_path = f'{tar_path}{suffix}'
        with open(part_path, 'wb') as g:
            g.write(content[start:end])
        part_paths.append(part_path)
    return part_paths


def _md5_of_file(path: str) -> str:
    """Return the hex MD5 digest of the file at *path*, read CHUNK_SIZE bytes at a time."""
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


def main() -> None:
    """Generate the dummy dataset, archive it, and print the MD5 of each archive piece.

    Each top-level directory is packed into ``<directory>.tar.gz`` in the
    current working directory. The archive is then replaced by its split
    pieces, and each piece's path and MD5 digest are printed.
    """
    create_directory_structure()

    for patch_info in SAMPLE_PATCHES:
        generate_sample(patch_info)

    create_metadata()

    for directory in ['BigEarthNet-S1', 'BigEarthNet-S2', 'Reference_Maps']:
        shutil.make_archive(directory, 'gztar', '.', directory)
        tar_path = f'{directory}.tar.gz'

        # The image archives are cut into two halves ("aa", "ab"); the
        # reference maps stay in a single piece ("aa").
        suffixes = ['aa'] if directory == 'Reference_Maps' else ['aa', 'ab']
        split_paths = _split_archive(tar_path, suffixes)

        # Only the split pieces are kept.
        os.remove(tar_path)

        for path in split_paths:
            print(path, _md5_of_file(path))
|
||
|
||
if __name__ == '__main__': | ||
main() |
Binary file not shown.
Oops, something went wrong.