Commit d5413cb

Merge branch 'main' into datasets/glaciers_alps

dcodrut authored Jan 13, 2025
2 parents 5d7b27a + 68e0cfe

Showing 71 changed files with 757 additions and 96 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.0
+    rev: v0.9.1
     hooks:
       - id: ruff
         types_or:
5 changes: 5 additions & 0 deletions docs/api/datasets.rst
@@ -358,6 +358,11 @@ MapInWild
 
 .. autoclass:: MapInWild
 
+MDAS
+^^^^
+
+.. autoclass:: MDAS
+
 Million-AID
 ^^^^^^^^^^^
 
1 change: 1 addition & 0 deletions docs/api/datasets/non_geo_datasets.csv
@@ -30,6 +30,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
 `LEVIR-CD+`_,CD,Google Earth,-,985,2,"1,024x1,024",0.5,RGB
 `LoveDA`_,S,Google Earth,"CC-BY-NC-SA-4.0","5,987",7,"1,024x1,024",0.3,RGB
 `MapInWild`_,S,"Sentinel-1/2, ESA WorldCover, NOAA VIIRS DNB","CC-BY-4.0",1018,1,1920x1920,10--463.83,"SAR, MSI, 2020_Map, avg_rad"
+`MDAS`_,S,"Sentinel-1/2, EnMAP, HySpex","CC-BY-SA-4.0",3,20,"100x120, 300x360, 1364x1636, 10000x12000, 15000x18000",0.3--30,HSI
 `Million-AID`_,C,Google Earth,-,1M,51--73,,0.5--153,RGB
 `MMEarth`_,"C, S","Aster, Sentinel, ERA5","CC-BY-4.0","100K--1M",,"128x128 or 64x64",10,MSI
 `NASA Marine Debris`_,OD,PlanetScope,"Apache-2.0",707,1,256x256,3,RGB
2 changes: 1 addition & 1 deletion docs/tutorials/transforms.ipynb
@@ -707,7 +707,7 @@
 "sample = dataset[idx]\n",
 "rgb = sample['image'][0, 1:4]\n",
 "image = T.ToPILImage()(rgb)\n",
-"print(f\"Class Label: {dataset.classes[sample['label']]}\")\n",
+"print(f'Class Label: {dataset.classes[sample[\"label\"]]}')\n",
 "image.resize((256, 256), resample=Image.BILINEAR)"
 ]
 },
2 changes: 1 addition & 1 deletion experiments/torchgeo/run_resisc45_experiments.py
@@ -38,7 +38,7 @@ def do_work(work: 'Queue[str]', gpu_idx: int) -> bool:
     for model, lr, loss, weights in itertools.product(
         model_options, lr_options, loss_options, weight_options
     ):
-        experiment_name = f"{model}_{lr}_{loss}_{weights.replace('_', '-')}"
+        experiment_name = f'{model}_{lr}_{loss}_{weights.replace("_", "-")}'
 
         output_dir = os.path.join('output', 'resisc45_experiments')
         log_dir = os.path.join(output_dir, 'logs')
2 changes: 1 addition & 1 deletion experiments/torchgeo/run_so2sat_byol_experiments.py
@@ -39,7 +39,7 @@ def do_work(work: 'Queue[str]', gpu_idx: int) -> bool:
     for model, lr, loss, weights, bands in itertools.product(
         model_options, lr_options, loss_options, weight_options, bands_options
     ):
-        experiment_name = f"{model}_{lr}_{loss}_byol_{bands}-{weights.split('/')[-2]}"
+        experiment_name = f'{model}_{lr}_{loss}_byol_{bands}-{weights.split("/")[-2]}'
 
         output_dir = os.path.join('output', 'so2sat_experiments')
         log_dir = os.path.join(output_dir, 'logs')
2 changes: 1 addition & 1 deletion experiments/torchgeo/run_so2sat_experiments.py
@@ -38,7 +38,7 @@ def do_work(work: 'Queue[str]', gpu_idx: int) -> bool:
     for model, lr, loss, weights in itertools.product(
         model_options, lr_options, loss_options, weight_options
     ):
-        experiment_name = f"{model}_{lr}_{loss}_{weights.replace('_', '-')}"
+        experiment_name = f'{model}_{lr}_{loss}_{weights.replace("_", "-")}'
 
         output_dir = os.path.join('output', 'so2sat_experiments')
         log_dir = os.path.join(output_dir, 'logs')
2 changes: 1 addition & 1 deletion experiments/torchgeo/run_so2sat_seed_experiments.py
@@ -39,7 +39,7 @@ def do_work(work: 'Queue[str]', gpu_idx: int) -> bool:
     for model, lr, loss, weights, seed in itertools.product(
         model_options, lr_options, loss_options, weight_options, seeds
     ):
-        experiment_name = f"{model}_{lr}_{loss}_{weights.replace('_', '-')}_{seed}"
+        experiment_name = f'{model}_{lr}_{loss}_{weights.replace("_", "-")}_{seed}'
 
         output_dir = os.path.join('output', 'so2sat_seed_experiments')
         log_dir = os.path.join(output_dir, 'logs')
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -115,8 +115,8 @@ docs = [
 style = [
     # mypy 0.900+ required for pyproject.toml support
     "mypy>=0.900",
-    # ruff 0.8+ required for removal of ANN101, ANN102
-    "ruff>=0.8",
+    # ruff 0.9+ required for 2025 style guide
+    "ruff>=0.9",
 ]
 tests = [
     # nbmake 1.3.3+ required for variable mocking
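Most of the mechanical churn in this commit follows from this bump: ruff 0.9 stabilized the 2025 style guide, whose formatter now formats the code inside f-string replacement fields and wraps long assert messages in parentheses rather than the asserted condition (see the tests/datamodules diff at the bottom). A minimal before/after sketch of the two f-string rules at work in the experiment scripts above and the test-data generators below; the values assigned here are placeholders for illustration, not names from this repository:

# Illustrative placeholder values, only so the lines below run as written.
model, lr, loss, weights, i = 'resnet18', 0.001, 'ce', 'imagenet_swsl', 0

# Before ruff 0.9 (2024 style): f-string internals were left untouched, so
# inner quotes had to differ from the outer quotes, and '{i+1}' kept whatever
# spacing it was written with.
name = f"{model}_{lr}_{loss}_{weights.replace('_', '-')}"
path = f'austin{i+1}.tif'

# After ruff 0.9 (2025 style): replacement fields are formatted like ordinary
# code, so inner string literals flip to double quotes (single quotes stay on
# the outside, matching this project's quote style) and binary operators gain
# surrounding spaces.
name = f'{model}_{lr}_{loss}_{weights.replace("_", "-")}'
path = f'austin{i + 1}.tif'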
2 changes: 1 addition & 1 deletion requirements/datasets.txt
@@ -6,6 +6,6 @@ pandas[parquet]==2.2.3
 pycocotools==2.0.8
 pyvista==0.44.2
 scikit-image==0.25.0
-scipy==1.14.1
+scipy==1.15.0
 xarray==2024.11.0
 netcdf4==1.7.2
4 changes: 2 additions & 2 deletions requirements/required.txt
@@ -1,5 +1,5 @@
 # setup
-setuptools==75.6.0
+setuptools==75.8.0
 
 # install
 einops==0.8.0
@@ -10,7 +10,7 @@ lightning[pytorch-extra]==2.5.0.post0
 matplotlib==3.10.0
 numpy==2.2.1
 pandas==2.2.3
-pillow==11.0.0
+pillow==11.1.0
 pyproj==3.7.0
 rasterio==1.4.3
 rtree==1.3.0
2 changes: 1 addition & 1 deletion requirements/style.txt
@@ -1,3 +1,3 @@
 # style
 mypy==1.14.1
-ruff==0.8.4
+ruff==0.9.1
6 changes: 3 additions & 3 deletions tests/data/inria/data.py
@@ -68,9 +68,9 @@ def generate_test_data(root: str, n_samples: int = 2) -> str:
         lbl = np.random.randint(2, size=size, dtype=dtype)
         timg = np.random.randint(dtype_max, size=size, dtype=dtype)
 
-        img_path = os.path.join(img_dir, f'austin{i+1}.tif')
-        lbl_path = os.path.join(lbl_dir, f'austin{i+1}.tif')
-        timg_path = os.path.join(timg_dir, f'austin{i+10}.tif')
+        img_path = os.path.join(img_dir, f'austin{i + 1}.tif')
+        lbl_path = os.path.join(lbl_dir, f'austin{i + 1}.tif')
+        timg_path = os.path.join(timg_dir, f'austin{i + 10}.tif')
 
         write_data(img_path, img, driver, crs, transform)
         write_data(lbl_path, lbl, driver, crs, transform)
Binary file added tests/data/mdas/Augsburg_data_4_publication.zip (binary file not shown)
33 further binary files added, not shown: presumably the dummy GeoTIFFs (3 sub-areas x 11 modalities) generated by tests/data/mdas/data.py below.
161 changes: 161 additions & 0 deletions tests/data/mdas/data.py
@@ -0,0 +1,161 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import os
import shutil

import numpy as np
import rasterio
from rasterio.crs import CRS
from rasterio.transform import from_origin

# Set the random seed for reproducibility
np.random.seed(0)

# Define the root directory, dataset name, subareas, and modalities based on mdas.py
root_dir = '.'
ds_root_name = 'Augsburg_data_4_publication'
subareas = ['sub_area_1', 'sub_area_2', 'sub_area_3']
modalities = [
'3K_DSM',
'3K_RGB',
'HySpex',
'EeteS_EnMAP_10m',
'EeteS_EnMAP_30m',
'EeteS_Sentinel_2_10m',
'Sentinel_1',
'Sentinel_2',
'osm_buildings',
'osm_landuse',
'osm_water',
]

landuse_class_codes = [
-2147483647, # no label
7201, # forest
7202, # park
7203, # residential
7204, # industrial
7205, # farm
7206, # cemetery
7207, # allotments
7208, # meadow
7209, # commercial
7210, # nature reserve
7211, # recreation ground
7212, # retail
7213, # military
7214, # quarry
7215, # orchard
7217, # scrub
7218, # grass
7219, # heath
]

# Remove existing dummy data if it exists
dataset_path = os.path.join(root_dir, ds_root_name)
if os.path.exists(dataset_path):
shutil.rmtree(dataset_path)


def create_dummy_geotiff(
path: str,
num_bands: int = 3,
width: int = 32,
height: int = 32,
dtype: np.dtype = np.uint16,
binary: bool = False,
landuse: bool = False,
) -> None:
"""Create a dummy GeoTIFF file."""
crs = CRS.from_epsg(32632)
transform = from_origin(0, 0, 1, 1)

if binary:
data = np.random.randint(0, 2, size=(num_bands, height, width)).astype(dtype)
elif landuse:
num_pixels = num_bands * height * width
no_label_ratio = 0.1
num_no_label = int(no_label_ratio * num_pixels)
num_labels = num_pixels - num_no_label
landuse_values = np.random.choice(landuse_class_codes[1:], size=num_labels)
no_label_values = np.full(num_no_label, landuse_class_codes[0], dtype=dtype)
combined = np.concatenate([landuse_values, no_label_values])
np.random.shuffle(combined)
data = combined.reshape((num_bands, height, width)).astype(dtype)
else:
# Generate random data for other modalities
data = np.random.randint(0, 255, size=(num_bands, height, width)).astype(dtype)

os.makedirs(os.path.dirname(path), exist_ok=True)

with rasterio.open(
path,
'w',
driver='GTiff',
height=height,
width=width,
count=num_bands,
dtype=dtype,
crs=crs,
transform=transform,
) as dst:
dst.write(data)


# Create directory structure and dummy data
for subarea in subareas:
# Format the subarea name for filenames, as in mdas.py _format_subarea method
parts = subarea.split('_')
subarea_formatted = parts[0] + '_' + parts[1] + parts[2] # e.g., 'sub_area1'

subarea_dir = os.path.join(root_dir, ds_root_name, subarea)

for modality in modalities:
filename = f'{modality}_{subarea_formatted}.tif'
file_path = os.path.join(subarea_dir, filename)

if modality in ['osm_buildings', 'osm_water']:
create_dummy_geotiff(file_path, num_bands=1, dtype=np.uint8, binary=True)
elif modality == 'osm_landuse':
create_dummy_geotiff(file_path, num_bands=1, dtype=np.float64, landuse=True)
elif modality == 'HySpex':
create_dummy_geotiff(file_path, num_bands=368, dtype=np.int16)
elif modality in ['EeteS_EnMAP_10m', 'EeteS_EnMAP_30m']:
create_dummy_geotiff(file_path, num_bands=242, dtype=np.uint16)
elif modality == 'Sentinel_1':
create_dummy_geotiff(file_path, num_bands=2, dtype=np.float32)
elif modality in ['Sentinel_2', 'EeteS_Sentinel_2_10m']:
create_dummy_geotiff(file_path, num_bands=13, dtype=np.uint16)
elif modality == '3K_DSM':
create_dummy_geotiff(file_path, num_bands=1, dtype=np.float32)
elif modality == '3K_RGB':
create_dummy_geotiff(file_path, num_bands=3, dtype=np.uint8)

print(f'Dummy MDAS dataset created at {os.path.join(root_dir, ds_root_name)}')

# Create a zip archive of the dataset directory
zip_filename = f'{ds_root_name}.zip'
zip_path = os.path.join(root_dir, zip_filename)

shutil.make_archive(
base_name=os.path.splitext(zip_path)[0],
format='zip',
root_dir='.',
base_dir=ds_root_name,
)


def calculate_md5(filename: str) -> str:
hash_md5 = hashlib.md5()
with open(filename, 'rb') as f:
for chunk in iter(lambda: f.read(4096), b''):
hash_md5.update(chunk)
return hash_md5.hexdigest()


checksum = calculate_md5(zip_path)
print(f'MD5 checksum: {checksum}')
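The generator above is the only MDAS code shown in this diff. For orientation, a hypothetical usage sketch of the new dataset class these fixtures test: the subareas/modalities argument names are assumptions inferred from the lists in this script and the usual TorchGeo dataset pattern, while the authoritative signature lives in torchgeo/datasets/mdas.py, which this excerpt does not include.

# Hypothetical sketch, assuming MDAS follows the standard TorchGeo
# NonGeoDataset pattern; argument names are inferred from the test script
# above, not confirmed against torchgeo/datasets/mdas.py.
from torchgeo.datasets import MDAS

ds = MDAS(
    root='tests/data/mdas',  # directory holding Augsburg_data_4_publication.zip
    subareas=['sub_area_1'],
    modalities=['Sentinel_1', 'Sentinel_2', 'osm_landuse'],
)
sample = ds[0]  # presumably a dict with one tensor per requested modality
print(sample.keys())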
2 changes: 1 addition & 1 deletion tests/data/seasonet/data.py
@@ -63,7 +63,7 @@
 os.remove(archive)
 
 for grid, comp in zip(grids, name_comps):
-    file_name = f"{comp[0]}_{''.join(comp[1:8])}_{'_'.join(comp[8:])}"
+    file_name = f'{comp[0]}_{"".join(comp[1:8])}_{"_".join(comp[8:])}'
     dir = os.path.join(season, f'grid{grid}', file_name)
     os.makedirs(dir)
 
4 changes: 2 additions & 2 deletions tests/data/ssl4eo_benchmark_landsat/data.py
@@ -193,7 +193,7 @@ def create_tarballs(directories: str) -> None:
 # mask directory cdl
 mask_keep = ['tm_toa', 'etm_sr', 'oli_sr']
 mask_filenames = {
-    f"ssl4eo_l_{key.split('_')[0]}_cdl": val
+    f'ssl4eo_l_{key.split("_")[0]}_cdl': val
     for key, val in filenames.items()
     if key in mask_keep
 }
@@ -203,7 +203,7 @@ def create_tarballs(directories: str) -> None:
 
 # mask directory nlcd
 mask_filenames = {
-    f"ssl4eo_l_{key.split('_')[0]}_nlcd": val
+    f'ssl4eo_l_{key.split("_")[0]}_nlcd': val
     for key, val in filenames.items()
     if key in mask_keep
 }
12 changes: 6 additions & 6 deletions tests/datamodules/test_digital_typhoon.py
@@ -57,14 +57,14 @@ def find_max_time_per_id(
     # Assert that each max value in train_max_values is lower
     # than in val_max_values for each key id
     for id, max_value in train_max_values.items():
-        assert (
-            id not in val_max_values or max_value < val_max_values[id]
-        ), f'Max value for id {id} in train is not lower than in validation.'
+        assert id not in val_max_values or max_value < val_max_values[id], (
+            f'Max value for id {id} in train is not lower than in validation.'
+        )
 else:
     train_ids = {seq['id'] for seq in train_sequences}
     val_ids = {seq['id'] for seq in val_sequences}
 
     # Assert that the intersection between train_ids and val_ids is empty
-    assert (
-        len(train_ids & val_ids) == 0
-    ), 'Train and validation datasets have overlapping ids.'
+    assert len(train_ids & val_ids) == 0, (
+        'Train and validation datasets have overlapping ids.'
+    )