Commit c49deda

FTW U-Net Models (#2719)
* add ftw unet models
* add ftw models to docs
* add unet to hubconf.py
* add unet to imports
* fix docs
* update licenses
* fix docs x2
* fix docs x3
* add tests
* use smp.create_model
* fix test cov
* add unet to api
* fix docs
* fix types
* fix docstring
* add typing
* change version added
* assert with unexpected keys
* missed an unexpected_keys
* Update conf.py
* Update conf.py
1 parent 3e9654f commit c49deda

File tree: 8 files changed, +244 -1 lines changed

docs/api/models.rst (+7)

@@ -89,6 +89,13 @@ Panopticon
 .. autofunction:: panopticon_vitb14
 .. autoclass:: Panopticon_Weights

+U-Net
+^^^^^
+
+.. autofunction:: unet
+.. autoclass:: Unet_Weights
+
+
 Vision Transformer
 ^^^^^^^^^^^^^^^^^^
docs/api/weights/sentinel2.csv (+4)

@@ -36,3 +36,7 @@
 Swin_V2_B_Weights.SENTINEL2_MI_RGB_SATLAS,3,`link <https://github.com/allenai/satlas>`__,`link <https://arxiv.org/abs/2211.15660>`__,ODC-BY,,,,
 Swin_V2_B_Weights.SENTINEL2_SI_MS_SATLAS,9,`link <https://github.com/allenai/satlas>`__,`link <https://arxiv.org/abs/2211.15660>`__,ODC-BY,,,,
 Swin_V2_B_Weights.SENTINEL2_SI_RGB_SATLAS,3,`link <https://github.com/allenai/satlas>`__,`link <https://arxiv.org/abs/2211.15660>`__,ODC-BY,,,,
+Unet_Weights.SENTINEL2_2CLASS_FTW,8,`link <https://github.com/fieldsoftheworld/ftw-baselines>`__,`link <https://arxiv.org/abs/2409.16252>`__,"CC-BY-4.0",,,
+Unet_Weights.SENTINEL2_2CLASS_NC_FTW,8,`link <https://github.com/fieldsoftheworld/ftw-baselines>`__,`link <https://arxiv.org/abs/2409.16252>`__,"non-commercial",,,
+Unet_Weights.SENTINEL2_3CLASS_FTW,8,`link <https://github.com/fieldsoftheworld/ftw-baselines>`__,`link <https://arxiv.org/abs/2409.16252>`__,"CC-BY-4.0",,,
+Unet_Weights.SENTINEL2_3CLASS_NC_FTW,8,`link <https://github.com/fieldsoftheworld/ftw-baselines>`__,`link <https://arxiv.org/abs/2409.16252>`__,"non-commercial",,,

hubconf.py (+3 -1)

@@ -20,6 +20,7 @@
     scalemae_large_patch16,
     swin_v2_b,
     swin_v2_t,
+    unet,
     vit_base_patch14_dinov2,
     vit_base_patch16_224,
     vit_huge_patch14_224,
@@ -41,6 +42,7 @@
     'scalemae_large_patch16',
     'swin_v2_b',
     'swin_v2_t',
+    'unet',
     'vit_base_patch14_dinov2',
     'vit_base_patch16_224',
     'vit_huge_patch14_224',
@@ -49,4 +51,4 @@
     'vit_small_patch16_224',
 )

-dependencies = ['timm', 'torchvision']
+dependencies = ['timm', 'torchvision', 'segmentation_models_pytorch', 'kornia']
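With the unet entry point exported and the two new dependencies declared, the model becomes loadable through torch.hub without cloning the repository. A minimal sketch; the microsoft/torchgeo repository slug is an assumption here, not part of this diff, and encoder_weights=None is passed only to skip smp's default ImageNet encoder download:

    import torch

    # Resolve the 'unet' entry point registered in hubconf.py above; extra
    # kwargs are forwarded to torchgeo.models.unet (and on to smp.create_model).
    model = torch.hub.load('microsoft/torchgeo', 'unet', encoder_weights=None)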

tests/models/test_api.py (+4)

@@ -21,6 +21,7 @@
     ScaleMAELarge16_Weights,
     Swin_V2_B_Weights,
     Swin_V2_T_Weights,
+    Unet_Weights,
     ViTBase14_DINOv2_Weights,
     ViTBase16_Weights,
     ViTHuge14_Weights,
@@ -45,6 +46,7 @@
     scalemae_large_patch16,
     swin_v2_b,
     swin_v2_t,
+    unet,
     vit_base_patch14_dinov2,
     vit_base_patch16_224,
     vit_huge_patch14_224,
@@ -68,6 +70,7 @@
     scalemae_large_patch16,
     swin_v2_t,
     swin_v2_b,
+    unet,
     vit_base_patch14_dinov2,
     vit_base_patch16_224,
     vit_huge_patch14_224,
@@ -88,6 +91,7 @@
     ScaleMAELarge16_Weights,
     Swin_V2_T_Weights,
     Swin_V2_B_Weights,
+    Unet_Weights,
     ViTBase14_DINOv2_Weights,
     ViTBase16_Weights,
     ViTHuge14_Weights,

tests/models/test_unet.py (new file, +61)

@@ -0,0 +1,61 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+from pathlib import Path
+
+import pytest
+import segmentation_models_pytorch as smp
+import torch
+from _pytest.fixtures import SubRequest
+from pytest import MonkeyPatch
+from torchvision.models._api import WeightsEnum
+
+from torchgeo.models import Unet_Weights, unet
+
+
+class TestUnet:
+    @pytest.fixture(params=[*Unet_Weights])
+    def weights(self, request: SubRequest) -> WeightsEnum:
+        return request.param
+
+    @pytest.fixture
+    def mocked_weights(
+        self, tmp_path: Path, monkeypatch: MonkeyPatch, load_state_dict_from_url: None
+    ) -> WeightsEnum:
+        weights = Unet_Weights.SENTINEL2_2CLASS_FTW
+        path = tmp_path / f'{weights}.pth'
+        model = smp.Unet(
+            in_channels=weights.meta['in_chans'],
+            encoder_name=weights.meta['encoder'],
+            encoder_weights=None,
+            classes=weights.meta['num_classes'],
+        )
+        torch.save(model.state_dict(), path)
+        monkeypatch.setattr(weights.value, 'url', str(path))
+        return weights
+
+    def test_unet(self) -> None:
+        unet()
+
+    def test_unet_weights(self, mocked_weights: WeightsEnum) -> None:
+        unet(weights=mocked_weights)
+
+    def test_unet_weights_different_num_classes(
+        self, mocked_weights: WeightsEnum
+    ) -> None:
+        unet(weights=mocked_weights, classes=20)
+
+    def test_bands(self, weights: WeightsEnum) -> None:
+        if 'bands' in weights.meta:
+            assert len(weights.meta['bands']) == weights.meta['in_chans']
+
+    def test_transforms(self, weights: WeightsEnum) -> None:
+        c = weights.meta['in_chans']
+        sample = {
+            'image': torch.arange(c * 256 * 256, dtype=torch.float).view(c, 256, 256)
+        }
+        weights.transforms(sample)
+
+    @pytest.mark.slow
+    def test_unet_download(self, weights: WeightsEnum) -> None:
+        unet(weights=weights)
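The mocked_weights fixture writes a randomly initialized smp.Unet state dict to a temporary file and monkeypatches the enum's url to point at it, so everything except the slow-marked download test runs offline. Assuming the repository's usual pytest marker setup, the file can be exercised locally with:

    pytest tests/models/test_unet.py -m 'not slow'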

torchgeo/models/__init__.py (+3)

@@ -31,6 +31,7 @@
 )
 from .scale_mae import ScaleMAE, ScaleMAELarge16_Weights, scalemae_large_patch16
 from .swin import Swin_V2_B_Weights, Swin_V2_T_Weights, swin_v2_b, swin_v2_t
+from .unet import Unet_Weights, unet
 from .vit import (
     ViTBase14_DINOv2_Weights,
     ViTBase16_Weights,
@@ -72,6 +73,7 @@
     'ScaleMAELarge16_Weights',
     'Swin_V2_B_Weights',
     'Swin_V2_T_Weights',
+    'Unet_Weights',
     'ViTBase14_DINOv2_Weights',
     'ViTBase16_Weights',
     'ViTHuge14_Weights',
@@ -96,6 +98,7 @@
     'scalemae_large_patch16',
     'swin_v2_b',
     'swin_v2_t',
+    'unet',
     'vit_base_patch14_dinov2',
     'vit_base_patch16_224',
     'vit_huge_patch14_224',

torchgeo/models/api.py (+4)

@@ -37,6 +37,7 @@
 )
 from .scale_mae import ScaleMAELarge16_Weights, scalemae_large_patch16
 from .swin import Swin_V2_B_Weights, Swin_V2_T_Weights, swin_v2_b, swin_v2_t
+from .unet import Unet_Weights, unet
 from .vit import (
     ViTBase14_DINOv2_Weights,
     ViTBase16_Weights,
@@ -67,6 +68,7 @@
     'scalemae_large_patch16': scalemae_large_patch16,
     'swin_v2_t': swin_v2_t,
     'swin_v2_b': swin_v2_b,
+    'unet': unet,
     'vit_small_patch16_224': vit_small_patch16_224,
     'vit_base_patch14_dinov2': vit_base_patch14_dinov2,
     'vit_base_patch16_224': vit_base_patch16_224,
@@ -88,6 +90,7 @@
     scalemae_large_patch16: ScaleMAELarge16_Weights,
     swin_v2_t: Swin_V2_T_Weights,
     swin_v2_b: Swin_V2_B_Weights,
+    unet: Unet_Weights,
     vit_small_patch16_224: ViTSmall16_Weights,
     vit_base_patch14_dinov2: ViTBase14_DINOv2_Weights,
     vit_base_patch16_224: ViTBase16_Weights,
@@ -106,6 +109,7 @@
     'scalemae_large_patch16': ScaleMAELarge16_Weights,
     'swin_v2_t': Swin_V2_T_Weights,
     'swin_v2_b': Swin_V2_B_Weights,
+    'unet': Unet_Weights,
     'vit_small_patch16_224': ViTSmall16_Weights,
     'vit_base_patch14_dinov2': ViTBase14_DINOv2_Weights,
     'vit_base_patch16_224': ViTBase16_Weights,
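Registering unet in these tables wires it into the module's name-based lookup. A short sketch, assuming the get_model and get_model_weights helpers that torchgeo.models builds on top of these dictionaries:

    from torchgeo.models import get_model, get_model_weights

    # Look up the weight enum registered for the new architecture name
    weight_enum = get_model_weights('unet')
    print(list(weight_enum))

    # Instantiate by name; keyword arguments are forwarded to torchgeo.models.unet
    model = get_model('unet', weights=weight_enum.SENTINEL2_2CLASS_FTW)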

torchgeo/models/unet.py (new file, +158)

@@ -0,0 +1,158 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""Pre-trained U-Net models."""
+
+from typing import Any
+
+import kornia.augmentation as K
+import segmentation_models_pytorch as smp
+import torch
+from segmentation_models_pytorch import Unet
+from torchvision.models._api import Weights, WeightsEnum
+
+# Specified in https://github.com/fieldsoftheworld/ftw-baselines
+# First 4 S2 bands are for image t1 and last 4 bands are for image t2
+_ftw_sentinel2_bands = ['B4', 'B3', 'B2', 'B8A', 'B4', 'B3', 'B2', 'B8A']
+
+# https://github.com/fieldsoftheworld/ftw-baselines/blob/main/src/ftw/datamodules.py
+# Normalization by 3k (for S2 uint16 input)
+_ftw_transforms = K.AugmentationSequential(
+    K.Normalize(mean=torch.tensor(0.0), std=torch.tensor(3000.0)), data_keys=None
+)
+
+# https://github.com/pytorch/vision/pull/6883
+# https://github.com/pytorch/vision/pull/7107
+# Can be removed once torchvision>=0.15 is required
+Weights.__deepcopy__ = lambda *args, **kwargs: args[0]
+
+
+class Unet_Weights(WeightsEnum):  # type: ignore[misc]
+    """U-Net weights.
+
+    For `smp <https://github.com/qubvel-org/segmentation_models.pytorch>`_
+    *Unet* implementation.
+
+    .. versionadded:: 0.8
+    """
+
+    SENTINEL2_2CLASS_FTW = Weights(
+        url='https://huggingface.co/torchgeo/ftw/resolve/d2fdab6ea9d9cd38b491292cc9a5c8642533cef5/commercial/2-class/sentinel2_unet_effb3-9c04b7c6.pth',
+        transforms=_ftw_transforms,
+        meta={
+            'dataset': 'FTW',
+            'in_chans': 8,
+            'num_classes': 2,
+            'model': 'U-Net',
+            'encoder': 'efficientnet-b3',
+            'publication': 'https://arxiv.org/abs/2409.16252',
+            'repo': 'https://github.com/fieldsoftheworld/ftw-baselines',
+            'bands': _ftw_sentinel2_bands,
+            'license': 'CC-BY-4.0',
+        },
+    )
+    SENTINEL2_3CLASS_FTW = Weights(
+        url='https://huggingface.co/torchgeo/ftw/resolve/d2fdab6ea9d9cd38b491292cc9a5c8642533cef5/commercial/3-class/sentinel2_unet_effb3-5d591cbb.pth',
+        transforms=_ftw_transforms,
+        meta={
+            'dataset': 'FTW',
+            'in_chans': 8,
+            'num_classes': 3,
+            'model': 'U-Net',
+            'encoder': 'efficientnet-b3',
+            'publication': 'https://arxiv.org/abs/2409.16252',
+            'repo': 'https://github.com/fieldsoftheworld/ftw-baselines',
+            'bands': _ftw_sentinel2_bands,
+            'license': 'CC-BY-4.0',
+        },
+    )
+    SENTINEL2_2CLASS_NC_FTW = Weights(
+        url='https://huggingface.co/torchgeo/ftw/resolve/d2fdab6ea9d9cd38b491292cc9a5c8642533cef5/noncommercial/2-class/sentinel2_unet_effb3-bf010a31.pth',
+        transforms=_ftw_transforms,
+        meta={
+            'dataset': 'FTW',
+            'in_chans': 8,
+            'num_classes': 2,
+            'model': 'U-Net',
+            'encoder': 'efficientnet-b3',
+            'publication': 'https://arxiv.org/abs/2409.16252',
+            'repo': 'https://github.com/fieldsoftheworld/ftw-baselines',
+            'bands': _ftw_sentinel2_bands,
+            'license': 'non-commercial',
+        },
+    )
+    SENTINEL2_3CLASS_NC_FTW = Weights(
+        url='https://huggingface.co/torchgeo/ftw/resolve/d2fdab6ea9d9cd38b491292cc9a5c8642533cef5/noncommercial/3-class/sentinel2_unet_effb3-ed36f465.pth',
+        transforms=_ftw_transforms,
+        meta={
+            'dataset': 'FTW',
+            'in_chans': 8,
+            'num_classes': 3,
+            'model': 'U-Net',
+            'encoder': 'efficientnet-b3',
+            'publication': 'https://arxiv.org/abs/2409.16252',
+            'repo': 'https://github.com/fieldsoftheworld/ftw-baselines',
+            'bands': _ftw_sentinel2_bands,
+            'license': 'non-commercial',
+        },
+    )
+
+
+def unet(
+    weights: Unet_Weights | None = None,
+    classes: int | None = None,
+    *args: Any,
+    **kwargs: Any,
+) -> Unet:
+    """U-Net model.
+
+    If you use this model in your research, please cite the following paper:
+
+    * https://arxiv.org/abs/1505.04597
+
+    .. versionadded:: 0.8
+
+    Args:
+        weights: Pre-trained model weights to use.
+        classes: Number of output classes. If not specified, the number of
+            classes will be inferred from the weights.
+        *args: Additional arguments to pass to ``segmentation_models_pytorch.create_model``
+        **kwargs: Additional keyword arguments to pass to ``segmentation_models_pytorch.create_model``
+
+    Returns:
+        A U-Net model.
+    """
+    kwargs['arch'] = 'Unet'
+
+    if weights:
+        kwargs['encoder_weights'] = None
+        kwargs['in_channels'] = weights.meta['in_chans']
+        kwargs['encoder_name'] = weights.meta['encoder']
+        kwargs['classes'] = weights.meta['num_classes'] if classes is None else classes
+    else:
+        kwargs['classes'] = 1 if classes is None else classes
+
+    model: Unet = smp.create_model(*args, **kwargs)
+
+    if weights:
+        state_dict = weights.get_state_dict(progress=True)
+
+        # Load full pretrained model
+        if kwargs['classes'] == weights.meta['num_classes']:
+            missing_keys, unexpected_keys = model.load_state_dict(
+                state_dict, strict=True
+            )
+        # Randomly initialize the segmentation head for the new task
+        else:
+            del state_dict['segmentation_head.0.weight']
+            del state_dict['segmentation_head.0.bias']
+            missing_keys, unexpected_keys = model.load_state_dict(
+                state_dict, strict=False
+            )
+        assert set(missing_keys) <= {
+            'segmentation_head.0.weight',
+            'segmentation_head.0.bias',
+        }
+        assert not unexpected_keys
+
+    return model
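Putting the new module to work, a minimal inference sketch: the weights, bands, and transforms all come from the file above, while the input tensor is random placeholder data standing in for a bi-temporal Sentinel-2 window.

    import torch

    from torchgeo.models import Unet_Weights, unet

    weights = Unet_Weights.SENTINEL2_2CLASS_FTW
    model = unet(weights=weights).eval()

    # 8 channels: B4, B3, B2, B8A for t1 followed by the same bands for t2,
    # at the uint16 reflectance scale the FTW normalization expects
    x = torch.randint(0, 3000, (1, 8, 256, 256)).float()
    sample = weights.transforms({'image': x})  # divides by 3000

    with torch.no_grad():
        logits = model(sample['image'])  # (1, 2, 256, 256) for the 2-class weights
    mask = logits.argmax(dim=1)  # per-pixel class indices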
