microsoft · pioneerHitesh · Jul 5, 2024 · Jul 5, 2024 · Jul 5, 2024 · Jul 5, 2024
diff --git a/torchgeo/datasets/advance.py b/torchgeo/datasets/advance.py
@@ -17,7 +17,7 @@
 
 from .errors import DatasetNotFoundError
 from .geo import NonGeoDataset
-from .utils import download_and_extract_archive, lazy_import
+from .utils import Path, download_and_extract_archive, lazy_import
 
 
 class ADVANCE(NonGeoDataset):
@@ -88,7 +88,7 @@ class ADVANCE(NonGeoDataset):
 
     def __init__(
         self,
-        root: str = 'data',
+        root: Path = 'data',
         transforms: Callable[[dict[str, Tensor]], dict[str, Tensor]] | None = None,
         download: bool = False,
         checksum: bool = False,
@@ -151,7 +151,7 @@ def __len__(self) -> int:
         """
         return len(self.files)
 
-    def _load_files(self, root: str) -> list[dict[str, str]]:
+    def _load_files(self, root: Path) -> list[dict[str, str]]:
         """Return the paths of the files in the dataset.
 
         Args:

diff --git a/torchgeo/datasets/agb_live_woody_density.py b/torchgeo/datasets/agb_live_woody_density.py
@@ -5,6 +5,7 @@
 
 import json
 import os
+import pathlib
 from collections.abc import Callable, Iterable
 from typing import Any
 
@@ -14,7 +15,7 @@
 
 from .errors import DatasetNotFoundError
 from .geo import RasterDataset
-from .utils import download_url
+from .utils import Path, download_url
 
 
 class AbovegroundLiveWoodyBiomassDensity(RasterDataset):
@@ -57,7 +58,7 @@ class AbovegroundLiveWoodyBiomassDensity(RasterDataset):
 
     def __init__(
         self,
-        paths: str | Iterable[str] = 'data',
+        paths: Path | Iterable[Path] = 'data',
         crs: CRS | None = None,
         res: float | None = None,
         transforms: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
@@ -105,7 +106,7 @@ def _verify(self) -> None:
 
     def _download(self) -> None:
         """Download the dataset."""
-        assert isinstance(self.paths, str)
+        assert isinstance(self.paths, str | pathlib.Path)
         download_url(self.url, self.paths, self.base_filename)
 
         with open(os.path.join(self.paths, self.base_filename)) as f:

diff --git a/torchgeo/datasets/agrifieldnet.py b/torchgeo/datasets/agrifieldnet.py
@@ -4,6 +4,7 @@
 """AgriFieldNet India Challenge dataset."""
 
 import os
+import pathlib
 import re
 from collections.abc import Callable, Iterable, Sequence
 from typing import Any, cast
@@ -16,7 +17,7 @@
 
 from .errors import RGBBandsMissingError
 from .geo import RasterDataset
-from .utils import BoundingBox
+from .utils import BoundingBox, Path
 
 
 class AgriFieldNet(RasterDataset):
@@ -115,7 +116,7 @@ class AgriFieldNet(RasterDataset):
 
     def __init__(
         self,
-        paths: str | Iterable[str] = 'data',
+        paths: Path | Iterable[Path] = 'data',
         crs: CRS | None = None,
         classes: list[int] = list(cmap.keys()),
         bands: Sequence[str] = all_bands,
@@ -167,7 +168,7 @@ def __getitem__(self, query: BoundingBox) -> dict[str, Any]:
         Returns:
             data, label, and field ids at that index
         """
-        assert isinstance(self.paths, str)
+        assert isinstance(self.paths, str | pathlib.Path)
 
         hits = self.index.intersection(tuple(query), objects=True)
         filepaths = cast(list[str], [hit.object for hit in hits])

diff --git a/torchgeo/datasets/astergdem.py b/torchgeo/datasets/astergdem.py
@@ -12,6 +12,7 @@
 
 from .errors import DatasetNotFoundError
 from .geo import RasterDataset
+from .utils import Path
 
 
 class AsterGDEM(RasterDataset):
@@ -47,7 +48,7 @@ class AsterGDEM(RasterDataset):
 
     def __init__(
         self,
-        paths: str | list[str] = 'data',
+        paths: Path | list[Path] = 'data',
         crs: CRS | None = None,
         res: float | None = None,
         transforms: Callable[[dict[str, Any]], dict[str, Any]] | None = None,

diff --git a/torchgeo/datasets/benin_cashews.py b/torchgeo/datasets/benin_cashews.py
@@ -19,7 +19,7 @@
 
 from .errors import DatasetNotFoundError, RGBBandsMissingError
 from .geo import NonGeoDataset
-from .utils import which
+from .utils import Path, which
 
 
 class BeninSmallHolderCashews(NonGeoDataset):
@@ -163,7 +163,7 @@ class BeninSmallHolderCashews(NonGeoDataset):
 
     def __init__(
         self,
-        root: str = 'data',
+        root: Path = 'data',
         chip_size: int = 256,
         stride: int = 128,
         bands: Sequence[str] = all_bands,

diff --git a/torchgeo/datasets/bigearthnet.py b/torchgeo/datasets/bigearthnet.py
@@ -18,7 +18,7 @@
 
 from .errors import DatasetNotFoundError
 from .geo import NonGeoDataset
-from .utils import download_url, extract_archive, sort_sentinel2_bands
+from .utils import Path, download_url, extract_archive, sort_sentinel2_bands
 
 
 class BigEarthNet(NonGeoDataset):
@@ -267,7 +267,7 @@ class BigEarthNet(NonGeoDataset):
 
     def __init__(
         self,
-        root: str = 'data',
+        root: Path = 'data',
         split: str = 'train',
         bands: str = 'all',
         num_classes: int = 19,

diff --git a/torchgeo/datasets/biomassters.py b/torchgeo/datasets/biomassters.py
@@ -16,7 +16,7 @@
 
 from .errors import DatasetNotFoundError
 from .geo import NonGeoDataset
-from .utils import percentile_normalization
+from .utils import Path, percentile_normalization
 
 
 class BioMassters(NonGeoDataset):
@@ -57,7 +57,7 @@ class BioMassters(NonGeoDataset):
 
     def __init__(
         self,
-        root: str = 'data',
+        root: Path = 'data',
         split: str = 'train',
         sensors: Sequence[str] = ['S1', 'S2'],
         as_time_series: bool = False,

diff --git a/torchgeo/datasets/cbf.py b/torchgeo/datasets/cbf.py
@@ -4,6 +4,7 @@
 """Canadian Building Footprints dataset."""
 
 import os
+import pathlib
 from collections.abc import Callable, Iterable
 from typing import Any
 
@@ -13,7 +14,7 @@
 
 from .errors import DatasetNotFoundError
 from .geo import VectorDataset
-from .utils import check_integrity, download_and_extract_archive
+from .utils import Path, check_integrity, download_and_extract_archive
 
 
 class CanadianBuildingFootprints(VectorDataset):
@@ -62,7 +63,7 @@ class CanadianBuildingFootprints(VectorDataset):
 
     def __init__(
         self,
-        paths: str | Iterable[str] = 'data',
+        paths: Path | Iterable[Path] = 'data',
         crs: CRS | None = None,
         res: float = 0.00001,
         transforms: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
@@ -104,7 +105,7 @@ def _check_integrity(self) -> bool:
         Returns:
             True if dataset files are found and/or MD5s match, else False
         """
-        assert isinstance(self.paths, str)
+        assert isinstance(self.paths, str | pathlib.Path)
         for prov_terr, md5 in zip(self.provinces_territories, self.md5s):
             filepath = os.path.join(self.paths, prov_terr + '.zip')
             if not check_integrity(filepath, md5 if self.checksum else None):
@@ -116,7 +117,7 @@ def _download(self) -> None:
         if self._check_integrity():
             print('Files already downloaded and verified')
             return
-        assert isinstance(self.paths, str)
+        assert isinstance(self.paths, str | pathlib.Path)
         for prov_terr, md5 in zip(self.provinces_territories, self.md5s):
             download_and_extract_archive(
                 self.url + prov_terr + '.zip',

diff --git a/torchgeo/datasets/cdl.py b/torchgeo/datasets/cdl.py
@@ -4,6 +4,7 @@
 """CDL dataset."""
 
 import os
+import pathlib
 from collections.abc import Callable, Iterable
 from typing import Any
 
@@ -14,7 +15,7 @@
 
 from .errors import DatasetNotFoundError
 from .geo import RasterDataset
-from .utils import BoundingBox, download_url, extract_archive
+from .utils import BoundingBox, Path, download_url, extract_archive
 
 
 class CDL(RasterDataset):
@@ -207,7 +208,7 @@ class CDL(RasterDataset):
 
     def __init__(
         self,
-        paths: str | Iterable[str] = 'data',
+        paths: Path | Iterable[Path] = 'data',
         crs: CRS | None = None,
         res: float | None = None,
         years: list[int] = [2023],
@@ -294,7 +295,7 @@ def _verify(self) -> None:
 
         # Check if the zip files have already been downloaded
         exists = []
-        assert isinstance(self.paths, str)
+        assert isinstance(self.paths, str | pathlib.Path)
         for year in self.years:
             pathname = os.path.join(
                 self.paths, self.zipfile_glob.replace('*', str(year))
@@ -327,11 +328,11 @@ def _download(self) -> None:
 
     def _extract(self) -> None:
         """Extract the dataset."""
-        assert isinstance(self.paths, str)
+        assert isinstance(self.paths, str | pathlib.Path)
         for year in self.years:
             zipfile_name = self.zipfile_glob.replace('*', str(year))
             pathname = os.path.join(self.paths, zipfile_name)
-            extract_archive(pathname, self.paths)
+            extract_archive(pathname, str(self.paths))
 
     def plot(
         self,

diff --git a/torchgeo/datasets/chabud.py b/torchgeo/datasets/chabud.py
@@ -14,7 +14,7 @@
 
 from .errors import DatasetNotFoundError
 from .geo import NonGeoDataset
-from .utils import download_url, lazy_import, percentile_normalization
+from .utils import Path, download_url, lazy_import, percentile_normalization
 
 
 class ChaBuD(NonGeoDataset):
@@ -75,7 +75,7 @@ class ChaBuD(NonGeoDataset):
 
     def __init__(
         self,
-        root: str = 'data',
+        root: Path = 'data',
         split: str = 'train',
         bands: list[str] = all_bands,
         transforms: Callable[[dict[str, Tensor]], dict[str, Tensor]] | None = None,

diff --git a/torchgeo/datasets/chesapeake.py b/torchgeo/datasets/chesapeake.py
@@ -5,6 +5,7 @@
 
 import abc
 import os
+import pathlib
 import sys
 from collections.abc import Callable, Iterable, Sequence
 from typing import Any, cast
@@ -26,7 +27,7 @@
 from .errors import DatasetNotFoundError
 from .geo import GeoDataset, RasterDataset
 from .nlcd import NLCD
-from .utils import BoundingBox, download_url, extract_archive
+from .utils import BoundingBox, Path, download_url, extract_archive
 
 
 class Chesapeake(RasterDataset, abc.ABC):
@@ -91,7 +92,7 @@ def url(self) -> str:
 
     def __init__(
         self,
-        paths: str | Iterable[str] = 'data',
+        paths: Path | Iterable[Path] = 'data',
         crs: CRS | None = None,
         res: float | None = None,
         transforms: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
@@ -145,7 +146,7 @@ def _verify(self) -> None:
             return
 
         # Check if the zip file has already been downloaded
-        assert isinstance(self.paths, str)
+        assert isinstance(self.paths, str | pathlib.Path)
         if os.path.exists(os.path.join(self.paths, self.zipfile)):
             self._extract()
             return
@@ -164,7 +165,7 @@ def _download(self) -> None:
 
     def _extract(self) -> None:
         """Extract the dataset."""
-        assert isinstance(self.paths, str)
+        assert isinstance(self.paths, str | pathlib.Path)
         extract_archive(os.path.join(self.paths, self.zipfile))
 
     def plot(
@@ -510,7 +511,7 @@ class ChesapeakeCVPR(GeoDataset):
 
     def __init__(
         self,
-        root: str = 'data',
+        root: Path = 'data',
         splits: Sequence[str] = ['de-train'],
         layers: Sequence[str] = ['naip-new', 'lc'],
         transforms: Callable[[dict[str, Any]], dict[str, Any]] | None = None,

diff --git a/torchgeo/datasets/cloud_cover.py b/torchgeo/datasets/cloud_cover.py
@@ -16,7 +16,7 @@
 
 from .errors import DatasetNotFoundError, RGBBandsMissingError
 from .geo import NonGeoDataset
-from .utils import which
+from .utils import Path, which
 
 
 class CloudCoverDetection(NonGeoDataset):
@@ -61,7 +61,7 @@ class CloudCoverDetection(NonGeoDataset):
 
     def __init__(
         self,
-        root: str = 'data',
+        root: Path = 'data',
         split: str = 'train',
         bands: Sequence[str] = all_bands,
         transforms: Callable[[dict[str, Tensor]], dict[str, Tensor]] | None = None,

diff --git a/torchgeo/datasets/cms_mangrove_canopy.py b/torchgeo/datasets/cms_mangrove_canopy.py
@@ -4,6 +4,7 @@
 """CMS Global Mangrove Canopy dataset."""
 
 import os
+import pathlib
 from collections.abc import Callable
 from typing import Any
 
@@ -13,7 +14,7 @@
 
 from .errors import DatasetNotFoundError
 from .geo import RasterDataset
-from .utils import check_integrity, extract_archive
+from .utils import Path, check_integrity, extract_archive
 
 
 class CMSGlobalMangroveCanopy(RasterDataset):
@@ -169,7 +170,7 @@ class CMSGlobalMangroveCanopy(RasterDataset):
 
     def __init__(
         self,
-        paths: str | list[str] = 'data',
+        paths: Path | list[Path] = 'data',
         crs: CRS | None = None,
         res: float | None = None,
         measurement: str = 'agb',
@@ -228,7 +229,7 @@ def _verify(self) -> None:
             return
 
         # Check if the zip file has already been downloaded
-        assert isinstance(self.paths, str)
+        assert isinstance(self.paths, str | pathlib.Path)
         pathname = os.path.join(self.paths, self.zipfile)
         if os.path.exists(pathname):
             if self.checksum and not check_integrity(pathname, self.md5):
@@ -240,7 +241,7 @@ def _verify(self) -> None:
 
     def _extract(self) -> None:
         """Extract the dataset."""
-        assert isinstance(self.paths, str)
+        assert isinstance(self.paths, str | pathlib.Path)
         pathname = os.path.join(self.paths, self.zipfile)
         extract_archive(pathname)