apache · jiayuasu · Mar 31, 2025 · Oct 13, 2024 · jbampton · Mar 28, 2025
@@ -114,6 +114,11 @@ repos:
           - --license-filepath
           - .github/workflows/license-templates/LICENSE.txt
           - --fuzzy-match-generates-todo
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v3.19.1
+    hooks:
+      - id: pyupgrade
+        args: [--py37-plus]
   - repo: https://github.com/psf/black-pre-commit-mirror
     rev: 25.1.0
     hooks:

@@ -28,7 +28,7 @@
 
 from sedona.utils.decorators import classproperty
 
-string_types = (type(b""), type(""))
+string_types = (bytes, str)
 
 
 def is_greater_or_equal_version(version_a: str, version_b: str) -> bool:
@@ -190,7 +190,7 @@ def get_spark_java_config(
         try:
             used_jar_files = java_spark_conf.get(value)
         except Py4JJavaError:
-            error_message = "Didn't find the value of {} from SparkConf".format(value)
+            error_message = f"Didn't find the value of {value} from SparkConf"
             logging.info(error_message)
 
         return used_jar_files, error_message

@@ -40,7 +40,7 @@
 bool_type = bool
 
 
-class GeoFrame(object, metaclass=ABCMeta):
+class GeoFrame(metaclass=ABCMeta):
     """
     A base class for both GeoDataFrame and GeoSeries.
     """

@@ -174,31 +174,31 @@ def __init__(
 
     def _reduce_for_geostat_function(
         self,
-        sfun: Callable[["GeoSeries"], Column],
+        sfun: Callable[[GeoSeries], Column],
         name: str,
-        axis: Optional[Axis] = None,
+        axis: Axis | None = None,
         numeric_only: bool = True,
         skipna: bool = True,
         **kwargs: Any,
-    ) -> Union["GeoSeries", Scalar]:
+    ) -> GeoSeries | Scalar:
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
     @property
-    def dtypes(self) -> Union[gpd.GeoSeries, pd.Series, Dtype]:
+    def dtypes(self) -> gpd.GeoSeries | pd.Series | Dtype:
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
-    def to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
+    def to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
-    def _to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
+    def _to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
     @property
-    def geoindex(self) -> "GeoIndex":
+    def geoindex(self) -> GeoIndex:
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
@@ -232,7 +232,7 @@ def copy(self, deep=False):
             return self
 
     @property
-    def area(self) -> "GeoDataFrame":
+    def area(self) -> GeoDataFrame:
         """
         Returns a GeoDataFrame containing the area of each geometry expressed in the units of the CRS.
 
@@ -517,7 +517,7 @@ def buffer(
         mitre_limit=5.0,
         single_sided=False,
         **kwargs,
-    ) -> "GeoDataFrame":
+    ) -> GeoDataFrame:
         """
         Returns a GeoDataFrame with all geometries buffered by the specified distance.
 

@@ -315,7 +315,7 @@ def _create_default_fill_color_(cls, gdf, plot_col):
         :return: fill_color string for pydeck map
         """
         plot_max = gdf[plot_col].max()
-        return "[85, 183, 177, ({0} / {1}) * 255 + 15]".format(plot_col, plot_max)
+        return f"[85, 183, 177, ({plot_col} / {plot_max}) * 255 + 15]"
 
     @classmethod
     def _create_coord_column_(cls, gdf, geometry_col, add_points=False):

@@ -104,8 +104,8 @@ def _do_change_pixel_anchor(self, from_anchor: PixelAnchor, to_anchor: PixelAnch
 
     def __repr__(self):
         return (
-            "[ {} {} {}\n".format(self.scale_x, self.skew_x, self.ip_x)
-            + "  {} {} {}\n".format(self.skew_y, self.scale_y, self.ip_y)
+            f"[ {self.scale_x} {self.skew_x} {self.ip_x}\n"
+            + f"  {self.skew_y} {self.scale_y} {self.ip_y}\n"
             + "   0  0  1 ]"
         )
 

@@ -63,7 +63,7 @@ def _deserialize(bio: BytesIO, raster_type: int) -> SedonaRaster:
             width, height, bands_meta, affine_trans, crs_wkt, awt_raster
         )
     else:
-        raise ValueError("unsupported raster_type: {}".format(raster_type))
+        raise ValueError(f"unsupported raster_type: {raster_type}")
 
 
 def _read_grid_envelope(bio: BytesIO) -> Tuple[int, int, int, int]:
@@ -183,7 +183,7 @@ def _read_data_buffer(bio: BytesIO) -> DataBuffer:
         elif data_type == DataBuffer.TYPE_DOUBLE:
             np_array = np.frombuffer(bio.read(8 * bank_size), dtype=np.float64)
         else:
-            raise ValueError("unknown data_type {}".format(data_type))
+            raise ValueError(f"unknown data_type {data_type}")
 
         banks.append(np_array)
 

@@ -123,7 +123,7 @@ def _get_readable_name_for_type(type: Type) -> str:
     :rtype: str
     """
     if isinstance(type, typing._GenericAlias) and type.__origin__._name == "Union":
-        return f"Union[{', '.join((_strip_extra_from_class_name(str(x)) for x in type.__args__))}]"
+        return f"Union[{', '.join(_strip_extra_from_class_name(str(x)) for x in type.__args__)}]"
     return _strip_extra_from_class_name(str(type))
 
 

@@ -20,7 +20,7 @@
 T = TypeVar("T")
 
 
-class classproperty(object):
+class classproperty:
 
     def __init__(self, f):
         self.f = f

@@ -42,9 +42,7 @@ def find_geos_c_dll():
                     ".dll"
                 ):
                     return os.path.join(lib_dirpath, filename)
-        raise RuntimeError(
-            "geos_c DLL not found in {}\\[S|s]hapely.libs".format(packages_dir)
-        )
+        raise RuntimeError(f"geos_c DLL not found in {packages_dir}\\[S|s]hapely.libs")
 
     if shapely.__version__.startswith("2."):
         if sys.platform != "win32":

@@ -82,7 +82,7 @@ def type_of(geom) -> int:
         elif geom._ndim == 3:
             return CoordinateType.XYZ
         else:
-            raise ValueError("Invalid coordinate dimension: {}".format(geom._ndim))
+            raise ValueError(f"Invalid coordinate dimension: {geom._ndim}")
 
     @staticmethod
     def bytes_per_coord(coord_type: int) -> int:
@@ -233,7 +233,7 @@ def deserialize(buffer: bytes) -> Optional[BaseGeometry]:
     elif geom_type == GeometryTypeID.GEOMETRYCOLLECTION:
         geom = deserialize_geometry_collection(geom_buffer)
     else:
-        raise ValueError("Unsupported geometry type ID: {}".format(geom_type))
+        raise ValueError(f"Unsupported geometry type ID: {geom_type}")
     return geom, geom_buffer.ints_offset
 
 
@@ -546,7 +546,7 @@ def serialize_shapely_1_empty_geom(geom: BaseGeometry) -> bytearray:
         geom_type = GeometryTypeID.MULTIPOLYGON
         total_size = 12
     else:
-        raise ValueError("Invalid empty geometry collection object: {}".format(geom))
+        raise ValueError(f"Invalid empty geometry collection object: {geom}")
     return create_buffer_for_geom(geom_type, CoordinateType.XY, total_size, 0)
 
 

@@ -79,7 +79,7 @@ def register(self, meth):
 
             if parm.annotation is inspect.Parameter.empty:
                 raise InvalidParametersException(
-                    "Argument {} must be annotated with a type".format(name)
+                    f"Argument {name} must be annotated with a type"
                 )
             if parm.default is not inspect.Parameter.empty:
                 self._methods[tuple(types)] = meth

@@ -21,7 +21,7 @@
 
 from sedona import version
 
-with open("README.md", "r") as fh:
+with open("README.md") as fh:
     long_description = fh.read()
 
 extension_args = {}

@@ -165,24 +165,20 @@ def test_spatial_join_query_flat_to_df(self):
         right_geometries = self.__row_to_list(right_geometries_raw)
 
         # Ignore the ordering of these
-        assert set(geom[0] for geom in left_geometries) == set(
-            [
-                "POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
-                "POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
-                "POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
-                "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
-                "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
-            ]
-        )
-        assert set(geom[0] for geom in right_geometries) == set(
-            [
-                "POINT (-3 5)",
-                "POINT (11 5)",
-                "POINT (4 3)",
-                "POINT (-1 -1)",
-                "POINT (-4 -5)",
-            ]
-        )
+        assert {geom[0] for geom in left_geometries} == {
+            "POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
+            "POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
+            "POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
+            "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
+            "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
+        }
+        assert {geom[0] for geom in right_geometries} == {
+            "POINT (-3 5)",
+            "POINT (11 5)",
+            "POINT (4 3)",
+            "POINT (-1 -1)",
+            "POINT (-4 -5)",
+        }
 
     def test_range_query_flat_to_df(self):
         poi_point_rdd = WktReader.readToGeometryRDD(

@@ -56,7 +56,7 @@ def test_psdf(self):
             },
             index=[10, 20, 30, 40, 50, 60],
         )
-        assert psdf.count().count() is 3
+        assert psdf.count().count() == 3
 
     def test_type_single_geometry_column(self):
         # Create a GeoDataFrame with a single geometry column and additional attributes

@@ -56,7 +56,7 @@ def test_psdf(self):
             },
             index=[10, 20, 30, 40, 50, 60],
         )
-        assert psdf.count().count() is 3
+        assert psdf.count().count() == 3
 
     def test_internal_st_function(self):
         # this is to make sure the spark session works with internal sedona udfs
@@ -82,19 +82,19 @@ def test_area(self):
         area = self.g1.area
         assert area is not None
         assert type(area) is GeoSeries
-        assert area.count() is 2
+        assert area.count() == 2
 
     def test_buffer(self):
         buffer = self.g1.buffer(0.2)
         assert buffer is not None
         assert type(buffer) is GeoSeries
-        assert buffer.count() is 2
+        assert buffer.count() == 2
 
     def test_buffer_then_area(self):
         area = self.g1.buffer(0.2).area
         assert area is not None
         assert type(area) is GeoSeries
-        assert area.count() is 2
+        assert area.count() == 2
 
     def test_buffer_then_geoparquet(self):
         temp_file_path = os.path.join(

@@ -42,12 +42,12 @@ def test_sjoin_method1(self):
         joined = sjoin(left, right)
         assert joined is not None
         assert type(joined) is GeoSeries
-        assert joined.count() is 4
+        assert joined.count() == 4
 
     def test_sjoin_method2(self):
         left = self.g1
         right = self.g2
         joined = left.sjoin(right)
         assert joined is not None
         assert type(joined) is GeoSeries
-        assert joined.count() is 4
+        assert joined.count() == 4
@@ -1967,14 +1967,10 @@ def test_st_collect_on_array_type(self):
         )
 
         # then result should be as expected
-        assert set(
-            [
-                el[0]
-                for el in geometry_df_collected.selectExpr(
-                    "ST_AsText(collected)"
-                ).collect()
-            ]
-        ) == {
+        assert {
+            el[0]
+            for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
+        } == {
             "MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
             "MULTIPOINT ((1 2), (-2 3))",
             "MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
@@ -2000,14 +1996,10 @@ def test_st_collect_on_multiple_columns(self):
         )
 
         # then result should be calculated
-        assert set(
-            [
-                el[0]
-                for el in geometry_df_collected.selectExpr(
-                    "ST_AsText(collected)"
-                ).collect()
-            ]
-        ) == {
+        assert {
+            el[0]
+            for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
+        } == {
             "MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
             "MULTIPOINT ((1 2), (-2 3))",
             "MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
@@ -2036,7 +2028,7 @@ def test_st_reverse(self):
         }
         for input_geom, expected_geom in test_cases.items():
             reversed_geometry = self.spark.sql(
-                "select ST_AsText(ST_Reverse(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_Reverse(ST_GeomFromText({input_geom})))"
             )
             assert reversed_geometry.take(1)[0][0] == expected_geom
 
@@ -2134,7 +2126,7 @@ def test_st_force_2d(self):
 
         for input_geom, expected_geom in tests1.items():
             geom_2d = self.spark.sql(
-                "select ST_AsText(ST_Force_2D(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_Force_2D(ST_GeomFromText({input_geom})))"
             )
             assert geom_2d.take(1)[0][0] == expected_geom
 
@@ -2147,7 +2139,7 @@ def test_st_force2d(self):
 
         for input_geom, expected_geom in tests1.items():
             geom_2d = self.spark.sql(
-                "select ST_AsText(ST_Force2D(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_Force2D(ST_GeomFromText({input_geom})))"
             )
             assert geom_2d.take(1)[0][0] == expected_geom
 
@@ -2171,7 +2163,7 @@ def test_st_buildarea(self):
 
         for input_geom, expected_geom in tests.items():
             areal_geom = self.spark.sql(
-                "select ST_AsText(ST_BuildArea(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_BuildArea(ST_GeomFromText({input_geom})))"
             )
             assert areal_geom.take(1)[0][0] == expected_geom
 
@@ -2244,7 +2236,7 @@ def test_st_s2_cell_ids(self):
         ]
         for input_geom in test_cases:
             cell_ids = self.spark.sql(
-                "select ST_S2CellIDs(ST_GeomFromText({}), 6)".format(input_geom)
+                f"select ST_S2CellIDs(ST_GeomFromText({input_geom}), 6)"
             ).take(1)[0][0]
             assert isinstance(cell_ids, list)
             assert isinstance(cell_ids[0], int)
@@ -2272,7 +2264,7 @@ def test_st_h3_cell_ids(self):
         ]
         for input_geom in test_cases:
             cell_ids = self.spark.sql(
-                "select ST_H3CellIDs(ST_GeomFromText({}), 6, true)".format(input_geom)
+                f"select ST_H3CellIDs(ST_GeomFromText({input_geom}), 6, true)"
             ).take(1)[0][0]
             assert isinstance(cell_ids, list)
             assert isinstance(cell_ids[0], int)