Skip to content

[CI] Add pre-commit hook pyupgrade to auto upgrade Python syntax #1638

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ repos:
- --license-filepath
- .github/workflows/license-templates/LICENSE.txt
- --fuzzy-match-generates-todo
- repo: https://github.com/asottile/pyupgrade
rev: v3.19.1
hooks:
- id: pyupgrade
args: [--py37-plus]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

- repo: https://github.com/psf/black-pre-commit-mirror
rev: 25.1.0
hooks:
Expand Down
4 changes: 2 additions & 2 deletions python/sedona/core/jvm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

from sedona.utils.decorators import classproperty

string_types = (type(b""), type(""))
string_types = (bytes, str)


def is_greater_or_equal_version(version_a: str, version_b: str) -> bool:
Expand Down Expand Up @@ -190,7 +190,7 @@ def get_spark_java_config(
try:
used_jar_files = java_spark_conf.get(value)
except Py4JJavaError:
error_message = "Didn't find the value of {} from SparkConf".format(value)
error_message = f"Didn't find the value of {value} from SparkConf"
logging.info(error_message)

return used_jar_files, error_message
Expand Down
2 changes: 1 addition & 1 deletion python/sedona/geopandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
bool_type = bool


class GeoFrame(object, metaclass=ABCMeta):
class GeoFrame(metaclass=ABCMeta):
"""
A base class for both GeoDataFrame and GeoSeries.
"""
Expand Down
18 changes: 9 additions & 9 deletions python/sedona/geopandas/geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,31 +174,31 @@ def __init__(

def _reduce_for_geostat_function(
self,
sfun: Callable[["GeoSeries"], Column],
sfun: Callable[[GeoSeries], Column],
name: str,
axis: Optional[Axis] = None,
axis: Axis | None = None,
numeric_only: bool = True,
skipna: bool = True,
**kwargs: Any,
) -> Union["GeoSeries", Scalar]:
) -> GeoSeries | Scalar:
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")

@property
def dtypes(self) -> Union[gpd.GeoSeries, pd.Series, Dtype]:
def dtypes(self) -> gpd.GeoSeries | pd.Series | Dtype:
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")

def to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
def to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")

def _to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
def _to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")

@property
def geoindex(self) -> "GeoIndex":
def geoindex(self) -> GeoIndex:
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")

Expand Down Expand Up @@ -232,7 +232,7 @@ def copy(self, deep=False):
return self

@property
def area(self) -> "GeoDataFrame":
def area(self) -> GeoDataFrame:
"""
Returns a GeoDataFrame containing the area of each geometry expressed in the units of the CRS.

Expand Down Expand Up @@ -517,7 +517,7 @@ def buffer(
mitre_limit=5.0,
single_sided=False,
**kwargs,
) -> "GeoDataFrame":
) -> GeoDataFrame:
"""
Returns a GeoDataFrame with all geometries buffered by the specified distance.

Expand Down
2 changes: 1 addition & 1 deletion python/sedona/maps/SedonaPyDeck.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def _create_default_fill_color_(cls, gdf, plot_col):
:return: fill_color string for pydeck map
"""
plot_max = gdf[plot_col].max()
return "[85, 183, 177, ({0} / {1}) * 255 + 15]".format(plot_col, plot_max)
return f"[85, 183, 177, ({plot_col} / {plot_max}) * 255 + 15]"

@classmethod
def _create_coord_column_(cls, gdf, geometry_col, add_points=False):
Expand Down
4 changes: 2 additions & 2 deletions python/sedona/raster/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ def _do_change_pixel_anchor(self, from_anchor: PixelAnchor, to_anchor: PixelAnch

def __repr__(self):
return (
"[ {} {} {}\n".format(self.scale_x, self.skew_x, self.ip_x)
+ " {} {} {}\n".format(self.skew_y, self.scale_y, self.ip_y)
f"[ {self.scale_x} {self.skew_x} {self.ip_x}\n"
+ f" {self.skew_y} {self.scale_y} {self.ip_y}\n"
+ " 0 0 1 ]"
)

Expand Down
4 changes: 2 additions & 2 deletions python/sedona/raster/raster_serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def _deserialize(bio: BytesIO, raster_type: int) -> SedonaRaster:
width, height, bands_meta, affine_trans, crs_wkt, awt_raster
)
else:
raise ValueError("unsupported raster_type: {}".format(raster_type))
raise ValueError(f"unsupported raster_type: {raster_type}")


def _read_grid_envelope(bio: BytesIO) -> Tuple[int, int, int, int]:
Expand Down Expand Up @@ -183,7 +183,7 @@ def _read_data_buffer(bio: BytesIO) -> DataBuffer:
elif data_type == DataBuffer.TYPE_DOUBLE:
np_array = np.frombuffer(bio.read(8 * bank_size), dtype=np.float64)
else:
raise ValueError("unknown data_type {}".format(data_type))
raise ValueError(f"unknown data_type {data_type}")

banks.append(np_array)

Expand Down
2 changes: 1 addition & 1 deletion python/sedona/sql/dataframe_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _get_readable_name_for_type(type: Type) -> str:
:rtype: str
"""
if isinstance(type, typing._GenericAlias) and type.__origin__._name == "Union":
return f"Union[{', '.join((_strip_extra_from_class_name(str(x)) for x in type.__args__))}]"
return f"Union[{', '.join(_strip_extra_from_class_name(str(x)) for x in type.__args__)}]"
return _strip_extra_from_class_name(str(type))


Expand Down
2 changes: 1 addition & 1 deletion python/sedona/utils/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
T = TypeVar("T")


class classproperty(object):
class classproperty:

def __init__(self, f):
self.f = f
Expand Down
4 changes: 1 addition & 3 deletions python/sedona/utils/geometry_serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ def find_geos_c_dll():
".dll"
):
return os.path.join(lib_dirpath, filename)
raise RuntimeError(
"geos_c DLL not found in {}\\[S|s]hapely.libs".format(packages_dir)
)
raise RuntimeError(f"geos_c DLL not found in {packages_dir}\\[S|s]hapely.libs")

if shapely.__version__.startswith("2."):
if sys.platform != "win32":
Expand Down
6 changes: 3 additions & 3 deletions python/sedona/utils/geometry_serde_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def type_of(geom) -> int:
elif geom._ndim == 3:
return CoordinateType.XYZ
else:
raise ValueError("Invalid coordinate dimension: {}".format(geom._ndim))
raise ValueError(f"Invalid coordinate dimension: {geom._ndim}")

@staticmethod
def bytes_per_coord(coord_type: int) -> int:
Expand Down Expand Up @@ -233,7 +233,7 @@ def deserialize(buffer: bytes) -> Optional[BaseGeometry]:
elif geom_type == GeometryTypeID.GEOMETRYCOLLECTION:
geom = deserialize_geometry_collection(geom_buffer)
else:
raise ValueError("Unsupported geometry type ID: {}".format(geom_type))
raise ValueError(f"Unsupported geometry type ID: {geom_type}")
return geom, geom_buffer.ints_offset


Expand Down Expand Up @@ -546,7 +546,7 @@ def serialize_shapely_1_empty_geom(geom: BaseGeometry) -> bytearray:
geom_type = GeometryTypeID.MULTIPOLYGON
total_size = 12
else:
raise ValueError("Invalid empty geometry collection object: {}".format(geom))
raise ValueError(f"Invalid empty geometry collection object: {geom}")
return create_buffer_for_geom(geom_type, CoordinateType.XY, total_size, 0)


Expand Down
2 changes: 1 addition & 1 deletion python/sedona/utils/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def register(self, meth):

if parm.annotation is inspect.Parameter.empty:
raise InvalidParametersException(
"Argument {} must be annotated with a type".format(name)
f"Argument {name} must be annotated with a type"
)
if parm.default is not inspect.Parameter.empty:
self._methods[tuple(types)] = meth
Expand Down
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from sedona import version

with open("README.md", "r") as fh:
with open("README.md") as fh:
long_description = fh.read()

extension_args = {}
Expand Down
32 changes: 14 additions & 18 deletions python/tests/core/test_avoiding_python_jvm_serde_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,24 +165,20 @@ def test_spatial_join_query_flat_to_df(self):
right_geometries = self.__row_to_list(right_geometries_raw)

# Ignore the ordering of these
assert set(geom[0] for geom in left_geometries) == set(
[
"POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
"POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
"POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
"POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
"POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
]
)
assert set(geom[0] for geom in right_geometries) == set(
[
"POINT (-3 5)",
"POINT (11 5)",
"POINT (4 3)",
"POINT (-1 -1)",
"POINT (-4 -5)",
]
)
assert {geom[0] for geom in left_geometries} == {
"POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
"POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
"POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
"POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
"POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
}
assert {geom[0] for geom in right_geometries} == {
"POINT (-3 5)",
"POINT (11 5)",
"POINT (4 3)",
"POINT (-1 -1)",
"POINT (-4 -5)",
}

def test_range_query_flat_to_df(self):
poi_point_rdd = WktReader.readToGeometryRDD(
Expand Down
2 changes: 1 addition & 1 deletion python/tests/geopandas/test_geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_psdf(self):
},
index=[10, 20, 30, 40, 50, 60],
)
assert psdf.count().count() is 3
assert psdf.count().count() == 3

def test_type_single_geometry_column(self):
# Create a GeoDataFrame with a single geometry column and additional attributes
Expand Down
8 changes: 4 additions & 4 deletions python/tests/geopandas/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_psdf(self):
},
index=[10, 20, 30, 40, 50, 60],
)
assert psdf.count().count() is 3
assert psdf.count().count() == 3

def test_internal_st_function(self):
# this is to make sure the spark session works with internal sedona udfs
Expand All @@ -82,19 +82,19 @@ def test_area(self):
area = self.g1.area
assert area is not None
assert type(area) is GeoSeries
assert area.count() is 2
assert area.count() == 2

def test_buffer(self):
buffer = self.g1.buffer(0.2)
assert buffer is not None
assert type(buffer) is GeoSeries
assert buffer.count() is 2
assert buffer.count() == 2

def test_buffer_then_area(self):
area = self.g1.buffer(0.2).area
assert area is not None
assert type(area) is GeoSeries
assert area.count() is 2
assert area.count() == 2

def test_buffer_then_geoparquet(self):
temp_file_path = os.path.join(
Expand Down
4 changes: 2 additions & 2 deletions python/tests/geopandas/test_sjoin.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ def test_sjoin_method1(self):
joined = sjoin(left, right)
assert joined is not None
assert type(joined) is GeoSeries
assert joined.count() is 4
assert joined.count() == 4

def test_sjoin_method2(self):
left = self.g1
right = self.g2
joined = left.sjoin(right)
assert joined is not None
assert type(joined) is GeoSeries
assert joined.count() is 4
assert joined.count() == 4
36 changes: 14 additions & 22 deletions python/tests/sql/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1967,14 +1967,10 @@ def test_st_collect_on_array_type(self):
)

# then result should be as expected
assert set(
[
el[0]
for el in geometry_df_collected.selectExpr(
"ST_AsText(collected)"
).collect()
]
) == {
assert {
el[0]
for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
} == {
"MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
"MULTIPOINT ((1 2), (-2 3))",
"MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
Expand All @@ -2000,14 +1996,10 @@ def test_st_collect_on_multiple_columns(self):
)

# then result should be calculated
assert set(
[
el[0]
for el in geometry_df_collected.selectExpr(
"ST_AsText(collected)"
).collect()
]
) == {
assert {
el[0]
for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
} == {
"MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
"MULTIPOINT ((1 2), (-2 3))",
"MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
Expand Down Expand Up @@ -2036,7 +2028,7 @@ def test_st_reverse(self):
}
for input_geom, expected_geom in test_cases.items():
reversed_geometry = self.spark.sql(
"select ST_AsText(ST_Reverse(ST_GeomFromText({})))".format(input_geom)
f"select ST_AsText(ST_Reverse(ST_GeomFromText({input_geom})))"
)
assert reversed_geometry.take(1)[0][0] == expected_geom

Expand Down Expand Up @@ -2134,7 +2126,7 @@ def test_st_force_2d(self):

for input_geom, expected_geom in tests1.items():
geom_2d = self.spark.sql(
"select ST_AsText(ST_Force_2D(ST_GeomFromText({})))".format(input_geom)
f"select ST_AsText(ST_Force_2D(ST_GeomFromText({input_geom})))"
)
assert geom_2d.take(1)[0][0] == expected_geom

Expand All @@ -2147,7 +2139,7 @@ def test_st_force2d(self):

for input_geom, expected_geom in tests1.items():
geom_2d = self.spark.sql(
"select ST_AsText(ST_Force2D(ST_GeomFromText({})))".format(input_geom)
f"select ST_AsText(ST_Force2D(ST_GeomFromText({input_geom})))"
)
assert geom_2d.take(1)[0][0] == expected_geom

Expand All @@ -2171,7 +2163,7 @@ def test_st_buildarea(self):

for input_geom, expected_geom in tests.items():
areal_geom = self.spark.sql(
"select ST_AsText(ST_BuildArea(ST_GeomFromText({})))".format(input_geom)
f"select ST_AsText(ST_BuildArea(ST_GeomFromText({input_geom})))"
)
assert areal_geom.take(1)[0][0] == expected_geom

Expand Down Expand Up @@ -2244,7 +2236,7 @@ def test_st_s2_cell_ids(self):
]
for input_geom in test_cases:
cell_ids = self.spark.sql(
"select ST_S2CellIDs(ST_GeomFromText({}), 6)".format(input_geom)
f"select ST_S2CellIDs(ST_GeomFromText({input_geom}), 6)"
).take(1)[0][0]
assert isinstance(cell_ids, list)
assert isinstance(cell_ids[0], int)
Expand Down Expand Up @@ -2272,7 +2264,7 @@ def test_st_h3_cell_ids(self):
]
for input_geom in test_cases:
cell_ids = self.spark.sql(
"select ST_H3CellIDs(ST_GeomFromText({}), 6, true)".format(input_geom)
f"select ST_H3CellIDs(ST_GeomFromText({input_geom}), 6, true)"
).take(1)[0][0]
assert isinstance(cell_ids, list)
assert isinstance(cell_ids[0], int)
Expand Down
Loading
Loading