Skip to content

Commit 657700d

Browse files
feat: Support numpy 2.0 (#2863)
1 parent 3c38d63 commit 657700d

File tree

8 files changed

+140
-114
lines changed

8 files changed

+140
-114
lines changed

Diff for: awswrangler/_utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -856,7 +856,7 @@ def split_pandas_frame(df: pd.DataFrame, splits: int) -> list[pd.DataFrame]:
856856
total = len(df)
857857
each_section, extras = divmod(total, splits)
858858
section_sizes = [0] + extras * [each_section + 1] + (splits - extras) * [each_section]
859-
div_points = _nx.array(section_sizes, dtype=_nx.intp).cumsum() # type: ignore[attr-defined]
859+
div_points = _nx.array(section_sizes, dtype=_nx.intp).cumsum()
860860

861861
sub_dfs = []
862862
for i in range(splits):

Diff for: awswrangler/s3/_write_dataset.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def _get_bucketing_series(df: pd.DataFrame, bucketing_info: typing.BucketingInfo
2828
axis="columns",
2929
)
3030
)
31-
return bucket_number_series.astype(pd.CategoricalDtype(range(bucketing_info[1])))
31+
	return bucket_number_series.astype(pd.CategoricalDtype(range(bucketing_info[1])))
3232

3333

3434
def _simulate_overflow(value: int, bits: int = 31, signed: bool = False) -> int:

Diff for: poetry.lock

+116-99
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: pyproject.toml

+4-4
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ pandas = [
3636
{ version = ">=1.2.0,<3.0.0", markers = "python_version >= \"3.9\"" },
3737
]
3838
numpy = [
39-
{version = "^1.18", markers = "python_version < \"3.12\""},
40-
{version = "^1.26", markers = "python_version >= \"3.12\""}
39+
{ version = ">=1.18,<2.0", markers = "python_version < \"3.9\"" },
40+
{ version = ">=1.26,<3.0", markers = "python_version >= \"3.9\"" }
4141
]
4242
pyarrow = ">=8.0.0"
4343
typing-extensions = "^4.4.0"
@@ -77,7 +77,7 @@ geopandas = [
7777
# Distributed
7878
modin = [
7979
{ version = "0.23.1post0", markers = "python_version < \"3.9\"", optional = true },
80-
{ version = "^0.26.0", markers = "python_version >= \"3.9\"", optional = true }
80+
{ version = "^0.31.0", markers = "python_version >= \"3.9\"", optional = true }
8181
]
8282
ray = { version = "^2.30.0", extras = ["default", "data"], optional = true }
8383

@@ -158,7 +158,7 @@ line-length = 120
158158
target-version = "py38"
159159

160160
[tool.ruff.lint]
161-
select = ["D", "E", "F", "I", "PL", "RUF100", "W", "FA", "UP", "PYI036"]
161+
select = ["D", "E", "F", "I", "PL", "RUF100", "W", "FA", "UP", "PYI036", "NPY"]
162162
ignore = ["E501", "PLR2004", "UP037"]
163163
fixable = ["ALL"]
164164

Diff for: tests/load/test_s3.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -238,14 +238,16 @@ def test_wait_object_not_exists(path: str, benchmark_time: int, request: pytest.
238238

239239
@pytest.mark.parametrize("size", [(5000, 5000), (1, 5000), (5000, 1), (1, 1)])
240240
def test_wide_df(size: tuple[int, int], path: str) -> None:
241-
df = pd.DataFrame(np.random.randint(0, 100, size=size))
241+
	rand_gen = np.random.default_rng()
242+
243+
df = pd.DataFrame(rand_gen.integers(0, 100, size=size))
242244
df.columns = df.columns.map(str)
243245

244246
num_cols = size[0]
245-
df["int"] = np.random.choice(["1", "2", None], num_cols)
246-
df["decimal"] = np.random.choice(["1.0", "2.0", None], num_cols)
247-
df["date"] = np.random.choice(["2020-01-01", "2020-01-02", None], num_cols)
248-
df["par0"] = np.random.choice(["A", "B"], num_cols)
247+
df["int"] = rand_gen.choice(["1", "2", None], num_cols)
248+
df["decimal"] = rand_gen.choice(["1.0", "2.0", None], num_cols)
249+
df["date"] = rand_gen.choice(["2020-01-01", "2020-01-02", None], num_cols)
250+
df["par0"] = rand_gen.choice(["A", "B"], num_cols)
249251

250252
partitions_shape = np.array(unwrap_partitions(df)).shape
251253
assert partitions_shape[1] == min(math.ceil(len(df.columns) / cfg.MinPartitionSize.get()), cfg.NPartitions.get())

Diff for: tests/unit/test_athena.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -969,8 +969,8 @@ def test_athena_nan_inf(glue_database, ctas_approach, data_source):
969969
assert df.shape == (1, 4)
970970
assert df.dtypes.to_list() == ["float64", "float64", "float64", "float64"]
971971
assert np.isnan(df.nan.iloc[0])
972-
assert df.inf.iloc[0] == np.PINF
973-
assert df.inf_n.iloc[0] == np.NINF
972+
assert df.inf.iloc[0] == np.inf
973+
assert df.inf_n.iloc[0] == -np.inf
974974
assert df.regular.iloc[0] == 1.2
975975

976976

Diff for: tests/unit/test_postgresql.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import awswrangler as wr
1313
import awswrangler.pandas as pd
1414

15-
from .._utils import ensure_data_types, get_df, pandas_equals
15+
from .._utils import ensure_data_types, get_df, is_ray_modin, pandas_equals
1616

1717
logging.getLogger("awswrangler").setLevel(logging.DEBUG)
1818

@@ -96,6 +96,7 @@ def test_unknown_overwrite_method_error(postgresql_table, postgresql_con):
9696
)
9797

9898

99+
@pytest.mark.xfail(is_ray_modin, raises=ProgrammingError, reason="Broken export of values in Modin")
99100
def test_sql_types(postgresql_table, postgresql_con):
100101
table = postgresql_table
101102
df = get_df()

Diff for: tests/unit/test_s3_parquet.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -730,17 +730,23 @@ def test_parquet_compression(path, compression) -> None:
730730
"schema", [None, pa.schema([pa.field("c0", pa.int64()), pa.field("c1", pa.int64()), pa.field("par", pa.string())])]
731731
)
732732
def test_empty_file(path, use_threads, schema):
733+
from awswrangler import _utils
734+
733735
df = pd.DataFrame({"c0": [1, 2, 3], "c1": [None, None, None], "par": ["a", "b", "c"]})
734736
df.index = df.index.astype("Int64")
735737
df["c0"] = df["c0"].astype("Int64")
736738
df["par"] = df["par"].astype("string")
737739
wr.s3.to_parquet(df, path, index=True, dataset=True, partition_cols=["par"])
738-
bucket, key = wr._utils.parse_path(f"{path}test.csv")
740+
741+
bucket, key = _utils.parse_path(f"{path}test.csv")
739742
boto3.client("s3").put_object(Body=b"", Bucket=bucket, Key=key)
740743
with pytest.raises(wr.exceptions.InvalidFile):
741744
wr.s3.read_parquet(path, use_threads=use_threads, ignore_empty=False, schema=schema)
745+
742746
df2 = wr.s3.read_parquet(path, dataset=True, use_threads=use_threads)
747+
df2 = df2.sort_values(by=["c0"])
743748
df2["par"] = df2["par"].astype("string")
749+
744750
assert_pandas_equals(df, df2)
745751

746752

0 commit comments

Comments
 (0)