Use pyupgrade --py39-plus for remaining files #7437

Open · wants to merge 3 commits into main
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -1,3 +1,6 @@
+[project]
+requires-python = ">=3.9"
+
 [tool.ruff]
 line-length = 119
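Declaring `requires-python = ">=3.9"` gives pyupgrade (and Ruff's UP rules) license to assume a 3.9 baseline everywhere. As a rough illustration of the main rewrite `--py39-plus` performs, here is a hypothetical before/after (the function is made up, not from this repo):

# Before, the 3.8-compatible spelling:
#     from typing import Dict, List
#     def count_tokens(lines: List[str]) -> Dict[str, int]: ...

# After: PEP 585 builtin generics, no typing import needed on Python 3.9+.
def count_tokens(lines: list[str]) -> dict[str, int]:
    counts: dict[str, int] = {}
    for line in lines:
        for token in line.split():
            counts[token] = counts.get(token, 0) + 1
    return counts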
2 changes: 1 addition & 1 deletion src/datasets/packaged_modules/__init__.py
@@ -95,7 +95,7 @@ def _hash_python_lines(lines: list[str]) -> str:
     _MODULE_TO_EXTENSIONS[_module].append(".zip")

 # Used to filter data files based on file names
-_MODULE_TO_METADATA_FILE_NAMES: Dict[str, List[str]] = {}
+_MODULE_TO_METADATA_FILE_NAMES: dict[str, list[str]] = {}
 for _module in _MODULE_TO_EXTENSIONS:
     _MODULE_TO_METADATA_FILE_NAMES[_module] = []
 _MODULE_TO_METADATA_FILE_NAMES["imagefolder"] = imagefolder.ImageFolder.METADATA_FILENAMES
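Worth noting: `dict[str, list[str]]` is a real runtime expression on Python 3.9+, not just a type-checker spelling, so module-level annotations like the one above evaluate fine without `from __future__ import annotations`. A quick sketch:

alias = dict[str, list[str]]   # a types.GenericAlias, built at runtime
print(alias)                   # dict[str, list[str]]
print(alias.__origin__)        # <class 'dict'>
print(alias.__args__)          # (<class 'str'>, list[str])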
@@ -2,8 +2,9 @@
 import io
 import itertools
 import os
+from collections.abc import Iterator
 from dataclasses import dataclass
-from typing import Any, Callable, Iterator, Optional, Union
+from typing import Any, Callable, Optional, Union

 import pandas as pd
 import pyarrow as pa
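Since Python 3.9, the container ABCs in `collections.abc` are subscriptable, so `typing.Iterator` (deprecated since 3.9) can be swapped for its `collections.abc` counterpart. A minimal, hypothetical example of the resulting style:

from collections.abc import Iterator

def pairs(items: list[int]) -> Iterator[tuple[int, int]]:
    # Yield consecutive pairs; collections.abc.Iterator supports
    # subscripting on 3.9+ (PEP 585), so typing.Iterator is unnecessary.
    for a, b in zip(items, items[1:]):
        yield a, b

print(list(pairs([1, 2, 3])))  # [(1, 2), (2, 3)]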
1 change: 0 additions & 1 deletion templates/new_dataset_script.py
@@ -15,7 +15,6 @@
 """TODO: Add a description here."""


-import csv
 import json
 import os
3 changes: 1 addition & 2 deletions tests/distributed_scripts/run_torch_distributed.py
@@ -1,6 +1,5 @@
 import os
 from argparse import ArgumentParser
-from typing import List

 import torch.utils.data

@@ -16,7 +15,7 @@ class FailedTestError(RuntimeError):
     pass


-def gen(shards: List[str]):
+def gen(shards: list[str]):
     for shard in shards:
         for i in range(NUM_ITEMS_PER_SHARD):
             yield {"i": i, "shard": shard}
3 changes: 1 addition & 2 deletions tests/features/test_features.py
@@ -1,5 +1,4 @@
 import datetime
-from typing import List, Tuple
 from unittest import TestCase
 from unittest.mock import MagicMock, patch

@@ -858,7 +857,7 @@ def test_features_to_arrow_schema(features: Features):


 @pytest.mark.parametrize("features", NESTED_COMPARISON)
-def test_features_alignment(features: Tuple[List[Features], Features]):
+def test_features_alignment(features: tuple[list[Features], Features]):
     inputs, expected = features
     _check_if_features_can_be_aligned(inputs)  # Check that we can align, will raise otherwise.
     assert _align_features(inputs) == expected
3 changes: 1 addition & 2 deletions tests/io/test_sql.py
@@ -57,8 +57,7 @@ def iter_sql_file(sqlite_path):
     with contextlib.closing(sqlite3.connect(sqlite_path)) as con:
         cur = con.cursor()
         cur.execute("SELECT * FROM dataset")
-        for row in cur:
-            yield row
+        yield from cur


 @require_sqlalchemy
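`yield from` delegates to a sub-iterable and is pyupgrade's idiomatic replacement for a bare `for ... yield` loop. A standalone sketch of the equivalence (names are illustrative):

def rows_loop(cursor):
    for row in cursor:  # pre-rewrite spelling
        yield row

def rows_delegate(cursor):
    # post-rewrite: yields the same values, and also forwards
    # send()/throw() when the source is itself a generator
    yield from cursor

assert list(rows_loop(iter([1, 2]))) == list(rows_delegate(iter([1, 2])))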
2 changes: 1 addition & 1 deletion tests/packaged_modules/test_webdataset.py
@@ -93,7 +93,7 @@ def test_gzipped_text_webdataset(gzipped_text_wds_file, text_path):
     _, examples = zip(*generator)
     assert len(examples) == 3
     assert isinstance(examples[0]["txt.gz"], str)
-    with open(text_path, "r") as f:
+    with open(text_path) as f:
         assert examples[0]["txt.gz"].replace("\r\n", "\n") == f.read().replace("\r\n", "\n")
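`"r"` is `open()`'s default mode, so pyupgrade drops it as redundant. The two calls below are equivalent (the scratch file is hypothetical):

from pathlib import Path

path = Path("example.txt")  # hypothetical scratch file
path.write_text("hello\n")

with open(path, "r") as f:  # explicit mode, pre-rewrite
    before = f.read()
with open(path) as f:       # mode omitted; "r" is the default
    after = f.read()
assert before == after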
14 changes: 6 additions & 8 deletions tests/test_arrow_dataset.py
@@ -3142,12 +3142,11 @@ def test_tf_dataset_options(self, in_memory):
         self.assertEqual(len(tf_dataset), 2)  # One batch of 3 and one batch of 1
         self.assertEqual(len(tf_dataset_with_drop), 1)  # Incomplete batch of 1 is dropped
         # Test that `NotImplementedError` is raised `batch_size` is None and `num_workers` is > 0
-        if sys.version_info >= (3, 8):
-            with self._create_dummy_dataset(in_memory, tmp_dir.name, multiple_columns=True) as dset:
-                with self.assertRaisesRegex(
-                    NotImplementedError, "`batch_size` must be specified when using multiple workers"
-                ):
-                    dset.to_tf_dataset(columns="col_1", batch_size=None, num_workers=2)
+        with self._create_dummy_dataset(in_memory, tmp_dir.name, multiple_columns=True) as dset:
+            with self.assertRaisesRegex(
+                NotImplementedError, "`batch_size` must be specified when using multiple workers"
+            ):
+                dset.to_tf_dataset(columns="col_1", batch_size=None, num_workers=2)
         del tf_dataset  # For correct cleanup
         del tf_dataset_with_drop

@@ -3927,8 +3926,7 @@ def _gen():
                 {"col_1": "2", "col_2": 2, "col_3": 2.0},
                 {"col_1": "3", "col_2": 3, "col_3": 3.0},
             ]
-            for item in data:
-                yield item
+            yield from data

         return _gen
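With `requires-python = ">=3.9"` in place, a `sys.version_info >= (3, 8)` check is always true, so pyupgrade flattens the branch. The pattern being removed, in miniature (the variable is illustrative):

import sys

# Before: on a >=3.9 baseline this condition can never be false.
if sys.version_info >= (3, 8):
    run_test = True

# After: the guard is dropped and its body dedented.
run_test = True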
3 changes: 1 addition & 2 deletions tests/test_data_files.py
@@ -1,7 +1,6 @@
 import copy
 import os
 from pathlib import Path
-from typing import List
 from unittest.mock import patch

 import fsspec

@@ -509,7 +508,7 @@ def test_DataFilesPatternsDict(text_file):
     assert isinstance(data_files_dict["train"], DataFilesList)


-def mock_fs(file_paths: List[str]):
+def mock_fs(file_paths: list[str]):
     """
     Set up a mock filesystem for fsspec containing the provided files
2 changes: 1 addition & 1 deletion tests/test_file_utils.py
@@ -361,7 +361,7 @@ def _readd_double_slash_removed_by_path(path_as_posix: str) -> str:
         (
             str(Path().resolve()),
             ("file.txt",),
-            str((Path().resolve() / "file.txt")),
+            str(Path().resolve() / "file.txt"),
         ),
     ],
 )
6 changes: 3 additions & 3 deletions tests/test_table.py
@@ -2,7 +2,7 @@
 import pickle
 from decimal import Decimal
 from functools import partial
-from typing import List, Union
+from typing import Union
 from unittest.mock import MagicMock

 import numpy as np

@@ -40,7 +40,7 @@ def in_memory_pa_table(arrow_file) -> pa.Table:
     return pa.ipc.open_stream(arrow_file).read_all()


-def _to_testing_blocks(table: TableBlock) -> List[List[TableBlock]]:
+def _to_testing_blocks(table: TableBlock) -> list[list[TableBlock]]:
     assert len(table) > 2
     blocks = [
         [table.slice(0, 2)],

@@ -1049,7 +1049,7 @@ def test_concat_tables(arrow_file, in_memory_pa_table):
     assert isinstance(concatenated_table.blocks[0][2], InMemoryTable)


-def _interpolation_search_ground_truth(arr: List[int], x: int) -> Union[int, IndexError]:
+def _interpolation_search_ground_truth(arr: list[int], x: int) -> Union[int, IndexError]:
     for i in range(len(arr) - 1):
         if arr[i] <= x < arr[i + 1]:
             return i
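Note that `--py39-plus` replaces `List`/`Dict`/`Tuple` but leaves `Union` and `Optional` alone: the `X | Y` union syntax (PEP 604) only arrived in Python 3.10, so on a 3.9 baseline `typing.Union` is still required, as in the hunk above. A quick sketch of that boundary (functions are illustrative):

from typing import Optional, Union

# Fine on 3.9: builtin generics (PEP 585).
def head(xs: list[int]) -> Optional[int]:
    return xs[0] if xs else None

# The `int | None` spelling (PEP 604) needs Python 3.10+,
# so --py39-plus keeps Union/Optional unchanged.
def widen(x: Union[int, float]) -> float:
    return float(x)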
4 changes: 2 additions & 2 deletions utils/release.py
@@ -30,7 +30,7 @@

 def update_version_in_file(fname, version, pattern):
     """Update the version in one file using a specific pattern."""
-    with open(fname, "r", encoding="utf-8", newline="\n") as f:
+    with open(fname, encoding="utf-8", newline="\n") as f:
         code = f.read()
     re_pattern, replace = REPLACE_PATTERNS[pattern]
     replace = replace.replace("VERSION", version)

@@ -47,7 +47,7 @@ def global_version_update(version):

 def get_version():
     """Reads the current version in the __init__."""
-    with open(REPLACE_FILES["init"], "r") as f:
+    with open(REPLACE_FILES["init"]) as f:
         code = f.read()
     default_version = REPLACE_PATTERNS["init"][0].search(code).groups()[0]
     return packaging.version.parse(default_version)