Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[project]
requires-python = ">=3.9"

[tool.ruff]
line-length = 119

Expand Down
2 changes: 1 addition & 1 deletion src/datasets/features/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def decode_example(self, value: dict, token_per_repo_id=None) -> "pdfplumber.pdf

return pdf

def flatten(self) -> Union["FeatureType", Dict[str, "FeatureType"]]:
def flatten(self) -> Union["FeatureType", dict[str, "FeatureType"]]:
"""If in the decodable state, return the feature itself, otherwise flatten the feature into a dictionary."""
from .features import Value

Expand Down
2 changes: 1 addition & 1 deletion src/datasets/packaged_modules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _hash_python_lines(lines: list[str]) -> str:
_MODULE_TO_EXTENSIONS[_module].append(".zip")

# Used to filter data files based on file names
_MODULE_TO_METADATA_FILE_NAMES: Dict[str, List[str]] = {}
_MODULE_TO_METADATA_FILE_NAMES: dict[str, list[str]] = {}
for _module in _MODULE_TO_EXTENSIONS:
_MODULE_TO_METADATA_FILE_NAMES[_module] = []
_MODULE_TO_METADATA_FILE_NAMES["imagefolder"] = imagefolder.ImageFolder.METADATA_FILENAMES
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import io
import itertools
import os
from collections.abc import Iterator
from dataclasses import dataclass
from typing import Any, Callable, Iterator, Optional, Union
from typing import Any, Callable, Optional, Union

import pandas as pd
import pyarrow as pa
Expand Down
3 changes: 1 addition & 2 deletions tests/distributed_scripts/run_torch_distributed.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
from argparse import ArgumentParser
from typing import List

import torch.utils.data

Expand All @@ -16,7 +15,7 @@ class FailedTestError(RuntimeError):
pass


def gen(shards: List[str]):
def gen(shards: list[str]):
for shard in shards:
for i in range(NUM_ITEMS_PER_SHARD):
yield {"i": i, "shard": shard}
Expand Down
3 changes: 1 addition & 2 deletions tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ def iter_sql_file(sqlite_path):
with contextlib.closing(sqlite3.connect(sqlite_path)) as con:
cur = con.cursor()
cur.execute("SELECT * FROM dataset")
for row in cur:
yield row
yield from cur


@require_sqlalchemy
Expand Down
2 changes: 1 addition & 1 deletion tests/packaged_modules/test_webdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_gzipped_text_webdataset(gzipped_text_wds_file, text_path):
_, examples = zip(*generator)
assert len(examples) == 3
assert isinstance(examples[0]["txt.gz"], str)
with open(text_path, "r") as f:
with open(text_path) as f:
assert examples[0]["txt.gz"].replace("\r\n", "\n") == f.read().replace("\r\n", "\n")


Expand Down
3 changes: 1 addition & 2 deletions tests/test_arrow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4040,8 +4040,7 @@ def _gen():
{"col_1": "2", "col_2": 2, "col_3": 2.0},
{"col_1": "3", "col_2": 3, "col_3": 3.0},
]
for item in data:
yield item
yield from data

return _gen

Expand Down
3 changes: 1 addition & 2 deletions tests/test_data_files.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import copy
import os
from pathlib import Path
from typing import List
from unittest.mock import patch

import fsspec
Expand Down Expand Up @@ -509,7 +508,7 @@ def test_DataFilesPatternsDict(text_file):
assert isinstance(data_files_dict["train"], DataFilesList)


def mock_fs(file_paths: List[str]):
def mock_fs(file_paths: list[str]):
"""
Set up a mock filesystem for fsspec containing the provided files

Expand Down
2 changes: 1 addition & 1 deletion tests/test_file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ def _readd_double_slash_removed_by_path(path_as_posix: str) -> str:
(
str(Path().resolve()),
("file.txt",),
str((Path().resolve() / "file.txt")),
str(Path().resolve() / "file.txt"),
),
],
)
Expand Down
4 changes: 2 additions & 2 deletions utils/release.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

def update_version_in_file(fname, version, pattern):
"""Update the version in one file using a specific pattern."""
with open(fname, "r", encoding="utf-8", newline="\n") as f:
with open(fname, encoding="utf-8", newline="\n") as f:
code = f.read()
re_pattern, replace = REPLACE_PATTERNS[pattern]
replace = replace.replace("VERSION", version)
Expand All @@ -47,7 +47,7 @@ def global_version_update(version):

def get_version():
"""Reads the current version in the __init__."""
with open(REPLACE_FILES["init"], "r") as f:
with open(REPLACE_FILES["init"]) as f:
code = f.read()
default_version = REPLACE_PATTERNS["init"][0].search(code).groups()[0]
return packaging.version.parse(default_version)
Expand Down