Format code with ruff (huggingface#5519)
* Update config files

* Format code

* Some manual fixes

* Fix
mariosasko authored Feb 14, 2023
1 parent 819bc6e commit 06ae3f6
Showing 51 changed files with 52 additions and 105 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
@@ -28,8 +28,7 @@ jobs:
       - name: Check quality
         run: |
           black --check tests src benchmarks metrics
-          isort --check-only tests src benchmarks metrics
-          flake8 tests src benchmarks metrics
+          ruff tests src benchmarks metrics
   test:
     needs: check_code_quality
5 changes: 4 additions & 1 deletion .gitignore
@@ -61,4 +61,7 @@ docs/source/_build/

 # Benchmark results
 report.json
-report.md
\ No newline at end of file
+report.md
+
+# Ruff
+.ruff_cache
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -57,7 +57,7 @@ If you want to add a dataset see specific instructions in the section [*How to a

 5. Develop the features on your branch.

-6. Format your code. Run black and isort so that your newly added files look nice with the following command:
+6. Format your code. Run black and ruff so that your newly added files look nice with the following command:

    ```bash
    make style
9 changes: 5 additions & 4 deletions Makefile
@@ -1,17 +1,18 @@
 .PHONY: quality style test

+check_dirs := tests src benchmarks metrics
+
 # Check that source code meets quality standards

 quality:
-	black --check tests src benchmarks metrics
-	isort --check-only tests src benchmarks metrics
-	flake8 tests src benchmarks metrics
+	black --check $(check_dirs)
+	ruff $(check_dirs)

 # Format source code automatically

 style:
 	black tests src benchmarks metrics
-	isort tests src benchmarks metrics
+	ruff $(check_dirs) --fix

 # Run tests for the library

1 change: 0 additions & 1 deletion benchmarks/format.py
@@ -9,7 +9,6 @@ def format_json_to_md(input_json_file, output_md_file):
     output_md = ["<details>", "<summary>Show updated benchmarks!</summary>", " "]

     for benchmark_name in sorted(results):
-
         benchmark_res = results[benchmark_name]

         benchmark_file_name = benchmark_name.split("/")[-1]
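
A note on the pattern that dominates the rest of this diff: in file after file, the only change is that a blank line immediately following a `def ...:`, `for ...:`, or `with ...:` header is deleted. That is most plausibly the effect of the `black~=23.1` bump in setup.py below (black 23's stable style removes empty lines directly after a block opener) rather than of any ruff rule. A minimal illustrative sketch, not code from the repository:

# Before: black <= 22 tolerated a blank line right after the signature.
def format_scores(results):

    return {name: round(score, 4) for name, score in results.items()}


# After: black 23.x removes that blank line.
def format_scores(results):
    return {name: round(score, 4) for name, score in results.items()}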
2 changes: 0 additions & 2 deletions metrics/bleurt/bleurt.py
@@ -78,7 +78,6 @@
 @datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class BLEURT(datasets.Metric):
     def _info(self):
-
         return datasets.MetricInfo(
             description=_DESCRIPTION,
             citation=_CITATION,
@@ -95,7 +94,6 @@ def _info(self):
         )

     def _download_and_prepare(self, dl_manager):
-
         # check that config name specifies a valid BLEURT model
         if self.config_name == "default":
             logger.warning(
2 changes: 0 additions & 2 deletions metrics/code_eval/execute.py
@@ -54,9 +54,7 @@ def check_correctness(check_program, timeout, task_id, completion_id):


 def unsafe_execute(check_program, result, timeout):
-
     with create_tempdir():
-
         # These system calls are needed when cleaning up tempdir.
         import os
         import shutil
1 change: 0 additions & 1 deletion metrics/comet/comet.py
@@ -108,7 +108,6 @@
 @datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class COMET(datasets.Metric):
     def _info(self):
-
         return datasets.MetricInfo(
             description=_DESCRIPTION,
             citation=_CITATION,
3 changes: 1 addition & 2 deletions metrics/coval/coval.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ CoVal metric. """
-import coval  # From: git+https://github.com/ns-moosavi/coval.git noqa: F401
+import coval  # From: git+https://github.com/ns-moosavi/coval.git # noqa: F401
 from coval.conll import reader, util
 from coval.eval import evaluator
@@ -167,7 +167,6 @@
 def get_coref_infos(
     key_lines, sys_lines, NP_only=False, remove_nested=False, keep_singletons=True, min_span=False, doc="dummy_doc"
 ):
-
     key_doc_lines = {doc: key_lines}
     sys_doc_lines = {doc: sys_lines}

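
The one-character coval fix above is worth pausing on: ruff, like flake8, only honors `noqa` when it begins its own `#` comment. The old trailing `noqa: F401` was bare comment text, so the unused `import coval` would now be reported; previously it was exempted by the `per-file-ignores = metrics/*:F401` flake8 setting deleted from setup.cfg further down. A minimal sketch with standard-library imports:

# Both imports are unused. In the first line "noqa" is plain comment text, so F401
# still fires; in the second it starts its own "#" comment, so the linter honors it.
import json  # kept for a fail-fast dependency check noqa: F401
import csv   # kept for a fail-fast dependency check  # noqa: F401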
1 change: 0 additions & 1 deletion metrics/exact_match/exact_match.py
@@ -108,7 +108,6 @@ def _compute(
         ignore_punctuation=False,
         ignore_numbers=False,
     ):
-
         if regexes_to_ignore is not None:
             for s in regexes_to_ignore:
                 predictions = np.array([re.sub(s, "", x) for x in predictions])
1 change: 0 additions & 1 deletion metrics/indic_glue/indic_glue.py
@@ -15,7 +15,6 @@

 import numpy as np
 from scipy.spatial.distance import cdist
-from scipy.stats import pearsonr, spearmanr
 from sklearn.metrics import f1_score

 import datasets
1 change: 0 additions & 1 deletion metrics/mae/mae.py
@@ -106,7 +106,6 @@ def _get_feature_types(self):
         }

     def _compute(self, predictions, references, sample_weight=None, multioutput="uniform_average"):
-
         mae_score = mean_absolute_error(references, predictions, sample_weight=sample_weight, multioutput=multioutput)

         return {"mae": mae_score}
1 change: 0 additions & 1 deletion metrics/mahalanobis/mahalanobis.py
@@ -71,7 +71,6 @@ def _info(self):
         )

     def _compute(self, X, reference_distribution):
-
         # convert to numpy arrays
         X = np.array(X)
         reference_distribution = np.array(reference_distribution)
10 changes: 5 additions & 5 deletions metrics/mauve/mauve.py
@@ -14,11 +14,11 @@
 # limitations under the License.
 """ MAUVE metric from https://github.com/krishnap25/mauve. """

-import faiss  # Here to have a nice missing dependency error message early on
-import numpy  # Here to have a nice missing dependency error message early on
-import requests  # Here to have a nice missing dependency error message early on
-import sklearn  # Here to have a nice missing dependency error message early on
-import tqdm  # Here to have a nice missing dependency error message early on
+import faiss  # noqa: F401  # Here to have a nice missing dependency error message early on
+import numpy  # noqa: F401  # Here to have a nice missing dependency error message early on
+import requests  # noqa: F401  # Here to have a nice missing dependency error message early on
+import sklearn  # noqa: F401  # Here to have a nice missing dependency error message early on
+import tqdm  # noqa: F401  # Here to have a nice missing dependency error message early on
 from mauve import compute_mauve  # From: mauve-text

 import datasets
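
These imports exist purely so that a missing optional dependency fails at metric-load time with an obvious ImportError instead of somewhere inside `compute_mauve`; since the blanket F401 exemption for `metrics/*` is gone, each one now carries its own `# noqa: F401`. Roughly equivalent behavior, as a sketch (the message wording is illustrative, not from the repository):

# What `import faiss  # noqa: F401` buys at module import time:
try:
    import faiss  # noqa: F401  # unused below; only proves the dependency is installed
except ImportError as err:
    raise ImportError("The mauve metric requires the `faiss` package.") from err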
1 change: 0 additions & 1 deletion metrics/mse/mse.py
@@ -110,7 +110,6 @@ def _get_feature_types(self):
         }

     def _compute(self, predictions, references, sample_weight=None, multioutput="uniform_average", squared=True):
-
         mse = mean_squared_error(
             references, predictions, sample_weight=sample_weight, multioutput=multioutput, squared=squared
         )
1 change: 0 additions & 1 deletion metrics/perplexity/perplexity.py
@@ -101,7 +101,6 @@ def _info(self):
         )

     def _compute(self, input_texts, model_id, batch_size: int = 16, add_start_token: bool = True, device=None):
-
         if device is not None:
             assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
             if device == "gpu":
8 changes: 4 additions & 4 deletions metrics/rouge/rouge.py
@@ -14,10 +14,10 @@
 """ ROUGE metric from Google Research github repo. """

 # The dependencies in https://github.com/google-research/google-research/blob/master/rouge/requirements.txt
-import absl  # Here to have a nice missing dependency error message early on
-import nltk  # Here to have a nice missing dependency error message early on
-import numpy  # Here to have a nice missing dependency error message early on
-import six  # Here to have a nice missing dependency error message early on
+import absl  # noqa: F401  # Here to have a nice missing dependency error message early on
+import nltk  # noqa: F401  # Here to have a nice missing dependency error message early on
+import numpy  # noqa: F401  # Here to have a nice missing dependency error message early on
+import six  # noqa: F401  # Here to have a nice missing dependency error message early on
 from rouge_score import rouge_scorer, scoring

 import datasets
2 changes: 0 additions & 2 deletions metrics/sari/sari.py
@@ -227,7 +227,6 @@ def SARIsent(ssent, csent, rsents):


 def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_str: bool = True):
-
     # Normalization is requried for the ASSET dataset (one of the primary
     # datasets in sentence simplification) to allow using space
     # to split the sentence. Even though Wiki-Auto and TURK datasets,
@@ -278,7 +277,6 @@ def _info(self):
         )

     def _compute(self, sources, predictions, references):
-
         if not (len(sources) == len(predictions) == len(references)):
             raise ValueError("Sources length must match predictions and references lengths.")
         sari_score = 0
2 changes: 1 addition & 1 deletion metrics/super_glue/super_glue.py
@@ -135,7 +135,7 @@ def evaluate_multirc(ids_preds, labels):
         question_preds, question_labels = zip(*preds_labels)
         f1 = f1_score(y_true=question_labels, y_pred=question_preds, average="macro")
         f1s.append(f1)
-        em = int(sum(p == l for p, l in preds_labels) == len(preds_labels))
+        em = int(sum(pred == label for pred, label in preds_labels) == len(preds_labels))
         ems.append(em)
     f1_m = float(sum(f1s) / len(f1s))
     em = sum(ems) / len(ems)
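
The rename above is likely one of the commit's "manual fixes": with `select = ["E", ...]` in the new ruff config, the single-letter name `l` trips E741 (ambiguous variable name), hence `p, l` becomes `pred, label`. The semantics are untouched: a question's exact-match flag is 1 only when every prediction agrees with its label, as this worked example with hypothetical values shows:

preds_labels = [(1, 1), (0, 0), (1, 1)]  # (prediction, label) pairs for one question
em = int(sum(pred == label for pred, label in preds_labels) == len(preds_labels))
assert em == 1  # em drops to 0 as soon as any single pair disagrees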
2 changes: 0 additions & 2 deletions metrics/wiki_split/wiki_split.py
@@ -254,7 +254,6 @@ def SARIsent(ssent, csent, rsents):


 def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_str: bool = True):
-
     # Normalization is requried for the ASSET dataset (one of the primary
     # datasets in sentence simplification) to allow using space
     # to split the sentence. Even though Wiki-Auto and TURK datasets,
@@ -284,7 +283,6 @@ def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_s


 def compute_sari(sources, predictions, references):
-
     if not (len(sources) == len(predictions) == len(references)):
         raise ValueError("Sources length must match predictions and references lengths.")
     sari_score = 0
1 change: 0 additions & 1 deletion metrics/xtreme_s/xtreme_s.py
@@ -238,7 +238,6 @@ def _info(self):
         )

     def _compute(self, predictions, references, bleu_kwargs=None, wer_kwargs=None):
-
         bleu_kwargs = bleu_kwargs if bleu_kwargs is not None else {}
         wer_kwargs = wer_kwargs if wer_kwargs is not None else {}

12 changes: 12 additions & 0 deletions pyproject.toml
@@ -1,3 +1,15 @@
 [tool.black]
 line-length = 119
 target_version = ['py37']
+
+[tool.ruff]
+# Ignored rules:
+# "E501" -> line length violation
+# "F821" -> undefined named in type annotation (e.g. Literal["something"])
+ignore = ["E501", "F821"]
+select = ["E", "F", "I", "W"]
+line-length = 119
+
+[tool.ruff.isort]
+lines-after-imports = 2
+known-first-party = ["datasets"]
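
The new `[tool.ruff]` table replicates the old flake8/isort behavior: `select` turns on the pycodestyle (`E`, `W`), pyflakes (`F`), and import-sorting (`I`) rule families, `line-length = 119` matches black, and `[tool.ruff.isort]` carries over `lines_after_imports = 2` and the first-party package from the `[isort]` section removed from setup.cfg below. A sketch of what the selected families flag (the module and findings are illustrative):

import sys
import json  # ruff reports F401 here: `json` is imported but never used
import pickle  # and I001 on the block: not in isort order (json, pickle, sys)

print(sys.version, pickle.HIGHEST_PROTOCOL)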
18 changes: 0 additions & 18 deletions setup.cfg
@@ -1,24 +1,6 @@
 [metadata]
 license_file = LICENSE

-[isort]
-ensure_newline_before_comments = True
-force_grid_wrap = 0
-include_trailing_comma = True
-line_length = 119
-lines_after_imports = 2
-multi_line_output = 3
-use_parentheses = True
-
-[flake8]
-ignore = E203, E501, W503
-max-line-length = 119
-exclude =
-    src/datasets/datasets
-    src/datasets/metrics
-per-file-ignores =
-    metrics/*:F401
-
 [tool:pytest]
 markers =
     unit: unit test
2 changes: 1 addition & 1 deletion setup.py
@@ -211,7 +211,7 @@
 TESTS_REQUIRE.extend(VISION_REQUIRE)
 TESTS_REQUIRE.extend(AUDIO_REQUIRE)

-QUALITY_REQUIRE = ["black~=22.0", "flake8>=3.8.3", "isort>=5.0.0", "pyyaml>=5.3.1"]
+QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241", "pyyaml>=5.3.1"]

 DOCS_REQUIRE = [
     # Might need to add doc-builder and some specific deps in the future
1 change: 0 additions & 1 deletion src/datasets/commands/dummy_data.py
@@ -394,7 +394,6 @@ def _print_dummy_data_instructions(self, dataset_builder, mock_dl_manager):
         try:
             generator_splits = dataset_builder._split_generators(mock_dl_manager)
         except FileNotFoundError as e:
-
             print(
                 f"Dataset {self._dataset_name} with config {mock_dl_manager.config} seems to already open files in the method `_split_generators(...)`. You might consider to instead only open files in the method `_generate_examples(...)` instead. If this is not possible the dummy data has to be created with less guidance. Make sure you create the file {e.filename}."
             )
4 changes: 1 addition & 3 deletions src/datasets/features/features.py
@@ -23,9 +23,8 @@
 from dataclasses import InitVar, dataclass, field, fields
 from functools import reduce, wraps
 from operator import mul
-from typing import Any, ClassVar, Dict, List, Optional
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union
 from typing import Sequence as Sequence_
-from typing import Tuple, Union

 import numpy as np
 import pandas as pd
@@ -1763,7 +1762,6 @@ def unsimplify(feature: dict) -> dict:
         return feature

     def from_yaml_inner(obj: Union[dict, list]) -> Union[dict, list]:
-
         if isinstance(obj, dict):
             if not obj:
                 return {}
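
In the features.py import hunk above, ruff's isort-compatible `I` rules merge the two plain `from typing import ...` statements into one, while the aliased import stays on its own line, which matches isort's default of not combining `as` imports (the old config did not set `combine_as_imports`). The consolidated block, for reference:

from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union
from typing import Sequence as Sequence_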
2 changes: 0 additions & 2 deletions src/datasets/fingerprint.py
@@ -184,7 +184,6 @@ def get_temporary_cache_files_directory() -> str:
     """Return a directory that is deleted when session closes."""
     global _TEMP_DIR_FOR_TEMP_CACHE_FILES
     if _TEMP_DIR_FOR_TEMP_CACHE_FILES is None:
-
         # Avoids a PermissionError on Windows caused by the datasets referencing
         # the files from the cache directory on clean-up
         def cleanup_func():
@@ -466,7 +465,6 @@ def fingerprint_transform(
     fingerprint_names = fingerprint_names if fingerprint_names is not None else ["new_fingerprint"]

     def _fingerprint(func):
-
         if not inplace and not all(name in func.__code__.co_varnames for name in fingerprint_names):
             raise ValueError("function {func} is missing parameters {fingerprint_names} in signature")

1 change: 0 additions & 1 deletion src/datasets/formatting/np_formatter.py
@@ -44,7 +44,6 @@ def _consolidate(self, column):
             return column

     def _tensorize(self, value):
-
         if isinstance(value, (str, bytes, type(None))):
             return value
         elif isinstance(value, (np.character, np.ndarray)) and np.issubdtype(value.dtype, np.character):
1 change: 0 additions & 1 deletion src/datasets/io/csv.py
@@ -74,7 +74,6 @@ def __init__(
         num_proc: Optional[int] = None,
         **to_csv_kwargs,
     ):
-
         if num_proc is not None and num_proc <= 0:
             raise ValueError(f"num_proc {num_proc} must be an integer > 0.")

1 change: 0 additions & 1 deletion src/datasets/io/sql.py
@@ -64,7 +64,6 @@ def __init__(
         num_proc: Optional[int] = None,
         **to_sql_kwargs,
     ):
-
         if num_proc is not None and num_proc <= 0:
             raise ValueError(f"num_proc {num_proc} must be an integer > 0.")

1 change: 0 additions & 1 deletion src/datasets/naming.py
@@ -67,7 +67,6 @@ def filepattern_for_dataset_split(dataset_name, split, data_dir, filetype_suffix


 def filenames_for_dataset_split(path, dataset_name, split, filetype_suffix=None, shard_lengths=None):
-
     prefix = filename_prefix_for_split(dataset_name, split)
     prefix = os.path.join(path, prefix)

2 changes: 1 addition & 1 deletion src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py
@@ -133,7 +133,7 @@ def analyze(files_or_archives, downloaded_files_or_dirs, split):

         if metadata_files:
             # add metadata if `metadata_files` are found and `drop_metadata` is None (default) or False
-            add_metadata = not (self.config.drop_metadata is True)
+            add_metadata = not self.config.drop_metadata
             # if `metadata_files` are found, add labels only if
             # `drop_labels` is set up to False explicitly (not-default behavior)
             add_labels = self.config.drop_labels is False
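
The `drop_metadata` simplification above is behavior-preserving: the config value is a tri-state `None`/`False`/`True`, and `not (x is True)` agrees with `not x` on all three values, so `None` (the default) still means keeping the metadata. A quick check:

# Equivalence over the tri-state config value:
for drop_metadata in (None, False, True):
    assert (not (drop_metadata is True)) == (not drop_metadata)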
1 change: 0 additions & 1 deletion src/datasets/packaged_modules/json/json.py
@@ -93,7 +93,6 @@ def _cast_table(self, pa_table: pa.Table) -> pa.Table:

     def _generate_tables(self, files):
         for file_idx, file in enumerate(itertools.chain.from_iterable(files)):
-
             # If the file is one json object and if we need to look at the list of items in one specific field
             if self.config.field is not None:
                 with open(file, encoding="utf-8") as f:
2 changes: 0 additions & 2 deletions src/datasets/utils/file_utils.py
@@ -100,7 +100,6 @@ def head_hf_s3(


 def hf_github_url(path: str, name: str, dataset=True, revision: Optional[str] = None) -> str:
-
     default_revision = "main" if version.parse(__version__).is_devrelease else __version__
     revision = revision or default_revision
     if dataset:
@@ -547,7 +546,6 @@ def get_from_cache(
     # Prevent parallel downloads of the same file with a lock.
     lock_path = cache_path + ".lock"
     with FileLock(lock_path):
-
         if resume_download:
             incomplete_path = cache_path + ".incomplete"

(Diff truncated: the remaining changed files of the 51 were not loaded in this view.)