From 47c52886678546987f92f6053fec47187b228d54 Mon Sep 17 00:00:00 2001 From: Erik Bernhardsson Date: Fri, 7 Apr 2023 13:07:31 -0400 Subject: [PATCH] Misc Ruff fixes --- ann_benchmarks/algorithms/definitions.py | 4 ---- ann_benchmarks/algorithms/diskann.py | 1 - ann_benchmarks/algorithms/dolphinnpy.py | 1 - ann_benchmarks/algorithms/elastiknn.py | 1 - ann_benchmarks/algorithms/faiss.py | 1 - ann_benchmarks/algorithms/faiss_gpu.py | 1 - ann_benchmarks/algorithms/faiss_hnsw.py | 3 --- ann_benchmarks/algorithms/hnswlib.py | 2 -- ann_benchmarks/algorithms/luceneknn.py | 4 ++-- ann_benchmarks/algorithms/milvus.py | 1 - ann_benchmarks/algorithms/onng_ngt.py | 7 ++----- ann_benchmarks/algorithms/opensearchknn.py | 2 -- ann_benchmarks/algorithms/panng_ngt.py | 3 --- ann_benchmarks/algorithms/pynndescent.py | 2 +- ann_benchmarks/algorithms/qg_ngt.py | 5 +---- ann_benchmarks/algorithms/qsg_ngt.py | 5 +---- ann_benchmarks/algorithms/scann.py | 1 - ann_benchmarks/algorithms/vearch.py | 2 -- ann_benchmarks/algorithms/vespa.py | 1 - ann_benchmarks/datasets.py | 6 ++---- ann_benchmarks/distance.py | 1 - ann_benchmarks/main.py | 1 - ann_benchmarks/plotting/plot_variants.py | 1 - ann_benchmarks/runner.py | 5 ++--- 24 files changed, 11 insertions(+), 50 deletions(-) diff --git a/ann_benchmarks/algorithms/definitions.py b/ann_benchmarks/algorithms/definitions.py index 901890fa2..cabd1446c 100644 --- a/ann_benchmarks/algorithms/definitions.py +++ b/ann_benchmarks/algorithms/definitions.py @@ -1,10 +1,6 @@ from __future__ import absolute_import -from os import sep as pathsep import collections import importlib -import os -import sys -import traceback import yaml from enum import Enum from itertools import product diff --git a/ann_benchmarks/algorithms/diskann.py b/ann_benchmarks/algorithms/diskann.py index 083c2e23b..7502141c9 100644 --- a/ann_benchmarks/algorithms/diskann.py +++ b/ann_benchmarks/algorithms/diskann.py @@ -1,4 +1,3 @@ -import sys import os import vamanapy as vp import numpy as np diff --git a/ann_benchmarks/algorithms/dolphinnpy.py b/ann_benchmarks/algorithms/dolphinnpy.py index 3d7dc24e5..34e7192cc 100644 --- a/ann_benchmarks/algorithms/dolphinnpy.py +++ b/ann_benchmarks/algorithms/dolphinnpy.py @@ -2,7 +2,6 @@ import sys sys.path.append("install/lib-dolphinnpy") # noqa import numpy -import ctypes from dolphinn import Dolphinn from utils import findmean, isotropize from ann_benchmarks.algorithms.base import BaseANN diff --git a/ann_benchmarks/algorithms/elastiknn.py b/ann_benchmarks/algorithms/elastiknn.py index e3dd00bf2..8add0d8f2 100644 --- a/ann_benchmarks/algorithms/elastiknn.py +++ b/ann_benchmarks/algorithms/elastiknn.py @@ -4,7 +4,6 @@ To install a local copy of the client, run `pip install --upgrade -e /path/to/elastiknn/client-python/` To monitor the Elasticsearch JVM using Visualvm, add `ports={ "8097": 8097 }` to the `containers.run` call in runner.py. """ -from sys import stderr from urllib.error import URLError import numpy as np diff --git a/ann_benchmarks/algorithms/faiss.py b/ann_benchmarks/algorithms/faiss.py index e0a528e6c..9d6244400 100644 --- a/ann_benchmarks/algorithms/faiss.py +++ b/ann_benchmarks/algorithms/faiss.py @@ -3,7 +3,6 @@ sys.path.append("install/lib-faiss") # noqa import numpy import sklearn.preprocessing -import ctypes import faiss from ann_benchmarks.algorithms.base import BaseANN diff --git a/ann_benchmarks/algorithms/faiss_gpu.py b/ann_benchmarks/algorithms/faiss_gpu.py index c841936c6..b30423abc 100644 --- a/ann_benchmarks/algorithms/faiss_gpu.py +++ b/ann_benchmarks/algorithms/faiss_gpu.py @@ -3,7 +3,6 @@ # Assumes local installation of FAISS sys.path.append("faiss") # noqa import numpy -import ctypes import faiss from ann_benchmarks.algorithms.base import BaseANN diff --git a/ann_benchmarks/algorithms/faiss_hnsw.py b/ann_benchmarks/algorithms/faiss_hnsw.py index f877df0a2..38414dfc2 100644 --- a/ann_benchmarks/algorithms/faiss_hnsw.py +++ b/ann_benchmarks/algorithms/faiss_hnsw.py @@ -1,9 +1,6 @@ from __future__ import absolute_import -import os import faiss import numpy as np -from ann_benchmarks.constants import INDEX_DIR -from ann_benchmarks.algorithms.base import BaseANN from ann_benchmarks.algorithms.faiss import Faiss diff --git a/ann_benchmarks/algorithms/hnswlib.py b/ann_benchmarks/algorithms/hnswlib.py index f3ea8e32d..8526eaef4 100644 --- a/ann_benchmarks/algorithms/hnswlib.py +++ b/ann_benchmarks/algorithms/hnswlib.py @@ -1,8 +1,6 @@ from __future__ import absolute_import -import os import hnswlib import numpy as np -from ann_benchmarks.constants import INDEX_DIR from ann_benchmarks.algorithms.base import BaseANN diff --git a/ann_benchmarks/algorithms/luceneknn.py b/ann_benchmarks/algorithms/luceneknn.py index 02cafa15e..6941d636b 100644 --- a/ann_benchmarks/algorithms/luceneknn.py +++ b/ann_benchmarks/algorithms/luceneknn.py @@ -12,7 +12,7 @@ from org.apache.lucene.search import KnnVectorQuery, IndexSearcher from org.apache.lucene.index import IndexWriter, IndexWriterConfig, VectorSimilarityFunction, DirectoryReader from org.apache.lucene.codecs.lucene94 import Lucene94HnswVectorsFormat -from org.apache.lucene.document import Document, FieldType, KnnVectorField, StoredField +from org.apache.lucene.document import Document, KnnVectorField, StoredField from org.apache.pylucene.codecs import PyLucene94Codec from ann_benchmarks.algorithms.base import BaseANN @@ -39,7 +39,7 @@ def __init__(self, metric: str, dimension: int, param): try: lucene.initVM(vmargs=['-Djava.awt.headless=true -Xmx6g -Xms6g']) except ValueError: - print(f'VM already initialized') + print('VM already initialized') self.metric = metric self.dimension = dimension self.param = param diff --git a/ann_benchmarks/algorithms/milvus.py b/ann_benchmarks/algorithms/milvus.py index bbb67b1ee..55440ae85 100644 --- a/ann_benchmarks/algorithms/milvus.py +++ b/ann_benchmarks/algorithms/milvus.py @@ -1,7 +1,6 @@ from __future__ import absolute_import import numpy import pyknowhere -import sklearn.preprocessing from ann_benchmarks.algorithms.base import BaseANN diff --git a/ann_benchmarks/algorithms/onng_ngt.py b/ann_benchmarks/algorithms/onng_ngt.py index 3dfced3d4..826e22e28 100644 --- a/ann_benchmarks/algorithms/onng_ngt.py +++ b/ann_benchmarks/algorithms/onng_ngt.py @@ -1,12 +1,9 @@ from __future__ import absolute_import -import sys import os import ngtpy -import numpy as np import subprocess import time from ann_benchmarks.algorithms.base import BaseANN -from ann_benchmarks.constants import INDEX_DIR class ONNG(BaseANN): @@ -18,8 +15,8 @@ def __init__(self, metric, object_type, epsilon, param): self._metric = metrics[metric] self._object_type = object_type self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else 0 - self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False - self._refine_enabled = (param['refine'] == True) if 'refine' in param.keys() else False + self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False + self._refine_enabled = (param['refine'] is True) if 'refine' in param.keys() else False self._build_time_limit = 4 self._epsilon = epsilon print('ONNG: edge_size=' + str(self._edge_size)) diff --git a/ann_benchmarks/algorithms/opensearchknn.py b/ann_benchmarks/algorithms/opensearchknn.py index 70fd239fd..f1e8ebf6b 100644 --- a/ann_benchmarks/algorithms/opensearchknn.py +++ b/ann_benchmarks/algorithms/opensearchknn.py @@ -1,6 +1,4 @@ import logging -from time import sleep -from urllib.error import URLError from urllib.request import Request, urlopen from elasticsearch import Elasticsearch diff --git a/ann_benchmarks/algorithms/panng_ngt.py b/ann_benchmarks/algorithms/panng_ngt.py index 84c32996e..e3f7bdadb 100644 --- a/ann_benchmarks/algorithms/panng_ngt.py +++ b/ann_benchmarks/algorithms/panng_ngt.py @@ -1,12 +1,9 @@ from __future__ import absolute_import -import sys import os import ngtpy -import numpy as np import subprocess import time from ann_benchmarks.algorithms.base import BaseANN -from ann_benchmarks.constants import INDEX_DIR class PANNG(BaseANN): diff --git a/ann_benchmarks/algorithms/pynndescent.py b/ann_benchmarks/algorithms/pynndescent.py index a747aa354..92f470538 100644 --- a/ann_benchmarks/algorithms/pynndescent.py +++ b/ann_benchmarks/algorithms/pynndescent.py @@ -27,7 +27,7 @@ def __init__(self, metric, index_param_dict, n_search_trees=1): if "leaf_size" in index_param_dict: self._leaf_size = int(index_param_dict["leaf_size"]) else: - leaf_size = 32 + pass self._n_search_trees = int(n_search_trees) diff --git a/ann_benchmarks/algorithms/qg_ngt.py b/ann_benchmarks/algorithms/qg_ngt.py index 85726ec8e..b097d5793 100644 --- a/ann_benchmarks/algorithms/qg_ngt.py +++ b/ann_benchmarks/algorithms/qg_ngt.py @@ -1,12 +1,9 @@ from __future__ import absolute_import -import sys import os import ngtpy -import numpy as np import subprocess import time from ann_benchmarks.algorithms.base import BaseANN -from ann_benchmarks.constants import INDEX_DIR class QG(BaseANN): def __init__(self, metric, object_type, epsilon, param): @@ -18,7 +15,7 @@ def __init__(self, metric, object_type, epsilon, param): self._metric = metrics[metric] self._object_type = object_type self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else -2 - self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False + self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False self._build_time_limit = 4 self._epsilon = epsilon print('QG: edge_size=' + str(self._edge_size)) diff --git a/ann_benchmarks/algorithms/qsg_ngt.py b/ann_benchmarks/algorithms/qsg_ngt.py index 55e0a68a7..8ed27f1a9 100644 --- a/ann_benchmarks/algorithms/qsg_ngt.py +++ b/ann_benchmarks/algorithms/qsg_ngt.py @@ -1,14 +1,11 @@ from __future__ import absolute_import -import sys import os import ngtpy -import numpy as np import subprocess import struct from sklearn import preprocessing import time from ann_benchmarks.algorithms.base import BaseANN -from ann_benchmarks.constants import INDEX_DIR class QSG(BaseANN): @@ -21,7 +18,7 @@ def __init__(self, metric, object_type, epsilon, param): self._metric = metrics[metric] self._object_type = object_type self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else -2 - self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False + self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False self._build_time_limit = 4 self._epsilon = epsilon self._paramE = param['paramE'] diff --git a/ann_benchmarks/algorithms/scann.py b/ann_benchmarks/algorithms/scann.py index bfb2871c6..d2ceb0db2 100644 --- a/ann_benchmarks/algorithms/scann.py +++ b/ann_benchmarks/algorithms/scann.py @@ -1,5 +1,4 @@ from __future__ import absolute_import -import os import numpy as np import scann from ann_benchmarks.algorithms.base import BaseANN diff --git a/ann_benchmarks/algorithms/vearch.py b/ann_benchmarks/algorithms/vearch.py index 3104f3cdf..bf16dbe5f 100644 --- a/ann_benchmarks/algorithms/vearch.py +++ b/ann_benchmarks/algorithms/vearch.py @@ -1,6 +1,4 @@ from __future__ import absolute_import -import sys -import os import time import numpy as np import vearch diff --git a/ann_benchmarks/algorithms/vespa.py b/ann_benchmarks/algorithms/vespa.py index 2d448bbf3..cf5ba3af7 100644 --- a/ann_benchmarks/algorithms/vespa.py +++ b/ann_benchmarks/algorithms/vespa.py @@ -1,6 +1,5 @@ from ann_benchmarks.algorithms.base import BaseANN from vespa_ann_benchmark import DistanceMetric, HnswIndexParams, HnswIndex -import time # Class using the Vespa implementation of an HNSW index for nearest neighbor # search over data points in a high dimensional vector space. diff --git a/ann_benchmarks/datasets.py b/ann_benchmarks/datasets.py index a4582a6e3..d2aa566cc 100644 --- a/ann_benchmarks/datasets.py +++ b/ann_benchmarks/datasets.py @@ -6,7 +6,6 @@ from urllib.request import urlopen from urllib.request import urlretrieve -from ann_benchmarks.distance import dataset_transform def download(src, dst): @@ -48,7 +47,6 @@ def get_dataset(which): def write_output(train, test, fn, distance, point_type='float', count=100): from ann_benchmarks.algorithms.bruteforce import BruteForceBLAS - n = 0 f = h5py.File(fn, 'w') f.attrs['type'] = 'dense' f.attrs['distance'] = distance @@ -115,7 +113,7 @@ def write_sparse_output(train, test, fn, distance, dimension, count=100): def train_test_split(X, test_size=10000, dimension=None): import sklearn.model_selection - if dimension == None: + if dimension is None: dimension = X.shape[1] print('Splitting %d*%d into train/test' % (X.shape[0], dimension)) return sklearn.model_selection.train_test_split( @@ -451,7 +449,7 @@ def movielens(fn, ratings_file, out_fn, separator='::', ignore_header=False): if rating < 3: # We only keep ratings >= 3 continue - if not userId in users: + if userId not in users: users[userId] = len(users) X.append([]) diff --git a/ann_benchmarks/distance.py b/ann_benchmarks/distance.py index fddf78e15..d649a2769 100644 --- a/ann_benchmarks/distance.py +++ b/ann_benchmarks/distance.py @@ -1,5 +1,4 @@ from __future__ import absolute_import -import itertools import numpy as np # Need own implementation of jaccard because scipy's diff --git a/ann_benchmarks/main.py b/ann_benchmarks/main.py index bcd75a6e2..45889143f 100644 --- a/ann_benchmarks/main.py +++ b/ann_benchmarks/main.py @@ -10,7 +10,6 @@ import random import shutil import sys -import traceback from ann_benchmarks.datasets import get_dataset, DATASETS from ann_benchmarks.constants import INDEX_DIR diff --git a/ann_benchmarks/plotting/plot_variants.py b/ann_benchmarks/plotting/plot_variants.py index a30d06dfd..e8777ee47 100644 --- a/ann_benchmarks/plotting/plot_variants.py +++ b/ann_benchmarks/plotting/plot_variants.py @@ -1,4 +1,3 @@ -from ann_benchmarks.plotting.metrics import all_metrics as metrics all_plot_variants = { "recall/time": ("k-nn", "qps"), diff --git a/ann_benchmarks/runner.py b/ann_benchmarks/runner.py index d2e896d44..8aaddc5cd 100644 --- a/ann_benchmarks/runner.py +++ b/ann_benchmarks/runner.py @@ -113,9 +113,8 @@ def run(definition, dataset, count, run_count, batch): X_train, X_test = dataset_transform(D) try: - prepared_queries = False if hasattr(algo, "supports_prepared_queries"): - prepared_queries = algo.supports_prepared_queries() + algo.supports_prepared_queries() t0 = time.time() memory_usage_before = algo.get_memory_usage() @@ -157,7 +156,7 @@ def run_from_cmdline(): parser.add_argument( '--dataset', choices=DATASETS.keys(), - help=f'Dataset to benchmark on.', + help='Dataset to benchmark on.', required=True) parser.add_argument( '--algorithm',