Commit: Misc Ruff fixes
erikbern committed Apr 7, 2023
1 parent 28cdde8 commit 47c5288
Showing 24 changed files with 11 additions and 50 deletions.
4 changes: 0 additions & 4 deletions ann_benchmarks/algorithms/definitions.py
@@ -1,10 +1,6 @@
from __future__ import absolute_import
from os import sep as pathsep
import collections
import importlib
import os
import sys
import traceback
import yaml
from enum import Enum
from itertools import product
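
Most of the deletions across the files in this commit are unused imports, the pattern Ruff reports as F401 (the deletion markers are not preserved in this view, so the specific removed lines are not singled out). A minimal sketch of the pattern, using a hypothetical module rather than code from this repo:

import importlib
import sys  # never referenced below, so an F401 autofix would delete this line


def load_algorithm(module_name):
    # Only importlib is actually used here; `import sys` above is dead code.
    return importlib.import_module(module_name)
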
1 change: 0 additions & 1 deletion ann_benchmarks/algorithms/diskann.py
@@ -1,4 +1,3 @@
import sys
import os
import vamanapy as vp
import numpy as np
1 change: 0 additions & 1 deletion ann_benchmarks/algorithms/dolphinnpy.py
@@ -2,7 +2,6 @@
import sys
sys.path.append("install/lib-dolphinnpy") # noqa
import numpy
import ctypes
from dolphinn import Dolphinn
from utils import findmean, isotropize
from ann_benchmarks.algorithms.base import BaseANN
1 change: 0 additions & 1 deletion ann_benchmarks/algorithms/elastiknn.py
@@ -4,7 +4,6 @@
To install a local copy of the client, run `pip install --upgrade -e /path/to/elastiknn/client-python/`
To monitor the Elasticsearch JVM using Visualvm, add `ports={ "8097": 8097 }` to the `containers.run` call in runner.py.
"""
from sys import stderr
from urllib.error import URLError

import numpy as np
1 change: 0 additions & 1 deletion ann_benchmarks/algorithms/faiss.py
@@ -3,7 +3,6 @@
sys.path.append("install/lib-faiss") # noqa
import numpy
import sklearn.preprocessing
import ctypes
import faiss
from ann_benchmarks.algorithms.base import BaseANN

1 change: 0 additions & 1 deletion ann_benchmarks/algorithms/faiss_gpu.py
@@ -3,7 +3,6 @@
# Assumes local installation of FAISS
sys.path.append("faiss") # noqa
import numpy
import ctypes
import faiss
from ann_benchmarks.algorithms.base import BaseANN

3 changes: 0 additions & 3 deletions ann_benchmarks/algorithms/faiss_hnsw.py
@@ -1,9 +1,6 @@
from __future__ import absolute_import
import os
import faiss
import numpy as np
from ann_benchmarks.constants import INDEX_DIR
from ann_benchmarks.algorithms.base import BaseANN
from ann_benchmarks.algorithms.faiss import Faiss


2 changes: 0 additions & 2 deletions ann_benchmarks/algorithms/hnswlib.py
@@ -1,8 +1,6 @@
from __future__ import absolute_import
import os
import hnswlib
import numpy as np
from ann_benchmarks.constants import INDEX_DIR
from ann_benchmarks.algorithms.base import BaseANN


4 changes: 2 additions & 2 deletions ann_benchmarks/algorithms/luceneknn.py
@@ -12,7 +12,7 @@
from org.apache.lucene.search import KnnVectorQuery, IndexSearcher
from org.apache.lucene.index import IndexWriter, IndexWriterConfig, VectorSimilarityFunction, DirectoryReader
from org.apache.lucene.codecs.lucene94 import Lucene94HnswVectorsFormat
-from org.apache.lucene.document import Document, FieldType, KnnVectorField, StoredField
+from org.apache.lucene.document import Document, KnnVectorField, StoredField
from org.apache.pylucene.codecs import PyLucene94Codec
from ann_benchmarks.algorithms.base import BaseANN

@@ -39,7 +39,7 @@ def __init__(self, metric: str, dimension: int, param):
try:
lucene.initVM(vmargs=['-Djava.awt.headless=true -Xmx6g -Xms6g'])
except ValueError:
-print(f'VM already initialized')
+print('VM already initialized')
self.metric = metric
self.dimension = dimension
self.param = param
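
The luceneknn change pairs an unused-import removal (FieldType) with dropping an f-string prefix from a string that has no placeholders, which Ruff reports as F541. A small sketch of that rewrite, taken from the print call in the diff above:

# Before: the f-prefix does nothing because the string has no {} placeholders.
# print(f'VM already initialized')
# After: a plain string literal is equivalent and clearer.
print('VM already initialized')
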
1 change: 0 additions & 1 deletion ann_benchmarks/algorithms/milvus.py
@@ -1,7 +1,6 @@
from __future__ import absolute_import
import numpy
import pyknowhere
import sklearn.preprocessing
from ann_benchmarks.algorithms.base import BaseANN


7 changes: 2 additions & 5 deletions ann_benchmarks/algorithms/onng_ngt.py
@@ -1,12 +1,9 @@
from __future__ import absolute_import
import sys
import os
import ngtpy
import numpy as np
import subprocess
import time
from ann_benchmarks.algorithms.base import BaseANN
from ann_benchmarks.constants import INDEX_DIR


class ONNG(BaseANN):
@@ -18,8 +15,8 @@ def __init__(self, metric, object_type, epsilon, param):
self._metric = metrics[metric]
self._object_type = object_type
self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else 0
-self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False
-self._refine_enabled = (param['refine'] == True) if 'refine' in param.keys() else False
+self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False
+self._refine_enabled = (param['refine'] is True) if 'refine' in param.keys() else False
self._build_time_limit = 4
self._epsilon = epsilon
print('ONNG: edge_size=' + str(self._edge_size))
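
The `== False` / `== True` comparisons rewritten above are the pattern Ruff reports as E712. A self-contained sketch of the distinction, using a hypothetical helper rather than the actual class; note that `is False` matches only the real False object, whereas `== False` would also match 0:

def tree_disabled(param: dict) -> bool:
    # Before: (param['tree'] == False) if 'tree' in param.keys() else False
    # After:  identity comparison against the False singleton.
    return (param['tree'] is False) if 'tree' in param else False


assert tree_disabled({'tree': False}) is True
assert tree_disabled({'tree': True}) is False
assert tree_disabled({}) is False
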
2 changes: 0 additions & 2 deletions ann_benchmarks/algorithms/opensearchknn.py
@@ -1,6 +1,4 @@
import logging
from time import sleep
from urllib.error import URLError
from urllib.request import Request, urlopen

from elasticsearch import Elasticsearch
3 changes: 0 additions & 3 deletions ann_benchmarks/algorithms/panng_ngt.py
@@ -1,12 +1,9 @@
from __future__ import absolute_import
import sys
import os
import ngtpy
import numpy as np
import subprocess
import time
from ann_benchmarks.algorithms.base import BaseANN
from ann_benchmarks.constants import INDEX_DIR


class PANNG(BaseANN):
2 changes: 1 addition & 1 deletion ann_benchmarks/algorithms/pynndescent.py
@@ -27,7 +27,7 @@ def __init__(self, metric, index_param_dict, n_search_trees=1):
if "leaf_size" in index_param_dict:
self._leaf_size = int(index_param_dict["leaf_size"])
else:
-leaf_size = 32
+pass

self._n_search_trees = int(n_search_trees)

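
The `leaf_size = 32` removed here was a local that is written but never read, which Ruff reports as F841; the commit keeps the else branch and substitutes `pass`. A short sketch of the pattern, with a hypothetical class rather than the real PyNNDescent wrapper:

class ExampleIndex:
    def __init__(self, index_param_dict):
        if "leaf_size" in index_param_dict:
            self._leaf_size = int(index_param_dict["leaf_size"])
        else:
            # Before: leaf_size = 32  -- assigned to a plain local, never read,
            # and not the attribute the rest of the class uses (F841).
            pass
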
5 changes: 1 addition & 4 deletions ann_benchmarks/algorithms/qg_ngt.py
@@ -1,12 +1,9 @@
from __future__ import absolute_import
import sys
import os
import ngtpy
import numpy as np
import subprocess
import time
from ann_benchmarks.algorithms.base import BaseANN
from ann_benchmarks.constants import INDEX_DIR

class QG(BaseANN):
def __init__(self, metric, object_type, epsilon, param):
@@ -18,7 +15,7 @@ def __init__(self, metric, object_type, epsilon, param):
self._metric = metrics[metric]
self._object_type = object_type
self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else -2
-self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False
+self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False
self._build_time_limit = 4
self._epsilon = epsilon
print('QG: edge_size=' + str(self._edge_size))
5 changes: 1 addition & 4 deletions ann_benchmarks/algorithms/qsg_ngt.py
@@ -1,14 +1,11 @@
from __future__ import absolute_import
import sys
import os
import ngtpy
import numpy as np
import subprocess
import struct
from sklearn import preprocessing
import time
from ann_benchmarks.algorithms.base import BaseANN
from ann_benchmarks.constants import INDEX_DIR


class QSG(BaseANN):
@@ -21,7 +18,7 @@ def __init__(self, metric, object_type, epsilon, param):
self._metric = metrics[metric]
self._object_type = object_type
self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else -2
-self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False
+self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False
self._build_time_limit = 4
self._epsilon = epsilon
self._paramE = param['paramE']
1 change: 0 additions & 1 deletion ann_benchmarks/algorithms/scann.py
@@ -1,5 +1,4 @@
from __future__ import absolute_import
import os
import numpy as np
import scann
from ann_benchmarks.algorithms.base import BaseANN
2 changes: 0 additions & 2 deletions ann_benchmarks/algorithms/vearch.py
@@ -1,6 +1,4 @@
from __future__ import absolute_import
import sys
import os
import time
import numpy as np
import vearch
1 change: 0 additions & 1 deletion ann_benchmarks/algorithms/vespa.py
@@ -1,6 +1,5 @@
from ann_benchmarks.algorithms.base import BaseANN
from vespa_ann_benchmark import DistanceMetric, HnswIndexParams, HnswIndex
import time

# Class using the Vespa implementation of an HNSW index for nearest neighbor
# search over data points in a high dimensional vector space.
6 changes: 2 additions & 4 deletions ann_benchmarks/datasets.py
@@ -6,7 +6,6 @@
from urllib.request import urlopen
from urllib.request import urlretrieve

from ann_benchmarks.distance import dataset_transform


def download(src, dst):
@@ -48,7 +47,6 @@ def get_dataset(which):

def write_output(train, test, fn, distance, point_type='float', count=100):
from ann_benchmarks.algorithms.bruteforce import BruteForceBLAS
-n = 0
f = h5py.File(fn, 'w')
f.attrs['type'] = 'dense'
f.attrs['distance'] = distance
@@ -115,7 +113,7 @@ def write_sparse_output(train, test, fn, distance, dimension, count=100):

def train_test_split(X, test_size=10000, dimension=None):
import sklearn.model_selection
-if dimension == None:
+if dimension is None:
dimension = X.shape[1]
print('Splitting %d*%d into train/test' % (X.shape[0], dimension))
return sklearn.model_selection.train_test_split(
@@ -451,7 +449,7 @@ def movielens(fn, ratings_file, out_fn, separator='::', ignore_header=False):
if rating < 3: # We only keep ratings >= 3
continue

-if not userId in users:
+if userId not in users:
users[userId] = len(users)
X.append([])

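
datasets.py picks up two comparison rewrites: `dimension == None` becomes an identity check (Ruff E711) and `not userId in users` becomes a membership test with `not in` (E713). A minimal sketch combining both, with hypothetical function names:

import numpy as np


def pick_dimension(X, dimension=None):
    # E711: `dimension == None` -> `dimension is None`; None is a singleton,
    # so an identity check is the idiomatic test.
    if dimension is None:
        dimension = X.shape[1]
    return dimension


def user_index(userId, users):
    # E713: `not userId in users` parses as `not (userId in users)` anyway,
    # but `userId not in users` states the intent directly.
    if userId not in users:
        users[userId] = len(users)
    return users[userId]


assert pick_dimension(np.zeros((5, 3))) == 3
assert user_index("u1", {}) == 0
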
1 change: 0 additions & 1 deletion ann_benchmarks/distance.py
@@ -1,5 +1,4 @@
from __future__ import absolute_import
import itertools
import numpy as np

# Need own implementation of jaccard because scipy's
1 change: 0 additions & 1 deletion ann_benchmarks/main.py
@@ -10,7 +10,6 @@
import random
import shutil
import sys
import traceback

from ann_benchmarks.datasets import get_dataset, DATASETS
from ann_benchmarks.constants import INDEX_DIR
1 change: 0 additions & 1 deletion ann_benchmarks/plotting/plot_variants.py
@@ -1,4 +1,3 @@
from ann_benchmarks.plotting.metrics import all_metrics as metrics

all_plot_variants = {
"recall/time": ("k-nn", "qps"),
5 changes: 2 additions & 3 deletions ann_benchmarks/runner.py
@@ -113,9 +113,8 @@ def run(definition, dataset, count, run_count, batch):
X_train, X_test = dataset_transform(D)

try:
-prepared_queries = False
if hasattr(algo, "supports_prepared_queries"):
-prepared_queries = algo.supports_prepared_queries()
+algo.supports_prepared_queries()

t0 = time.time()
memory_usage_before = algo.get_memory_usage()
@@ -157,7 +156,7 @@ def run_from_cmdline():
parser.add_argument(
'--dataset',
choices=DATASETS.keys(),
-help=f'Dataset to benchmark on.',
+help='Dataset to benchmark on.',
required=True)
parser.add_argument(
'--algorithm',
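
In runner.py the `prepared_queries` local was assigned but never consulted afterwards (F841 again), so the commit drops the variable while keeping the guarded `algo.supports_prepared_queries()` call itself; the second hunk is another placeholder-free f-string (F541). A sketch of the first pattern, with a hypothetical stand-in object:

class FakeAlgo:
    def supports_prepared_queries(self):
        return False


algo = FakeAlgo()

# Before:
#     prepared_queries = False
#     if hasattr(algo, "supports_prepared_queries"):
#         prepared_queries = algo.supports_prepared_queries()
# After: the return value was never read, so only the guarded call remains.
if hasattr(algo, "supports_prepared_queries"):
    algo.supports_prepared_queries()
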
