From 47c52886678546987f92f6053fec47187b228d54 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson <mail@erikbern.com>
Date: Fri, 7 Apr 2023 13:07:31 -0400
Subject: [PATCH] Misc Ruff fixes

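---
Review notes (below the three-dash line, so not part of the commit message):
 * onng_ngt.py, qg_ngt.py, qsg_ngt.py: rewriting `== False` / `== True` as
   `is False` / `is True` is not strictly behavior-preserving. A value such
   as 0 or 1 compares equal to a bool but is not identical to it, so it now
   takes the other branch. Presumably these params are always real bools;
   confirm against the algorithm configs.
 * pynndescent.py: replacing the unused local `leaf_size = 32` with `pass`
   keeps current behavior, but the else branch shown in this hunk still does
   not assign `self._leaf_size`. The original line was presumably meant to
   be `self._leaf_size = 32`; confirm and fix in a follow-up.
 * runner.py: the result of `algo.supports_prepared_queries()` is now
   discarded. If the call has no side effects, the whole `hasattr` check can
   presumably be dropped; confirm.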
 ann_benchmarks/algorithms/definitions.py   | 4 ----
 ann_benchmarks/algorithms/diskann.py       | 1 -
 ann_benchmarks/algorithms/dolphinnpy.py    | 1 -
 ann_benchmarks/algorithms/elastiknn.py     | 1 -
 ann_benchmarks/algorithms/faiss.py         | 1 -
 ann_benchmarks/algorithms/faiss_gpu.py     | 1 -
 ann_benchmarks/algorithms/faiss_hnsw.py    | 3 ---
 ann_benchmarks/algorithms/hnswlib.py       | 2 --
 ann_benchmarks/algorithms/luceneknn.py     | 4 ++--
 ann_benchmarks/algorithms/milvus.py        | 1 -
 ann_benchmarks/algorithms/onng_ngt.py      | 7 ++-----
 ann_benchmarks/algorithms/opensearchknn.py | 2 --
 ann_benchmarks/algorithms/panng_ngt.py     | 3 ---
 ann_benchmarks/algorithms/pynndescent.py   | 2 +-
 ann_benchmarks/algorithms/qg_ngt.py        | 5 +----
 ann_benchmarks/algorithms/qsg_ngt.py       | 5 +----
 ann_benchmarks/algorithms/scann.py         | 1 -
 ann_benchmarks/algorithms/vearch.py        | 2 --
 ann_benchmarks/algorithms/vespa.py         | 1 -
 ann_benchmarks/datasets.py                 | 6 ++----
 ann_benchmarks/distance.py                 | 1 -
 ann_benchmarks/main.py                     | 1 -
 ann_benchmarks/plotting/plot_variants.py   | 1 -
 ann_benchmarks/runner.py                   | 5 ++---
 24 files changed, 11 insertions(+), 50 deletions(-)

diff --git a/ann_benchmarks/algorithms/definitions.py b/ann_benchmarks/algorithms/definitions.py
index 901890fa2..cabd1446c 100644
--- a/ann_benchmarks/algorithms/definitions.py
+++ b/ann_benchmarks/algorithms/definitions.py
@@ -1,10 +1,6 @@
 from __future__ import absolute_import
-from os import sep as pathsep
 import collections
 import importlib
-import os
-import sys
-import traceback
 import yaml
 from enum import Enum
 from itertools import product
diff --git a/ann_benchmarks/algorithms/diskann.py b/ann_benchmarks/algorithms/diskann.py
index 083c2e23b..7502141c9 100644
--- a/ann_benchmarks/algorithms/diskann.py
+++ b/ann_benchmarks/algorithms/diskann.py
@@ -1,4 +1,3 @@
-import sys
 import os
 import vamanapy as vp
 import numpy as np
diff --git a/ann_benchmarks/algorithms/dolphinnpy.py b/ann_benchmarks/algorithms/dolphinnpy.py
index 3d7dc24e5..34e7192cc 100644
--- a/ann_benchmarks/algorithms/dolphinnpy.py
+++ b/ann_benchmarks/algorithms/dolphinnpy.py
@@ -2,7 +2,6 @@
 import sys
 sys.path.append("install/lib-dolphinnpy")  # noqa
 import numpy
-import ctypes
 from dolphinn import Dolphinn
 from utils import findmean, isotropize
 from ann_benchmarks.algorithms.base import BaseANN
diff --git a/ann_benchmarks/algorithms/elastiknn.py b/ann_benchmarks/algorithms/elastiknn.py
index e3dd00bf2..8add0d8f2 100644
--- a/ann_benchmarks/algorithms/elastiknn.py
+++ b/ann_benchmarks/algorithms/elastiknn.py
@@ -4,7 +4,6 @@
 To install a local copy of the client, run `pip install --upgrade -e /path/to/elastiknn/client-python/`
 To monitor the Elasticsearch JVM using Visualvm, add `ports={ "8097": 8097 }` to the `containers.run` call in runner.py.
 """
-from sys import stderr
 from urllib.error import URLError
 
 import numpy as np
diff --git a/ann_benchmarks/algorithms/faiss.py b/ann_benchmarks/algorithms/faiss.py
index e0a528e6c..9d6244400 100644
--- a/ann_benchmarks/algorithms/faiss.py
+++ b/ann_benchmarks/algorithms/faiss.py
@@ -3,7 +3,6 @@
 sys.path.append("install/lib-faiss")  # noqa
 import numpy
 import sklearn.preprocessing
-import ctypes
 import faiss
 from ann_benchmarks.algorithms.base import BaseANN
 
diff --git a/ann_benchmarks/algorithms/faiss_gpu.py b/ann_benchmarks/algorithms/faiss_gpu.py
index c841936c6..b30423abc 100644
--- a/ann_benchmarks/algorithms/faiss_gpu.py
+++ b/ann_benchmarks/algorithms/faiss_gpu.py
@@ -3,7 +3,6 @@
 # Assumes local installation of FAISS
 sys.path.append("faiss")  # noqa
 import numpy
-import ctypes
 import faiss
 from ann_benchmarks.algorithms.base import BaseANN
 
diff --git a/ann_benchmarks/algorithms/faiss_hnsw.py b/ann_benchmarks/algorithms/faiss_hnsw.py
index f877df0a2..38414dfc2 100644
--- a/ann_benchmarks/algorithms/faiss_hnsw.py
+++ b/ann_benchmarks/algorithms/faiss_hnsw.py
@@ -1,9 +1,6 @@
 from __future__ import absolute_import
-import os
 import faiss
 import numpy as np
-from ann_benchmarks.constants import INDEX_DIR
-from ann_benchmarks.algorithms.base import BaseANN
 from ann_benchmarks.algorithms.faiss import Faiss
 
 
diff --git a/ann_benchmarks/algorithms/hnswlib.py b/ann_benchmarks/algorithms/hnswlib.py
index f3ea8e32d..8526eaef4 100644
--- a/ann_benchmarks/algorithms/hnswlib.py
+++ b/ann_benchmarks/algorithms/hnswlib.py
@@ -1,8 +1,6 @@
 from __future__ import absolute_import
-import os
 import hnswlib
 import numpy as np
-from ann_benchmarks.constants import INDEX_DIR
 from ann_benchmarks.algorithms.base import BaseANN
 
 
diff --git a/ann_benchmarks/algorithms/luceneknn.py b/ann_benchmarks/algorithms/luceneknn.py
index 02cafa15e..6941d636b 100644
--- a/ann_benchmarks/algorithms/luceneknn.py
+++ b/ann_benchmarks/algorithms/luceneknn.py
@@ -12,7 +12,7 @@
 from org.apache.lucene.search import KnnVectorQuery, IndexSearcher
 from org.apache.lucene.index import IndexWriter, IndexWriterConfig, VectorSimilarityFunction, DirectoryReader
 from org.apache.lucene.codecs.lucene94 import Lucene94HnswVectorsFormat
-from org.apache.lucene.document import Document, FieldType, KnnVectorField, StoredField
+from org.apache.lucene.document import Document, KnnVectorField, StoredField
 from org.apache.pylucene.codecs import PyLucene94Codec
 from ann_benchmarks.algorithms.base import BaseANN
 
@@ -39,7 +39,7 @@ def __init__(self, metric: str, dimension: int, param):
         try:
             lucene.initVM(vmargs=['-Djava.awt.headless=true -Xmx6g -Xms6g'])
         except ValueError:
-            print(f'VM already initialized')
+            print('VM already initialized')
         self.metric = metric
         self.dimension = dimension
         self.param = param
diff --git a/ann_benchmarks/algorithms/milvus.py b/ann_benchmarks/algorithms/milvus.py
index bbb67b1ee..55440ae85 100644
--- a/ann_benchmarks/algorithms/milvus.py
+++ b/ann_benchmarks/algorithms/milvus.py
@@ -1,7 +1,6 @@
 from __future__ import absolute_import
 import numpy
 import pyknowhere
-import sklearn.preprocessing
 from ann_benchmarks.algorithms.base import BaseANN
 
 
diff --git a/ann_benchmarks/algorithms/onng_ngt.py b/ann_benchmarks/algorithms/onng_ngt.py
index 3dfced3d4..826e22e28 100644
--- a/ann_benchmarks/algorithms/onng_ngt.py
+++ b/ann_benchmarks/algorithms/onng_ngt.py
@@ -1,12 +1,9 @@
 from __future__ import absolute_import
-import sys
 import os
 import ngtpy
-import numpy as np
 import subprocess
 import time
 from ann_benchmarks.algorithms.base import BaseANN
-from ann_benchmarks.constants import INDEX_DIR
 
 
 class ONNG(BaseANN):
@@ -18,8 +15,8 @@ def __init__(self, metric, object_type, epsilon, param):
         self._metric = metrics[metric]
         self._object_type = object_type
         self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else 0
-        self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False
-        self._refine_enabled = (param['refine'] == True) if 'refine' in param.keys() else False
+        self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False
+        self._refine_enabled = (param['refine'] is True) if 'refine' in param.keys() else False
         self._build_time_limit = 4
         self._epsilon = epsilon
         print('ONNG: edge_size=' + str(self._edge_size))
diff --git a/ann_benchmarks/algorithms/opensearchknn.py b/ann_benchmarks/algorithms/opensearchknn.py
index 70fd239fd..f1e8ebf6b 100644
--- a/ann_benchmarks/algorithms/opensearchknn.py
+++ b/ann_benchmarks/algorithms/opensearchknn.py
@@ -1,6 +1,4 @@
 import logging
-from time import sleep
-from urllib.error import URLError
 from urllib.request import Request, urlopen
 
 from elasticsearch import Elasticsearch
diff --git a/ann_benchmarks/algorithms/panng_ngt.py b/ann_benchmarks/algorithms/panng_ngt.py
index 84c32996e..e3f7bdadb 100644
--- a/ann_benchmarks/algorithms/panng_ngt.py
+++ b/ann_benchmarks/algorithms/panng_ngt.py
@@ -1,12 +1,9 @@
 from __future__ import absolute_import
-import sys
 import os
 import ngtpy
-import numpy as np
 import subprocess
 import time
 from ann_benchmarks.algorithms.base import BaseANN
-from ann_benchmarks.constants import INDEX_DIR
 
 
 class PANNG(BaseANN):
diff --git a/ann_benchmarks/algorithms/pynndescent.py b/ann_benchmarks/algorithms/pynndescent.py
index a747aa354..92f470538 100644
--- a/ann_benchmarks/algorithms/pynndescent.py
+++ b/ann_benchmarks/algorithms/pynndescent.py
@@ -27,7 +27,7 @@ def __init__(self, metric, index_param_dict, n_search_trees=1):
         if "leaf_size" in index_param_dict:
             self._leaf_size = int(index_param_dict["leaf_size"])
         else:
-            leaf_size = 32
+            pass
 
         self._n_search_trees = int(n_search_trees)
 
diff --git a/ann_benchmarks/algorithms/qg_ngt.py b/ann_benchmarks/algorithms/qg_ngt.py
index 85726ec8e..b097d5793 100644
--- a/ann_benchmarks/algorithms/qg_ngt.py
+++ b/ann_benchmarks/algorithms/qg_ngt.py
@@ -1,12 +1,9 @@
 from __future__ import absolute_import
-import sys
 import os
 import ngtpy
-import numpy as np
 import subprocess
 import time
 from ann_benchmarks.algorithms.base import BaseANN
-from ann_benchmarks.constants import INDEX_DIR
 
 class QG(BaseANN):
     def __init__(self, metric, object_type, epsilon, param):
@@ -18,7 +15,7 @@ def __init__(self, metric, object_type, epsilon, param):
         self._metric = metrics[metric]
         self._object_type = object_type
         self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else -2
-        self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False
+        self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False
         self._build_time_limit = 4
         self._epsilon = epsilon
         print('QG: edge_size=' + str(self._edge_size))
diff --git a/ann_benchmarks/algorithms/qsg_ngt.py b/ann_benchmarks/algorithms/qsg_ngt.py
index 55e0a68a7..8ed27f1a9 100644
--- a/ann_benchmarks/algorithms/qsg_ngt.py
+++ b/ann_benchmarks/algorithms/qsg_ngt.py
@@ -1,14 +1,11 @@
 from __future__ import absolute_import
-import sys
 import os
 import ngtpy
-import numpy as np
 import subprocess
 import struct
 from sklearn import preprocessing
 import time
 from ann_benchmarks.algorithms.base import BaseANN
-from ann_benchmarks.constants import INDEX_DIR
 
 
 class QSG(BaseANN):
@@ -21,7 +18,7 @@ def __init__(self, metric, object_type, epsilon, param):
         self._metric = metrics[metric]
         self._object_type = object_type
         self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else -2
-        self._tree_disabled = (param['tree'] == False) if 'tree' in param.keys() else False
+        self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False
         self._build_time_limit = 4
         self._epsilon = epsilon
         self._paramE = param['paramE']
diff --git a/ann_benchmarks/algorithms/scann.py b/ann_benchmarks/algorithms/scann.py
index bfb2871c6..d2ceb0db2 100644
--- a/ann_benchmarks/algorithms/scann.py
+++ b/ann_benchmarks/algorithms/scann.py
@@ -1,5 +1,4 @@
 from __future__ import absolute_import
-import os
 import numpy as np
 import scann
 from ann_benchmarks.algorithms.base import BaseANN
diff --git a/ann_benchmarks/algorithms/vearch.py b/ann_benchmarks/algorithms/vearch.py
index 3104f3cdf..bf16dbe5f 100644
--- a/ann_benchmarks/algorithms/vearch.py
+++ b/ann_benchmarks/algorithms/vearch.py
@@ -1,6 +1,4 @@
 from __future__ import absolute_import
-import sys
-import os
 import time
 import numpy as np
 import vearch
diff --git a/ann_benchmarks/algorithms/vespa.py b/ann_benchmarks/algorithms/vespa.py
index 2d448bbf3..cf5ba3af7 100644
--- a/ann_benchmarks/algorithms/vespa.py
+++ b/ann_benchmarks/algorithms/vespa.py
@@ -1,6 +1,5 @@
 from ann_benchmarks.algorithms.base import BaseANN
 from vespa_ann_benchmark import DistanceMetric, HnswIndexParams, HnswIndex
-import time
 
 # Class using the Vespa implementation of an HNSW index for nearest neighbor
 # search over data points in a high dimensional vector space.
diff --git a/ann_benchmarks/datasets.py b/ann_benchmarks/datasets.py
index a4582a6e3..d2aa566cc 100644
--- a/ann_benchmarks/datasets.py
+++ b/ann_benchmarks/datasets.py
@@ -6,7 +6,6 @@
 from urllib.request import urlopen
 from urllib.request import urlretrieve
 
-from ann_benchmarks.distance import dataset_transform
 
 
 def download(src, dst):
@@ -48,7 +47,6 @@ def get_dataset(which):
 
 def write_output(train, test, fn, distance, point_type='float', count=100):
     from ann_benchmarks.algorithms.bruteforce import BruteForceBLAS
-    n = 0
     f = h5py.File(fn, 'w')
     f.attrs['type'] = 'dense'
     f.attrs['distance'] = distance
@@ -115,7 +113,7 @@ def write_sparse_output(train, test, fn, distance, dimension, count=100):
 
 def train_test_split(X, test_size=10000, dimension=None):
     import sklearn.model_selection
-    if dimension == None:
+    if dimension is None:
         dimension = X.shape[1]
     print('Splitting %d*%d into train/test' % (X.shape[0], dimension))
     return sklearn.model_selection.train_test_split(
@@ -451,7 +449,7 @@ def movielens(fn, ratings_file, out_fn, separator='::', ignore_header=False):
             if rating < 3: # We only keep ratings >= 3
                 continue
 
-            if not userId in users:
+            if userId not in users:
                 users[userId] = len(users)
                 X.append([])
 
diff --git a/ann_benchmarks/distance.py b/ann_benchmarks/distance.py
index fddf78e15..d649a2769 100644
--- a/ann_benchmarks/distance.py
+++ b/ann_benchmarks/distance.py
@@ -1,5 +1,4 @@
 from __future__ import absolute_import
-import itertools
 import numpy as np
 
 # Need own implementation of jaccard because scipy's
diff --git a/ann_benchmarks/main.py b/ann_benchmarks/main.py
index bcd75a6e2..45889143f 100644
--- a/ann_benchmarks/main.py
+++ b/ann_benchmarks/main.py
@@ -10,7 +10,6 @@
 import random
 import shutil
 import sys
-import traceback
 
 from ann_benchmarks.datasets import get_dataset, DATASETS
 from ann_benchmarks.constants import INDEX_DIR
diff --git a/ann_benchmarks/plotting/plot_variants.py b/ann_benchmarks/plotting/plot_variants.py
index a30d06dfd..e8777ee47 100644
--- a/ann_benchmarks/plotting/plot_variants.py
+++ b/ann_benchmarks/plotting/plot_variants.py
@@ -1,4 +1,3 @@
-from ann_benchmarks.plotting.metrics import all_metrics as metrics
 
 all_plot_variants = {
     "recall/time": ("k-nn", "qps"),
diff --git a/ann_benchmarks/runner.py b/ann_benchmarks/runner.py
index d2e896d44..8aaddc5cd 100644
--- a/ann_benchmarks/runner.py
+++ b/ann_benchmarks/runner.py
@@ -113,9 +113,8 @@ def run(definition, dataset, count, run_count, batch):
     X_train, X_test = dataset_transform(D)
 
     try:
-        prepared_queries = False
         if hasattr(algo, "supports_prepared_queries"):
-            prepared_queries = algo.supports_prepared_queries()
+            algo.supports_prepared_queries()
 
         t0 = time.time()
         memory_usage_before = algo.get_memory_usage()
@@ -157,7 +156,7 @@ def run_from_cmdline():
     parser.add_argument(
         '--dataset',
         choices=DATASETS.keys(),
-        help=f'Dataset to benchmark on.',
+        help='Dataset to benchmark on.',
         required=True)
     parser.add_argument(
         '--algorithm',