From 0c67adf859478d2c67d3047c6abfa431d768c623 Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Fri, 7 Apr 2023 13:08:38 -0400
Subject: [PATCH] Black

---
 ann_benchmarks/__init__.py | 1 +
 ann_benchmarks/algorithms/annoy.py | 3 +-
 ann_benchmarks/algorithms/balltree.py | 10 +-
 ann_benchmarks/algorithms/base.py | 4 +-
 ann_benchmarks/algorithms/bruteforce.py | 64 ++--
 ann_benchmarks/algorithms/ckdtree.py | 2 +-
 ann_benchmarks/algorithms/datasketch.py | 15 +-
 ann_benchmarks/algorithms/definitions.py | 49 ++-
 ann_benchmarks/algorithms/diskann.py | 92 +++---
 ann_benchmarks/algorithms/dolphinnpy.py | 3 +-
 ann_benchmarks/algorithms/dummy_algo.py | 4 +-
 ann_benchmarks/algorithms/elasticsearch.py | 32 +-
 ann_benchmarks/algorithms/elastiknn.py | 23 +-
 ann_benchmarks/algorithms/faiss.py | 34 +-
 ann_benchmarks/algorithms/faiss_gpu.py | 7 +-
 ann_benchmarks/algorithms/faiss_hnsw.py | 4 +-
 ann_benchmarks/algorithms/flann.py | 14 +-
 ann_benchmarks/algorithms/hnswlib.py | 10 +-
 ann_benchmarks/algorithms/kdtree.py | 10 +-
 ann_benchmarks/algorithms/kgraph.py | 7 +-
 ann_benchmarks/algorithms/lshf.py | 16 +-
 ann_benchmarks/algorithms/luceneknn.py | 28 +-
 ann_benchmarks/algorithms/milvus.py | 5 +-
 ann_benchmarks/algorithms/mrpt.py | 25 +-
 ann_benchmarks/algorithms/n2.py | 12 +-
 ann_benchmarks/algorithms/nearpy.py | 26 +-
 ann_benchmarks/algorithms/nmslib.py | 43 ++-
 ann_benchmarks/algorithms/onng_ngt.py | 118 ++++---
 ann_benchmarks/algorithms/opensearchknn.py | 67 ++--
 ann_benchmarks/algorithms/panng_ngt.py | 64 ++--
 ann_benchmarks/algorithms/pgvector.py | 7 +-
 ann_benchmarks/algorithms/puffinn.py | 35 ++-
 ann_benchmarks/algorithms/pynndescent.py | 12 +-
 ann_benchmarks/algorithms/qdrant.py | 87 +++---
 ann_benchmarks/algorithms/qg_ngt.py | 142 +++++----
 ann_benchmarks/algorithms/qsg_ngt.py | 245 +++++++++------
 ann_benchmarks/algorithms/rpforest.py | 2 +-
 ann_benchmarks/algorithms/scann.py | 50 +--
 ann_benchmarks/algorithms/sptag.py | 11 +-
 ann_benchmarks/algorithms/subprocess.py | 88 ++++--
 ann_benchmarks/algorithms/vald.py | 132 ++++----
 ann_benchmarks/algorithms/vearch.py | 33 +-
 ann_benchmarks/algorithms/vespa.py | 18 +-
 ann_benchmarks/constants.py | 2 +-
 ann_benchmarks/data.py | 12 +-
 ann_benchmarks/datasets.py | 348 ++++++++++-----------
 ann_benchmarks/distance.py | 46 +--
 ann_benchmarks/main.py | 176 +++++------
 ann_benchmarks/plotting/metrics.py | 137 ++++----
 ann_benchmarks/plotting/plot_variants.py | 1 -
 ann_benchmarks/plotting/utils.py | 123 ++++----
 ann_benchmarks/results.py | 36 +--
 ann_benchmarks/runner.py | 202 ++++++------
 create_dataset.py | 5 +-
 create_website.py | 202 +++++-------
 data_export.py | 17 +-
 install.py | 55 ++--
 plot.py | 155 ++++-----
 58 files changed, 1594 insertions(+), 1577 deletions(-)

diff --git a/ann_benchmarks/__init__.py b/ann_benchmarks/__init__.py
index 75db8ab95..c8e118fcb 100644
--- a/ann_benchmarks/__init__.py
+++ b/ann_benchmarks/__init__.py
@@ -1,2 +1,3 @@
 from __future__ import absolute_import
+
 # from ann_benchmarks.main import *
diff --git a/ann_benchmarks/algorithms/annoy.py b/ann_benchmarks/algorithms/annoy.py
index 280ef9003..b5f64c48a 100644
--- a/ann_benchmarks/algorithms/annoy.py
+++ b/ann_benchmarks/algorithms/annoy.py
@@ -22,5 +22,4 @@ def query(self, v, n):
         return self._annoy.get_nns_by_vector(v.tolist(), n, self._search_k)

     def __str__(self):
-        return 'Annoy(n_trees=%d, search_k=%d)' % (self._n_trees,
-                                                   self._search_k)
+        return "Annoy(n_trees=%d, search_k=%d)" % (self._n_trees, self._search_k)
diff --git a/ann_benchmarks/algorithms/balltree.py
b/ann_benchmarks/algorithms/balltree.py index 634dc691a..2f612b9ff 100644 --- a/ann_benchmarks/algorithms/balltree.py +++ b/ann_benchmarks/algorithms/balltree.py @@ -8,15 +8,15 @@ class BallTree(BaseANN): def __init__(self, metric, leaf_size=20): self._leaf_size = leaf_size self._metric = metric - self.name = 'BallTree(leaf_size=%d)' % self._leaf_size + self.name = "BallTree(leaf_size=%d)" % self._leaf_size def fit(self, X): - if self._metric == 'angular': - X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') + if self._metric == "angular": + X = sklearn.preprocessing.normalize(X, axis=1, norm="l2") self._tree = sklearn.neighbors.BallTree(X, leaf_size=self._leaf_size) def query(self, v, n): - if self._metric == 'angular': - v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0] + if self._metric == "angular": + v = sklearn.preprocessing.normalize([v], axis=1, norm="l2")[0] dist, ind = self._tree.query([v], k=n) return ind[0] diff --git a/ann_benchmarks/algorithms/base.py b/ann_benchmarks/algorithms/base.py index a6fae26a2..2fdcc6479 100644 --- a/ann_benchmarks/algorithms/base.py +++ b/ann_benchmarks/algorithms/base.py @@ -21,8 +21,8 @@ def query(self, q, n): def batch_query(self, X, n): """Provide all queries at once and let algorithm figure out - how to handle it. Default implementation uses a ThreadPool - to parallelize query processing.""" + how to handle it. Default implementation uses a ThreadPool + to parallelize query processing.""" pool = ThreadPool() self.res = pool.map(lambda q: self.query(q, n), X) diff --git a/ann_benchmarks/algorithms/bruteforce.py b/ann_benchmarks/algorithms/bruteforce.py index 6d11d327e..b93a32959 100644 --- a/ann_benchmarks/algorithms/bruteforce.py +++ b/ann_benchmarks/algorithms/bruteforce.py @@ -7,26 +7,21 @@ class BruteForce(BaseANN): def __init__(self, metric): - if metric not in ('angular', 'euclidean', 'hamming'): - raise NotImplementedError( - "BruteForce doesn't support metric %s" % metric) + if metric not in ("angular", "euclidean", "hamming"): + raise NotImplementedError("BruteForce doesn't support metric %s" % metric) self._metric = metric - self.name = 'BruteForce()' + self.name = "BruteForce()" def fit(self, X): - metric = {'angular': 'cosine', 'euclidean': 'l2', - 'hamming': 'hamming'}[self._metric] - self._nbrs = sklearn.neighbors.NearestNeighbors( - algorithm='brute', metric=metric) + metric = {"angular": "cosine", "euclidean": "l2", "hamming": "hamming"}[self._metric] + self._nbrs = sklearn.neighbors.NearestNeighbors(algorithm="brute", metric=metric) self._nbrs.fit(X) def query(self, v, n): - return list(self._nbrs.kneighbors( - [v], return_distance=False, n_neighbors=n)[0]) + return list(self._nbrs.kneighbors([v], return_distance=False, n_neighbors=n)[0]) def query_with_distances(self, v, n): - (distances, positions) = self._nbrs.kneighbors( - [v], return_distance=True, n_neighbors=n) + (distances, positions) = self._nbrs.kneighbors([v], return_distance=True, n_neighbors=n) return zip(list(positions[0]), list(distances[0])) @@ -34,38 +29,37 @@ class BruteForceBLAS(BaseANN): """kNN search that uses a linear scan = brute force.""" def __init__(self, metric, precision=numpy.float32): - if metric not in ('angular', 'euclidean', 'hamming', 'jaccard'): - raise NotImplementedError( - "BruteForceBLAS doesn't support metric %s" % metric) - elif metric == 'hamming' and precision != numpy.bool_: + if metric not in ("angular", "euclidean", "hamming", "jaccard"): + raise NotImplementedError("BruteForceBLAS doesn't support metric %s" % 
metric) + elif metric == "hamming" and precision != numpy.bool_: raise NotImplementedError( - "BruteForceBLAS doesn't support precision" - " %s with Hamming distances" % precision) + "BruteForceBLAS doesn't support precision" " %s with Hamming distances" % precision + ) self._metric = metric self._precision = precision - self.name = 'BruteForceBLAS()' + self.name = "BruteForceBLAS()" def fit(self, X): """Initialize the search index.""" - if self._metric == 'angular': + if self._metric == "angular": # precompute (squared) length of each vector - lens = (X ** 2).sum(-1) + lens = (X**2).sum(-1) # normalize index vectors to unit length X /= numpy.sqrt(lens)[..., numpy.newaxis] self.index = numpy.ascontiguousarray(X, dtype=self._precision) - elif self._metric == 'hamming': + elif self._metric == "hamming": # Regarding bitvectors as vectors in l_2 is faster for blas X = X.astype(numpy.float32) # precompute (squared) length of each vector - lens = (X ** 2).sum(-1) + lens = (X**2).sum(-1) self.index = numpy.ascontiguousarray(X, dtype=numpy.float32) self.lengths = numpy.ascontiguousarray(lens, dtype=numpy.float32) - elif self._metric == 'euclidean': + elif self._metric == "euclidean": # precompute (squared) length of each vector - lens = (X ** 2).sum(-1) + lens = (X**2).sum(-1) self.index = numpy.ascontiguousarray(X, dtype=self._precision) self.lengths = numpy.ascontiguousarray(lens, dtype=self._precision) - elif self._metric == 'jaccard': + elif self._metric == "jaccard": self.index = X else: # shouldn't get past the constructor! @@ -78,33 +72,33 @@ def query_with_distances(self, v, n): """Find indices of `n` most similar vectors from the index to query vector `v`.""" - if self._metric != 'jaccard': + if self._metric != "jaccard": # use same precision for query as for index v = numpy.ascontiguousarray(v, dtype=self.index.dtype) # HACK we ignore query length as that's a constant # not affecting the final ordering - if self._metric == 'angular': + if self._metric == "angular": # argmax_a cossim(a, b) = argmax_a dot(a, b) / |a||b| = argmin_a -dot(a, b) # noqa dists = -numpy.dot(self.index, v) - elif self._metric == 'euclidean': + elif self._metric == "euclidean": # argmin_a (a - b)^2 = argmin_a a^2 - 2ab + b^2 = argmin_a a^2 - 2ab # noqa dists = self.lengths - 2 * numpy.dot(self.index, v) - elif self._metric == 'hamming': + elif self._metric == "hamming": # Just compute hamming distance using euclidean distance dists = self.lengths - 2 * numpy.dot(self.index, v) - elif self._metric == 'jaccard': - dists = [pd[self._metric]['distance'](v, e) for e in self.index] + elif self._metric == "jaccard": + dists = [pd[self._metric]["distance"](v, e) for e in self.index] else: # shouldn't get past the constructor! 
assert False, "invalid metric" # partition-sort by distance, get `n` closest nearest_indices = numpy.argpartition(dists, n)[:n] - indices = [idx for idx in nearest_indices if pd[self._metric] - ["distance_valid"](dists[idx])] + indices = [idx for idx in nearest_indices if pd[self._metric]["distance_valid"](dists[idx])] def fix(index): ep = self.index[index] ev = v - return (index, pd[self._metric]['distance'](ep, ev)) + return (index, pd[self._metric]["distance"](ep, ev)) + return map(fix, indices) diff --git a/ann_benchmarks/algorithms/ckdtree.py b/ann_benchmarks/algorithms/ckdtree.py index 901373d5d..7b3d40b66 100644 --- a/ann_benchmarks/algorithms/ckdtree.py +++ b/ann_benchmarks/algorithms/ckdtree.py @@ -7,7 +7,7 @@ class CKDTree(BaseANN): def __init__(self, metric, leaf_size=20): self._leaf_size = leaf_size self._metric = metric - self.name = 'CKDTree(leaf_size=%d)' % self._leaf_size + self.name = "CKDTree(leaf_size=%d)" % self._leaf_size def fit(self, X): self._tree = cKDTree(X, leafsize=self._leaf_size) diff --git a/ann_benchmarks/algorithms/datasketch.py b/ann_benchmarks/algorithms/datasketch.py index 949a7f6df..391ee0df4 100644 --- a/ann_benchmarks/algorithms/datasketch.py +++ b/ann_benchmarks/algorithms/datasketch.py @@ -8,13 +8,12 @@ class DataSketch(BaseANN): def __init__(self, metric, n_perm, n_rep): - if metric not in ('jaccard'): - raise NotImplementedError( - "Datasketch doesn't support metric %s" % metric) + if metric not in ("jaccard"): + raise NotImplementedError("Datasketch doesn't support metric %s" % metric) self._n_perm = n_perm self._n_rep = n_rep self._metric = metric - self.name = 'Datasketch(n_perm=%d, n_rep=%d)' % (n_perm, n_rep) + self.name = "Datasketch(n_perm=%d, n_rep=%d)" % (n_perm, n_rep) def fit(self, X): self._index = MinHashLSHForest(num_perm=self._n_perm, l=self._n_rep) @@ -22,10 +21,10 @@ def fit(self, X): m = MinHash(num_perm=self._n_perm) if x.dtype == np.bool_: for e in np.flatnonzero(x): - m.update(str(e).encode('utf8')) + m.update(str(e).encode("utf8")) else: for e in x: - m.update(str(e).encode('utf8')) + m.update(str(e).encode("utf8")) self._index.add(str(i), m) self._index.index() @@ -33,8 +32,8 @@ def query(self, v, n): m = MinHash(num_perm=self._n_perm) if v.dtype == np.bool_: for e in np.flatnonzero(v): - m.update(str(e).encode('utf8')) + m.update(str(e).encode("utf8")) else: for e in v: - m.update(str(e).encode('utf8')) + m.update(str(e).encode("utf8")) return map(int, self._index.query(m, n)) diff --git a/ann_benchmarks/algorithms/definitions.py b/ann_benchmarks/algorithms/definitions.py index cabd1446c..655b2b69d 100644 --- a/ann_benchmarks/algorithms/definitions.py +++ b/ann_benchmarks/algorithms/definitions.py @@ -7,14 +7,12 @@ Definition = collections.namedtuple( - 'Definition', - ['algorithm', 'constructor', 'module', 'docker_tag', - 'arguments', 'query_argument_groups', 'disabled']) + "Definition", ["algorithm", "constructor", "module", "docker_tag", "arguments", "query_argument_groups", "disabled"] +) def instantiate_algorithm(definition): - print('Trying to instantiate %s.%s(%s)' % - (definition.module, definition.constructor, definition.arguments)) + print("Trying to instantiate %s.%s(%s)" % (definition.module, definition.constructor, definition.arguments)) module = importlib.import_module(definition.module) constructor = getattr(module, definition.constructor) return constructor(*definition.arguments) @@ -55,8 +53,7 @@ def _generate_combinations(args): def _substitute_variables(arg, vs): if isinstance(arg, dict): - return 
dict([(k, _substitute_variables(v, vs)) - for k, v in arg.items()]) + return dict([(k, _substitute_variables(v, vs)) for k, v in arg.items()]) elif isinstance(arg, list): return [_substitute_variables(a, vs) for a in arg] elif isinstance(arg, str) and arg in vs: @@ -73,13 +70,13 @@ def _get_definitions(definition_file): def list_algorithms(definition_file): definitions = _get_definitions(definition_file) - print('The following algorithms are supported...') + print("The following algorithms are supported...") for point in definitions: print('\t... for the point type "%s"...' % point) for metric in definitions[point]: print('\t\t... and the distance metric "%s":' % metric) for algorithm in definitions[point][metric]: - print('\t\t\t%s' % algorithm) + print("\t\t\t%s" % algorithm) def get_unique_algorithms(definition_file): @@ -92,8 +89,7 @@ def get_unique_algorithms(definition_file): return list(sorted(algos)) -def get_definitions(definition_file, dimension, point_type="float", - distance_metric="euclidean", count=10): +def get_definitions(definition_file, dimension, point_type="float", distance_metric="euclidean", count=10): definitions = _get_definitions(definition_file) algorithm_definitions = {} @@ -103,10 +99,9 @@ def get_definitions(definition_file, dimension, point_type="float", definitions = [] for (name, algo) in algorithm_definitions.items(): - for k in ['docker-tag', 'module', 'constructor']: + for k in ["docker-tag", "module", "constructor"]: if k not in algo: - raise Exception( - 'algorithm %s does not define a "%s" property' % (name, k)) + raise Exception('algorithm %s does not define a "%s" property' % (name, k)) base_args = [] if "base-args" in algo: @@ -150,20 +145,18 @@ def get_definitions(definition_file, dimension, point_type="float", else: aargs.append(arg_group) - vs = { - "@count": count, - "@metric": distance_metric, - "@dimension": dimension - } + vs = {"@count": count, "@metric": distance_metric, "@dimension": dimension} aargs = [_substitute_variables(arg, vs) for arg in aargs] - definitions.append(Definition( - algorithm=name, - docker_tag=algo['docker-tag'], - module=algo['module'], - constructor=algo['constructor'], - arguments=aargs, - query_argument_groups=query_args, - disabled=algo.get('disabled', False) - )) + definitions.append( + Definition( + algorithm=name, + docker_tag=algo["docker-tag"], + module=algo["module"], + constructor=algo["constructor"], + arguments=aargs, + query_argument_groups=query_args, + disabled=algo.get("disabled", False), + ) + ) return definitions diff --git a/ann_benchmarks/algorithms/diskann.py b/ann_benchmarks/algorithms/diskann.py index 7502141c9..327304be7 100644 --- a/ann_benchmarks/algorithms/diskann.py +++ b/ann_benchmarks/algorithms/diskann.py @@ -8,7 +8,7 @@ class Vamana(BaseANN): def __init__(self, metric, param): - self.metric = {'angular': 'cosine', 'euclidean': 'l2'}[metric] + self.metric = {"angular": "cosine", "euclidean": "l2"}[metric] self.l_build = int(param["l_build"]) self.max_outdegree = int(param["max_outdegree"]) self.alpha = float(param["alpha"]) @@ -24,59 +24,59 @@ def __init__(self, metric, param): self.params.set("num_threads", 1) def fit(self, X): - def bin_to_float(binary): - return struct.unpack('!f',struct.pack('!I', int(binary, 2)))[0] + return struct.unpack("!f", struct.pack("!I", int(binary, 2)))[0] print("Vamana: Starting Fit...") - index_dir = 'indices' + index_dir = "indices" if not os.path.exists(index_dir): os.makedirs(index_dir) - data_path = os.path.join(index_dir, 'base.bin') - self.name = 
'Vamana-{}-{}-{}'.format(self.l_build, - self.max_outdegree, self.alpha) + data_path = os.path.join(index_dir, "base.bin") + self.name = "Vamana-{}-{}-{}".format(self.l_build, self.max_outdegree, self.alpha) save_path = os.path.join(index_dir, self.name) - print('Vamana: Index Stored At: ' + save_path) - shape = [np.float32(bin_to_float('{:032b}'.format(X.shape[0]))), - np.float32(bin_to_float('{:032b}'.format(X.shape[1])))] + print("Vamana: Index Stored At: " + save_path) + shape = [ + np.float32(bin_to_float("{:032b}".format(X.shape[0]))), + np.float32(bin_to_float("{:032b}".format(X.shape[1]))), + ] X = X.flatten() X = np.insert(X, 0, shape) X.tofile(data_path) if not os.path.exists(save_path): - print('Vamana: Creating Index') + print("Vamana: Creating Index") s = time.time() - if self.metric == 'l2': + if self.metric == "l2": index = vp.SinglePrecisionIndex(vp.Metric.FAST_L2, data_path) - elif self.metric == 'cosine': + elif self.metric == "cosine": index = vp.SinglePrecisionIndex(vp.Metric.INNER_PRODUCT, data_path) else: - print('Vamana: Unknown Metric Error!') + print("Vamana: Unknown Metric Error!") index.build(self.params, []) t = time.time() - print('Vamana: Index Build Time (sec) = ' + str(t - s)) + print("Vamana: Index Build Time (sec) = " + str(t - s)) index.save(save_path) if os.path.exists(save_path): - print('Vamana: Loading Index: ' + str(save_path)) + print("Vamana: Loading Index: " + str(save_path)) s = time.time() - if self.metric == 'l2': + if self.metric == "l2": self.index = vp.SinglePrecisionIndex(vp.Metric.FAST_L2, data_path) - elif self.metric == 'cosine': + elif self.metric == "cosine": self.index = vp.SinglePrecisionIndex(vp.Metric.INNER_PRODUCT, data_path) else: - print('Vamana: Unknown Metric Error!') - self.index.load(file_name = save_path) + print("Vamana: Unknown Metric Error!") + self.index.load(file_name=save_path) print("Vamana: Index Loaded") self.index.optimize_graph() print("Vamana: Graph Optimization Completed") t = time.time() - print('Vamana: Index Load Time (sec) = ' + str(t - s)) + print("Vamana: Index Load Time (sec) = " + str(t - s)) else: print("Vamana: Unexpected Index Build Time Error") - print('Vamana: End of Fit') + print("Vamana: End of Fit") def set_query_arguments(self, l_search): print("Vamana: L_Search = " + str(l_search)) @@ -95,7 +95,7 @@ def get_batch_results(self): class VamanaPQ(BaseANN): def __init__(self, metric, param): - self.metric = {'angular': 'cosine', 'euclidean': 'l2'}[metric] + self.metric = {"angular": "cosine", "euclidean": "l2"}[metric] self.l_build = int(param["l_build"]) self.max_outdegree = int(param["max_outdegree"]) self.alpha = float(param["alpha"]) @@ -114,12 +114,11 @@ def __init__(self, metric, param): self.params.set("num_threads", 1) def fit(self, X): - def bin_to_float(binary): - return struct.unpack('!f',struct.pack('!I', int(binary, 2)))[0] + return struct.unpack("!f", struct.pack("!I", int(binary, 2)))[0] print("Vamana PQ: Starting Fit...") - index_dir = 'indices' + index_dir = "indices" if self.chunks > X.shape[1]: raise ValueError @@ -127,52 +126,53 @@ def bin_to_float(binary): if not os.path.exists(index_dir): os.makedirs(index_dir) - data_path = os.path.join(index_dir, 'base.bin') - pq_path = os.path.join(index_dir, 'pq_memory_index') - self.name = 'VamanaPQ-{}-{}-{}'.format(self.l_build, - self.max_outdegree, self.alpha) + data_path = os.path.join(index_dir, "base.bin") + pq_path = os.path.join(index_dir, "pq_memory_index") + self.name = "VamanaPQ-{}-{}-{}".format(self.l_build, 
self.max_outdegree, self.alpha) save_path = os.path.join(index_dir, self.name) - print('Vamana PQ: Index Stored At: ' + save_path) - shape = [np.float32(bin_to_float('{:032b}'.format(X.shape[0]))), - np.float32(bin_to_float('{:032b}'.format(X.shape[1])))] + print("Vamana PQ: Index Stored At: " + save_path) + shape = [ + np.float32(bin_to_float("{:032b}".format(X.shape[0]))), + np.float32(bin_to_float("{:032b}".format(X.shape[1]))), + ] X = X.flatten() X = np.insert(X, 0, shape) X.tofile(data_path) if not os.path.exists(save_path): - print('Vamana PQ: Creating Index') + print("Vamana PQ: Creating Index") s = time.time() - if self.metric == 'l2': + if self.metric == "l2": index = vp.SinglePrecisionIndex(vp.Metric.FAST_L2, data_path) - elif self.metric == 'cosine': + elif self.metric == "cosine": index = vp.SinglePrecisionIndex(vp.Metric.INNER_PRODUCT, data_path) else: - print('Vamana PQ: Unknown Metric Error!') + print("Vamana PQ: Unknown Metric Error!") index.pq_build(data_path, pq_path, self.params) t = time.time() - print('Vamana PQ: Index Build Time (sec) = ' + str(t - s)) + print("Vamana PQ: Index Build Time (sec) = " + str(t - s)) index.save(save_path) if os.path.exists(save_path): - print('Vamana PQ: Loading Index: ' + str(save_path)) + print("Vamana PQ: Loading Index: " + str(save_path)) s = time.time() - if self.metric == 'l2': + if self.metric == "l2": self.index = vp.SinglePrecisionIndex(vp.Metric.FAST_L2, data_path) - elif self.metric == 'cosine': + elif self.metric == "cosine": self.index = vp.SinglePrecisionIndex(vp.Metric.INNER_PRODUCT, data_path) else: - print('Vamana PQ: Unknown Metric Error!') - self.index.load(file_name = save_path) + print("Vamana PQ: Unknown Metric Error!") + self.index.load(file_name=save_path) print("Vamana PQ: Index Loaded") - self.index.pq_load(pq_prefix_path = pq_path) + self.index.pq_load(pq_prefix_path=pq_path) print("Vamana PQ: PQ Data Loaded") self.index.optimize_graph() print("Vamana PQ: Graph Optimization Completed") t = time.time() - print('Vamana PQ: Index Load Time (sec) = ' + str(t - s)) + print("Vamana PQ: Index Load Time (sec) = " + str(t - s)) else: print("Vamana PQ: Unexpected Index Build Time Error") - print('Vamana PQ: End of Fit') + print("Vamana PQ: End of Fit") def set_query_arguments(self, l_search): print("Vamana PQ: L_Search = " + str(l_search)) diff --git a/ann_benchmarks/algorithms/dolphinnpy.py b/ann_benchmarks/algorithms/dolphinnpy.py index 34e7192cc..93c41dec5 100644 --- a/ann_benchmarks/algorithms/dolphinnpy.py +++ b/ann_benchmarks/algorithms/dolphinnpy.py @@ -1,5 +1,6 @@ from __future__ import absolute_import import sys + sys.path.append("install/lib-dolphinnpy") # noqa import numpy from dolphinn import Dolphinn @@ -9,7 +10,7 @@ class DolphinnPy(BaseANN): def __init__(self, num_probes): - self.name = 'Dolphinn(num_probes={} )'.format(num_probes) + self.name = "Dolphinn(num_probes={} )".format(num_probes) self.num_probes = num_probes self.m = 1 self._index = None diff --git a/ann_benchmarks/algorithms/dummy_algo.py b/ann_benchmarks/algorithms/dummy_algo.py index 8bd39aeda..939da89b3 100644 --- a/ann_benchmarks/algorithms/dummy_algo.py +++ b/ann_benchmarks/algorithms/dummy_algo.py @@ -5,7 +5,7 @@ class DummyAlgoMt(BaseANN): def __init__(self, metric): - self.name = 'DummyAlgoMultiThread' + self.name = "DummyAlgoMultiThread" def fit(self, X): self.len = len(X) - 1 @@ -16,7 +16,7 @@ def query(self, v, n): class DummyAlgoSt(BaseANN): def __init__(self, metric): - self.name = 'DummyAlgoSingleThread' + self.name = 
"DummyAlgoSingleThread" def fit(self, X): self.len = len(X) - 1 diff --git a/ann_benchmarks/algorithms/elasticsearch.py b/ann_benchmarks/algorithms/elasticsearch.py index 95eccab09..e7d037554 100644 --- a/ann_benchmarks/algorithms/elasticsearch.py +++ b/ann_benchmarks/algorithms/elasticsearch.py @@ -20,6 +20,7 @@ # logging.basicConfig(level=logging.INFO) # logging.getLogger("elasticsearch").setLevel(logging.INFO) + def es_wait(): print("Waiting for elasticsearch health endpoint...") req = Request("http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=1s") @@ -50,9 +51,9 @@ def __init__(self, metric: str, dimension: int): self.es = Elasticsearch(["http://localhost:9200"]) self.batch_res = [] if self.metric == "euclidean": - self.script = "1 / (1 + l2norm(params.query_vec, \"vec\"))" + self.script = '1 / (1 + l2norm(params.query_vec, "vec"))' elif self.metric == "angular": - self.script = "1.0 + cosineSimilarity(params.query_vec, \"vec\")" + self.script = '1.0 + cosineSimilarity(params.query_vec, "vec")' else: raise NotImplementedError(f"Not implemented for metric {self.metric}") es_wait() @@ -60,17 +61,14 @@ def __init__(self, metric: str, dimension: int): def fit(self, X): body = dict(settings=dict(number_of_shards=1, number_of_replicas=0)) mapping = dict( - properties=dict( - id=dict(type="keyword", store=True), - vec=dict(type="dense_vector", dims=self.dimension) - ) + properties=dict(id=dict(type="keyword", store=True), vec=dict(type="dense_vector", dims=self.dimension)) ) self.es.indices.create(self.index, body=body) self.es.indices.put_mapping(mapping, self.index) def gen(): for i, vec in enumerate(X): - yield { "_op_type": "index", "_index": self.index, "vec": vec.tolist(), 'id': str(i + 1) } + yield {"_op_type": "index", "_index": self.index, "vec": vec.tolist(), "id": str(i + 1)} (_, errors) = bulk(self.es, gen(), chunk_size=500, max_retries=9) assert len(errors) == 0, errors @@ -82,21 +80,23 @@ def query(self, q, n): body = dict( query=dict( script_score=dict( - query=dict(match_all=dict()), - script=dict( - source=self.script, - params=dict(query_vec=q.tolist()) - ) + query=dict(match_all=dict()), script=dict(source=self.script, params=dict(query_vec=q.tolist())) ) ) ) - res = self.es.search(index=self.index, body=body, size=n, _source=False, docvalue_fields=['id'], - stored_fields="_none_", filter_path=["hits.hits.fields.id"]) - return [int(h['fields']['id'][0]) - 1 for h in res['hits']['hits']] + res = self.es.search( + index=self.index, + body=body, + size=n, + _source=False, + docvalue_fields=["id"], + stored_fields="_none_", + filter_path=["hits.hits.fields.id"], + ) + return [int(h["fields"]["id"][0]) - 1 for h in res["hits"]["hits"]] def batch_query(self, X, n): self.batch_res = [self.query(q, n) for q in X] def get_batch_results(self): return self.batch_res - diff --git a/ann_benchmarks/algorithms/elastiknn.py b/ann_benchmarks/algorithms/elastiknn.py index 8add0d8f2..d27a5ad5f 100644 --- a/ann_benchmarks/algorithms/elastiknn.py +++ b/ann_benchmarks/algorithms/elastiknn.py @@ -39,15 +39,15 @@ def es_wait(): def dealias_metric(metric: str) -> str: mlower = metric.lower() - if mlower == 'euclidean': - return 'l2' - elif mlower == 'angular': - return 'cosine' + if mlower == "euclidean": + return "l2" + elif mlower == "angular": + return "cosine" else: return mlower -class Exact(BaseANN): +class Exact(BaseANN): def __init__(self, metric: str, dimension: int): self.name = f"eknn-exact-metric={metric}_dimension={dimension}" self.metric = metric @@ -61,19 +61,19 @@ 
def _handle_sparse(self, X): return [Vec.SparseBool(x, self.dimension) for x in X] def fit(self, X): - if self.metric in {'jaccard', 'hamming'}: + if self.metric in {"jaccard", "hamming"}: return self.model.fit(self._handle_sparse(X), shards=1)[0] else: return self.model.fit(X, shards=1) def query(self, q, n): - if self.metric in {'jaccard', 'hamming'}: + if self.metric in {"jaccard", "hamming"}: return self.model.kneighbors(self._handle_sparse([q]), n)[0] else: return self.model.kneighbors(np.expand_dims(q, 0), n)[0] def batch_query(self, X, n): - if self.metric in {'jaccard', 'hamming'}: + if self.metric in {"jaccard", "hamming"}: self.batch_res = self.model.kneighbors(self._handle_sparse(X), n) else: self.batch_res = self.model.kneighbors(X, n) @@ -83,7 +83,6 @@ def get_batch_results(self): class L2Lsh(BaseANN): - def __init__(self, L: int, k: int, w: int): self.name_prefix = f"eknn-l2lsh-L={L}-k={k}-w={w}" self.name = None # set based on query args. @@ -112,7 +111,7 @@ def set_query_arguments(self, candidates: int, probes: int): self.sum_query_dur = 0 def query(self, q, n): - + t0 = perf_counter() res = self.model.kneighbors(np.expand_dims(q, 0), n, return_similarity=False)[0] dur = perf_counter() - t0 @@ -121,7 +120,9 @@ def query(self, q, n): self.sum_query_dur += dur self.num_queries += 1 if self.num_queries > 500 and self.num_queries / self.sum_query_dur < 50: - raise Exception("Throughput after 500 queries is less than 50 q/s. Giving up to avoid wasteful computation.") + raise Exception( + "Throughput after 500 queries is less than 50 q/s. Giving up to avoid wasteful computation." + ) elif res[-2:].sum() < 0: raise Exception(f"Model returned fewer than {n} neighbors. Giving up to avoid wasteful computation.") else: diff --git a/ann_benchmarks/algorithms/faiss.py b/ann_benchmarks/algorithms/faiss.py index 9d6244400..008386ecc 100644 --- a/ann_benchmarks/algorithms/faiss.py +++ b/ann_benchmarks/algorithms/faiss.py @@ -1,5 +1,6 @@ from __future__ import absolute_import import sys + sys.path.append("install/lib-faiss") # noqa import numpy import sklearn.preprocessing @@ -9,14 +10,13 @@ class Faiss(BaseANN): def query(self, v, n): - if self._metric == 'angular': + if self._metric == "angular": v /= numpy.linalg.norm(v) - D, I = self.index.search(numpy.expand_dims( - v, axis=0).astype(numpy.float32), n) + D, I = self.index.search(numpy.expand_dims(v, axis=0).astype(numpy.float32), n) return I[0] def batch_query(self, X, n): - if self._metric == 'angular': + if self._metric == "angular": X /= numpy.linalg.norm(X) self.res = self.index.search(X.astype(numpy.float32), n) @@ -37,7 +37,7 @@ def __init__(self, metric, n_bits): self._n_bits = n_bits self.index = None self._metric = metric - self.name = 'FaissLSH(n_bits={})'.format(self._n_bits) + self.name = "FaissLSH(n_bits={})".format(self._n_bits) def fit(self, X): if X.dtype != numpy.float32: @@ -54,15 +54,14 @@ def __init__(self, metric, n_list): self._metric = metric def fit(self, X): - if self._metric == 'angular': - X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') + if self._metric == "angular": + X = sklearn.preprocessing.normalize(X, axis=1, norm="l2") if X.dtype != numpy.float32: X = X.astype(numpy.float32) self.quantizer = faiss.IndexFlatL2(X.shape[1]) - index = faiss.IndexIVFFlat( - self.quantizer, X.shape[1], self._n_list, faiss.METRIC_L2) + index = faiss.IndexIVFFlat(self.quantizer, X.shape[1], self._n_list, faiss.METRIC_L2) index.train(X) index.add(X) self.index = index @@ -73,12 +72,10 @@ def set_query_arguments(self, 
n_probe): self.index.nprobe = self._n_probe def get_additional(self): - return {"dist_comps": faiss.cvar.indexIVF_stats.ndis + # noqa - faiss.cvar.indexIVF_stats.nq * self._n_list} + return {"dist_comps": faiss.cvar.indexIVF_stats.ndis + faiss.cvar.indexIVF_stats.nq * self._n_list} # noqa def __str__(self): - return 'FaissIVF(n_list=%d, n_probe=%d)' % (self._n_list, - self._n_probe) + return "FaissIVF(n_list=%d, n_probe=%d)" % (self._n_list, self._n_probe) class FaissIVFPQfs(Faiss): @@ -89,11 +86,11 @@ def __init__(self, metric, n_list): def fit(self, X): if X.dtype != numpy.float32: X = X.astype(numpy.float32) - if self._metric == 'angular': + if self._metric == "angular": faiss.normalize_L2(X) d = X.shape[1] - faiss_metric = faiss.METRIC_INNER_PRODUCT if self._metric == 'angular' else faiss.METRIC_L2 + faiss_metric = faiss.METRIC_INNER_PRODUCT if self._metric == "angular" else faiss.METRIC_L2 factory_string = f"IVF{self._n_list},PQ{d//2}x4fs" index = faiss.index_factory(d, factory_string, faiss_metric) index.train(X) @@ -114,10 +111,7 @@ def set_query_arguments(self, n_probe, k_reorder): self.index = self.refine_index def get_additional(self): - return {"dist_comps": faiss.cvar.indexIVF_stats.ndis + # noqa - faiss.cvar.indexIVF_stats.nq * self._n_list} + return {"dist_comps": faiss.cvar.indexIVF_stats.ndis + faiss.cvar.indexIVF_stats.nq * self._n_list} # noqa def __str__(self): - return 'FaissIVFPQfs(n_list=%d, n_probe=%d, k_reorder=%d)' % (self._n_list, - self._n_probe, - self._k_reorder) + return "FaissIVFPQfs(n_list=%d, n_probe=%d, k_reorder=%d)" % (self._n_list, self._n_probe, self._k_reorder) diff --git a/ann_benchmarks/algorithms/faiss_gpu.py b/ann_benchmarks/algorithms/faiss_gpu.py index b30423abc..0754ae19d 100644 --- a/ann_benchmarks/algorithms/faiss_gpu.py +++ b/ann_benchmarks/algorithms/faiss_gpu.py @@ -1,5 +1,6 @@ from __future__ import absolute_import import sys + # Assumes local installation of FAISS sys.path.append("faiss") # noqa import numpy @@ -12,8 +13,7 @@ class FaissGPU(BaseANN): def __init__(self, n_bits, n_probes): - self.name = 'FaissGPU(n_bits={}, n_probes={})'.format( - n_bits, n_probes) + self.name = "FaissGPU(n_bits={}, n_probes={})".format(n_bits, n_probes) self._n_bits = n_bits self._n_probes = n_probes self._res = faiss.StandardGpuResources() @@ -21,8 +21,7 @@ def __init__(self, n_bits, n_probes): def fit(self, X): X = X.astype(numpy.float32) - self._index = faiss.GpuIndexIVFFlat(self._res, len(X[0]), self._n_bits, - faiss.METRIC_L2) + self._index = faiss.GpuIndexIVFFlat(self._res, len(X[0]), self._n_bits, faiss.METRIC_L2) # self._index = faiss.index_factory(len(X[0]), # "IVF%d,Flat" % self._n_bits) # co = faiss.GpuClonerOptions() diff --git a/ann_benchmarks/algorithms/faiss_hnsw.py b/ann_benchmarks/algorithms/faiss_hnsw.py index 38414dfc2..da9948b0d 100644 --- a/ann_benchmarks/algorithms/faiss_hnsw.py +++ b/ann_benchmarks/algorithms/faiss_hnsw.py @@ -14,7 +14,7 @@ def fit(self, X): self.index.hnsw.efConstruction = self.method_param["efConstruction"] self.index.verbose = True - if self._metric == 'angular': + if self._metric == "angular": X = X / np.linalg.norm(X, axis=1)[:, np.newaxis] if X.dtype != np.float32: X = X.astype(np.float32) @@ -30,7 +30,7 @@ def get_additional(self): return {"dist_comps": faiss.cvar.hnsw_stats.ndis} def __str__(self): - return 'faiss (%s, ef: %d)' % (self.method_param, self.index.hnsw.efSearch) + return "faiss (%s, ef: %d)" % (self.method_param, self.index.hnsw.efSearch) def freeIndex(self): del self.p diff --git 
a/ann_benchmarks/algorithms/flann.py b/ann_benchmarks/algorithms/flann.py index 69790d9cc..9c8a7a8ec 100644 --- a/ann_benchmarks/algorithms/flann.py +++ b/ann_benchmarks/algorithms/flann.py @@ -8,20 +8,18 @@ class FLANN(BaseANN): def __init__(self, metric, target_precision): self._target_precision = target_precision - self.name = 'FLANN(target_precision=%f)' % self._target_precision + self.name = "FLANN(target_precision=%f)" % self._target_precision self._metric = metric def fit(self, X): - self._flann = pyflann.FLANN( - target_precision=self._target_precision, - algorithm='autotuned', log_level='info') - if self._metric == 'angular': - X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') + self._flann = pyflann.FLANN(target_precision=self._target_precision, algorithm="autotuned", log_level="info") + if self._metric == "angular": + X = sklearn.preprocessing.normalize(X, axis=1, norm="l2") self._flann.build_index(X) def query(self, v, n): - if self._metric == 'angular': - v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0] + if self._metric == "angular": + v = sklearn.preprocessing.normalize([v], axis=1, norm="l2")[0] if v.dtype != numpy.float32: v = v.astype(numpy.float32) return self._flann.nn_index(v, n)[0][0] diff --git a/ann_benchmarks/algorithms/hnswlib.py b/ann_benchmarks/algorithms/hnswlib.py index 8526eaef4..9000517e0 100644 --- a/ann_benchmarks/algorithms/hnswlib.py +++ b/ann_benchmarks/algorithms/hnswlib.py @@ -6,18 +6,18 @@ class HnswLib(BaseANN): def __init__(self, metric, method_param): - self.metric = {'angular': 'cosine', 'euclidean': 'l2'}[metric] + self.metric = {"angular": "cosine", "euclidean": "l2"}[metric] self.method_param = method_param # print(self.method_param,save_index,query_param) # self.ef=query_param['ef'] - self.name = 'hnswlib (%s)' % (self.method_param) + self.name = "hnswlib (%s)" % (self.method_param) def fit(self, X): # Only l2 is supported currently self.p = hnswlib.Index(space=self.metric, dim=len(X[0])) - self.p.init_index(max_elements=len(X), - ef_construction=self.method_param["efConstruction"], - M=self.method_param["M"]) + self.p.init_index( + max_elements=len(X), ef_construction=self.method_param["efConstruction"], M=self.method_param["M"] + ) data_labels = np.arange(len(X)) self.p.add_items(np.asarray(X), data_labels) self.p.set_num_threads(1) diff --git a/ann_benchmarks/algorithms/kdtree.py b/ann_benchmarks/algorithms/kdtree.py index 6048ecd6a..11ca2926d 100644 --- a/ann_benchmarks/algorithms/kdtree.py +++ b/ann_benchmarks/algorithms/kdtree.py @@ -8,15 +8,15 @@ class KDTree(BaseANN): def __init__(self, metric, leaf_size=20): self._leaf_size = leaf_size self._metric = metric - self.name = 'KDTree(leaf_size=%d)' % self._leaf_size + self.name = "KDTree(leaf_size=%d)" % self._leaf_size def fit(self, X): - if self._metric == 'angular': - X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') + if self._metric == "angular": + X = sklearn.preprocessing.normalize(X, axis=1, norm="l2") self._tree = sklearn.neighbors.KDTree(X, leaf_size=self._leaf_size) def query(self, v, n): - if self._metric == 'angular': - v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0] + if self._metric == "angular": + v = sklearn.preprocessing.normalize([v], axis=1, norm="l2")[0] dist, ind = self._tree.query([v], k=n) return ind[0] diff --git a/ann_benchmarks/algorithms/kgraph.py b/ann_benchmarks/algorithms/kgraph.py index fa14e2e17..0f8148ffa 100644 --- a/ann_benchmarks/algorithms/kgraph.py +++ b/ann_benchmarks/algorithms/kgraph.py @@ -9,7 +9,7 @@ 
class KGraph(BaseANN): def __init__(self, metric, index_params, save_index): metric = str(metric) - self.name = 'KGraph(%s)' % (metric) + self.name = "KGraph(%s)" % (metric) self._metric = metric self._index_params = index_params self._save_index = save_index @@ -18,7 +18,7 @@ def fit(self, X): if X.dtype != numpy.float32: X = X.astype(numpy.float32) self._kgraph = pykgraph.KGraph(X, self._metric) - path = os.path.join(INDEX_DIR, 'kgraph-index-%s' % self._metric) + path = os.path.join(INDEX_DIR, "kgraph-index-%s" % self._metric) if os.path.exists(path): self._kgraph.load(path) else: @@ -34,6 +34,5 @@ def set_query_arguments(self, P): def query(self, v, n): if v.dtype != numpy.float32: v = v.astype(numpy.float32) - result = self._kgraph.search( - numpy.array([v]), K=n, threads=1, P=self._P) + result = self._kgraph.search(numpy.array([v]), K=n, threads=1, P=self._P) return result[0] diff --git a/ann_benchmarks/algorithms/lshf.py b/ann_benchmarks/algorithms/lshf.py index 59a59dfb1..a1a8db2cc 100644 --- a/ann_benchmarks/algorithms/lshf.py +++ b/ann_benchmarks/algorithms/lshf.py @@ -6,20 +6,18 @@ class LSHF(BaseANN): def __init__(self, metric, n_estimators=10, n_candidates=50): - self.name = 'LSHF(n_est=%d, n_cand=%d)' % (n_estimators, n_candidates) + self.name = "LSHF(n_est=%d, n_cand=%d)" % (n_estimators, n_candidates) self._metric = metric self._n_estimators = n_estimators self._n_candidates = n_candidates def fit(self, X): - self._lshf = sklearn.neighbors.LSHForest( - n_estimators=self._n_estimators, n_candidates=self._n_candidates) - if self._metric == 'angular': - X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') + self._lshf = sklearn.neighbors.LSHForest(n_estimators=self._n_estimators, n_candidates=self._n_candidates) + if self._metric == "angular": + X = sklearn.preprocessing.normalize(X, axis=1, norm="l2") self._lshf.fit(X) def query(self, v, n): - if self._metric == 'angular': - v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0] - return self._lshf.kneighbors([v], return_distance=False, - n_neighbors=n)[0] + if self._metric == "angular": + v = sklearn.preprocessing.normalize([v], axis=1, norm="l2")[0] + return self._lshf.kneighbors([v], return_distance=False, n_neighbors=n)[0] diff --git a/ann_benchmarks/algorithms/luceneknn.py b/ann_benchmarks/algorithms/luceneknn.py index 6941d636b..42bfa5b62 100644 --- a/ann_benchmarks/algorithms/luceneknn.py +++ b/ann_benchmarks/algorithms/luceneknn.py @@ -21,6 +21,7 @@ class Codec(PyLucene94Codec): """ Custom codec so that the appropriate Lucene94 codec can be returned with the configured M and efConstruction """ + def __init__(self, M, efConstruction): super(Codec, self).__init__() self.M = M @@ -37,15 +38,16 @@ class PyLuceneKNN(BaseANN): def __init__(self, metric: str, dimension: int, param): try: - lucene.initVM(vmargs=['-Djava.awt.headless=true -Xmx6g -Xms6g']) + lucene.initVM(vmargs=["-Djava.awt.headless=true -Xmx6g -Xms6g"]) except ValueError: - print('VM already initialized') + print("VM already initialized") self.metric = metric self.dimension = dimension self.param = param self.short_name = f"luceneknn-{param['M']}-{param['efConstruction']}" - self.simFunc = VectorSimilarityFunction.DOT_PRODUCT if self.metric == "angular" \ - else VectorSimilarityFunction.EUCLIDEAN + self.simFunc = ( + VectorSimilarityFunction.DOT_PRODUCT if self.metric == "angular" else VectorSimilarityFunction.EUCLIDEAN + ) if self.metric not in ("euclidean", "angular"): raise NotImplementedError(f"Not implemented for metric {self.metric}") @@ 
-56,10 +58,10 @@ def done(self): def fit(self, X): if self.dimension != X.shape[1]: raise Exception(f"Configured dimension {self.dimension} but data has shape {X.shape}") - if self.metric == 'angular': - X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') + if self.metric == "angular": + X = sklearn.preprocessing.normalize(X, axis=1, norm="l2") iwc = IndexWriterConfig().setOpenMode(IndexWriterConfig.OpenMode.CREATE) - codec = Codec(self.param['M'], self.param['efConstruction']) + codec = Codec(self.param["M"], self.param["efConstruction"]) iwc.setCodec(codec) iwc.setRAMBufferSizeMB(1994.0) self.dir = FSDirectory.open(Paths.get(self.short_name + ".index")) @@ -70,7 +72,7 @@ def fit(self, X): X = X.tolist() for x in X: doc = Document() - doc.add(KnnVectorField("knn", JArray('float')(x), fieldType)) + doc.add(KnnVectorField("knn", JArray("float")(x), fieldType)) doc.add(StoredField("id", id)) iw.addDocument(doc) id += 1 @@ -95,9 +97,9 @@ def run_knn_query_inner(self, num_candidates, n, q): return [int(self.searcher.doc(d.doc).get("id")) for d in topdocs.scoreDocs] def prepare_query(self, q, n): - if self.metric == 'angular': + if self.metric == "angular": q = q / np.linalg.norm(q) - self.q = JArray('float')(q.tolist()) + self.q = JArray("float")(q.tolist()) self.n = n def get_prepared_query_results(self): @@ -107,9 +109,9 @@ def run_prepared_query(self): self.res = self.run_knn_query_inner(self.ef, self.n, self.q) def prepare_batch_query(self, X, n): - if self.metric == 'angular': - X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') - self.queries = [JArray('float')(q) for q in X.tolist()] + if self.metric == "angular": + X = sklearn.preprocessing.normalize(X, axis=1, norm="l2") + self.queries = [JArray("float")(q) for q in X.tolist()] self.n = n def run_batch_query(self): diff --git a/ann_benchmarks/algorithms/milvus.py b/ann_benchmarks/algorithms/milvus.py index 55440ae85..e188e0f65 100644 --- a/ann_benchmarks/algorithms/milvus.py +++ b/ann_benchmarks/algorithms/milvus.py @@ -5,7 +5,7 @@ def metric_mapping(_metric: str): - _metric_type = {'angular': 'cosine', 'euclidean': 'l2'}.get(_metric, None) + _metric_type = {"angular": "cosine", "euclidean": "l2"}.get(_metric, None) if _metric_type is None: raise Exception(f"[Milvus] Not support metric type: {_metric}!!!") return _metric_type @@ -28,8 +28,7 @@ def __init__(self, metric, dim, index_param): self.client = None def fit(self, X): - self.client = pyknowhere.Index( - self._metric_type, self._dim, len(X), self._index_m, self._index_ef) + self.client = pyknowhere.Index(self._metric_type, self._dim, len(X), self._index_m, self._index_ef) self.client.add(X, numpy.arange(len(X))) def set_query_arguments(self, ef): diff --git a/ann_benchmarks/algorithms/mrpt.py b/ann_benchmarks/algorithms/mrpt.py index e63f70e65..63305320a 100644 --- a/ann_benchmarks/algorithms/mrpt.py +++ b/ann_benchmarks/algorithms/mrpt.py @@ -13,12 +13,11 @@ def __init__(self, metric, count): def fit(self, X): if X.dtype != numpy.float32: X = X.astype(numpy.float32) - if self._metric == 'angular': - X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') + if self._metric == "angular": + X = sklearn.preprocessing.normalize(X, axis=1, norm="l2") self._index_autotuned = mrpt.MRPTIndex(X) - self._index_autotuned.build_autotune_sample( - target_recall=None, k=self._k, n_test=1000) + self._index_autotuned.build_autotune_sample(target_recall=None, k=self._k, n_test=1000) def set_query_arguments(self, target_recall): self._target_recall = target_recall @@ -28,14 +27,16 
@@ def set_query_arguments(self, target_recall): def query(self, v, n): if v.dtype != numpy.float32: v = v.astype(numpy.float32) - if self._metric == 'angular': - v = sklearn.preprocessing.normalize( - v.reshape(1, -1), axis=1, norm='l2').flatten() + if self._metric == "angular": + v = sklearn.preprocessing.normalize(v.reshape(1, -1), axis=1, norm="l2").flatten() return self._index.ann(v) def __str__(self): - str_template = ('MRPT(target recall=%.3f, trees=%d, depth=%d, vote ' - 'threshold=%d, estimated recall=%.3f)') - return str_template % (self._target_recall, self._par['n_trees'], - self._par['depth'], self._par['votes'], - self._par['estimated_recall']) + str_template = "MRPT(target recall=%.3f, trees=%d, depth=%d, vote " "threshold=%d, estimated recall=%.3f)" + return str_template % ( + self._target_recall, + self._par["n_trees"], + self._par["depth"], + self._par["votes"], + self._par["estimated_recall"], + ) diff --git a/ann_benchmarks/algorithms/n2.py b/ann_benchmarks/algorithms/n2.py index d4b5b5974..604f62097 100644 --- a/ann_benchmarks/algorithms/n2.py +++ b/ann_benchmarks/algorithms/n2.py @@ -6,9 +6,9 @@ class N2(BaseANN): def __init__(self, metric, method_param): self._metric = metric - self._m = method_param['M'] + self._m = method_param["M"] self._m0 = self._m * 2 - self._ef_construction = method_param['efConstruction'] + self._ef_construction = method_param["efConstruction"] self._n_threads = 1 self._ef_search = -1 @@ -16,7 +16,13 @@ def fit(self, X): self._n2 = n2.HnswIndex(X.shape[1], self._metric) for x in X: self._n2.add_data(x) - self._n2.build(m=self._m, max_m0=self._m0, ef_construction=self._ef_construction, n_threads=self._n_threads, graph_merging='merge_level0') + self._n2.build( + m=self._m, + max_m0=self._m0, + ef_construction=self._ef_construction, + n_threads=self._n_threads, + graph_merging="merge_level0", + ) def set_query_arguments(self, ef): self._ef_search = ef diff --git a/ann_benchmarks/algorithms/nearpy.py b/ann_benchmarks/algorithms/nearpy.py index e991141eb..d047da092 100644 --- a/ann_benchmarks/algorithms/nearpy.py +++ b/ann_benchmarks/algorithms/nearpy.py @@ -11,31 +11,23 @@ def __init__(self, metric, n_bits, hash_counts): self._hash_counts = hash_counts self._metric = metric self._filter = NearestFilter(10) - self.name = 'NearPy(n_bits=%d, hash_counts=%d)' % ( - self._n_bits, self._hash_counts) + self.name = "NearPy(n_bits=%d, hash_counts=%d)" % (self._n_bits, self._hash_counts) def fit(self, X): hashes = [] for k in range(self._hash_counts): - nearpy_rbp = nearpy.hashes.RandomBinaryProjections( - 'rbp_%d' % k, self._n_bits) + nearpy_rbp = nearpy.hashes.RandomBinaryProjections("rbp_%d" % k, self._n_bits) hashes.append(nearpy_rbp) - if self._metric == 'euclidean': + if self._metric == "euclidean": dist = nearpy.distances.EuclideanDistance() - self._nearpy_engine = nearpy.Engine( - X.shape[1], - lshashes=hashes, - distance=dist) + self._nearpy_engine = nearpy.Engine(X.shape[1], lshashes=hashes, distance=dist) else: # Default (angular) = Cosine distance - self._nearpy_engine = nearpy.Engine( - X.shape[1], - lshashes=hashes, - vector_filters=[self._filter]) + self._nearpy_engine = nearpy.Engine(X.shape[1], lshashes=hashes, vector_filters=[self._filter]) - if self._metric == 'angular': - X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') + if self._metric == "angular": + X = sklearn.preprocessing.normalize(X, axis=1, norm="l2") for i, x in enumerate(X): self._nearpy_engine.store_vector(x, i) @@ -43,6 +35,6 @@ def query(self, v, n): # XXX: This 
feels like an unpleasant hack, but it's not clear how to do # better without making changes to NearPy self._filter.N = n - if self._metric == 'angular': - v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0] + if self._metric == "angular": + v = sklearn.preprocessing.normalize([v], axis=1, norm="l2")[0] return [y for x, y, z in self._nearpy_engine.neighbours(v)] diff --git a/ann_benchmarks/algorithms/nmslib.py b/ann_benchmarks/algorithms/nmslib.py index b7cd437c1..c41fd991a 100644 --- a/ann_benchmarks/algorithms/nmslib.py +++ b/ann_benchmarks/algorithms/nmslib.py @@ -6,64 +6,64 @@ from ann_benchmarks.constants import INDEX_DIR from ann_benchmarks.algorithms.base import BaseANN + def sparse_matrix_to_str(matrix): result = [] matrix = matrix.tocsr() matrix.sort_indices() for row in range(matrix.shape[0]): arr = [k for k in matrix.indices[matrix.indptr[row] : matrix.indptr[row + 1]]] - result.append(' '.join([str(k) for k in arr])) + result.append(" ".join([str(k) for k in arr])) return result + def dense_vector_to_str(vector): if vector.dtype == np.bool_: indices = np.flatnonzero(vector) else: indices = vector - result = ' '.join([str(k) for k in indices]) + result = " ".join([str(k) for k in indices]) return result + class NmslibReuseIndex(BaseANN): @staticmethod def encode(d): return ["%s=%s" % (a, b) for (a, b) in d.items()] def __init__(self, metric, method_name, index_param, query_param): - self._nmslib_metric = { - 'angular': 'cosinesimil', 'euclidean': 'l2', 'jaccard': 'jaccard_sparse'}[metric] + self._nmslib_metric = {"angular": "cosinesimil", "euclidean": "l2", "jaccard": "jaccard_sparse"}[metric] self._method_name = method_name self._save_index = False self._index_param = NmslibReuseIndex.encode(index_param) if query_param is not False: self._query_param = NmslibReuseIndex.encode(query_param) - self.name = ('Nmslib(method_name={}, index_param={}, ' - 'query_param={})'.format(self._method_name, - self._index_param, - self._query_param)) + self.name = "Nmslib(method_name={}, index_param={}, " "query_param={})".format( + self._method_name, self._index_param, self._query_param + ) else: self._query_param = None - self.name = 'Nmslib(method_name=%s, index_param=%s)' % ( - self._method_name, self._index_param) + self.name = "Nmslib(method_name=%s, index_param=%s)" % (self._method_name, self._index_param) - self._index_name = os.path.join(INDEX_DIR, "nmslib_%s_%s_%s" % ( - self._method_name, metric, '_'.join(self._index_param))) + self._index_name = os.path.join( + INDEX_DIR, "nmslib_%s_%s_%s" % (self._method_name, metric, "_".join(self._index_param)) + ) d = os.path.dirname(self._index_name) if not os.path.exists(d): os.makedirs(d) def fit(self, X): - if self._method_name == 'vptree': + if self._method_name == "vptree": # To avoid this issue: terminate called after throwing an instance # of 'std::runtime_error' # what(): The data size is too small or the bucket size is too # big. 
Select the parameters so that is NOT # less than * 1000 # Aborted (core dumped) - self._index_param.append('bucketSize=%d' % - min(int(len(X) * 0.0005), 1000)) + self._index_param.append("bucketSize=%d" % min(int(len(X) * 0.0005), 1000)) - if self._nmslib_metric == 'jaccard_sparse': + if self._nmslib_metric == "jaccard_sparse": self._index = nmslib.init( space=self._nmslib_metric, method=self._method_name, @@ -82,12 +82,11 @@ def fit(self, X): string_data = sparse_matrix_to_str(sparse_matrix) self._index.addDataPointBatch(string_data) else: - self._index = nmslib.init( - space=self._nmslib_metric, method=self._method_name) + self._index = nmslib.init(space=self._nmslib_metric, method=self._method_name) self._index.addDataPointBatch(X) if os.path.exists(self._index_name): - print('Loading index from file') + print("Loading index from file") self._index.loadIndex(self._index_name) else: self._index.createIndex(self._index_param) @@ -97,11 +96,11 @@ def fit(self, X): self._index.setQueryTimeParams(self._query_param) def set_query_arguments(self, ef): - if self._method_name == 'hnsw' or self._method_name == 'sw-graph': + if self._method_name == "hnsw" or self._method_name == "sw-graph": self._index.setQueryTimeParams(["efSearch=%s" % (ef)]) def query(self, v, n): - if self._nmslib_metric == 'jaccard_sparse': + if self._nmslib_metric == "jaccard_sparse": v_string = dense_vector_to_str(v) ids, distances = self._index.knnQuery(v_string, n) else: @@ -109,7 +108,7 @@ def query(self, v, n): return ids def batch_query(self, X, n): - if self._nmslib_metric == 'jaccard_sparse': + if self._nmslib_metric == "jaccard_sparse": sparse_matrix = scipy.sparse.csr_matrix(X) string_data = sparse_matrix_to_str(sparse_matrix) self.res = self._index.knnQueryBatch(string_data, n) diff --git a/ann_benchmarks/algorithms/onng_ngt.py b/ann_benchmarks/algorithms/onng_ngt.py index 826e22e28..8255c0cca 100644 --- a/ann_benchmarks/algorithms/onng_ngt.py +++ b/ann_benchmarks/algorithms/onng_ngt.py @@ -8,85 +8,107 @@ class ONNG(BaseANN): def __init__(self, metric, object_type, epsilon, param): - metrics = {'euclidean': '2', 'angular': 'E'} - self._edge_size = int(param['edge']) - self._outdegree = int(param['outdegree']) - self._indegree = int(param['indegree']) + metrics = {"euclidean": "2", "angular": "E"} + self._edge_size = int(param["edge"]) + self._outdegree = int(param["outdegree"]) + self._indegree = int(param["indegree"]) self._metric = metrics[metric] self._object_type = object_type - self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else 0 - self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False - self._refine_enabled = (param['refine'] is True) if 'refine' in param.keys() else False + self._edge_size_for_search = int(param["search_edge"]) if "search_edge" in param.keys() else 0 + self._tree_disabled = (param["tree"] is False) if "tree" in param.keys() else False + self._refine_enabled = (param["refine"] is True) if "refine" in param.keys() else False self._build_time_limit = 4 self._epsilon = epsilon - print('ONNG: edge_size=' + str(self._edge_size)) - print('ONNG: outdegree=' + str(self._outdegree)) - print('ONNG: indegree=' + str(self._indegree)) - print('ONNG: edge_size_for_search=' + str(self._edge_size_for_search)) - print('ONNG: epsilon=' + str(self._epsilon)) - print('ONNG: metric=' + metric) - print('ONNG: object_type=' + object_type) + print("ONNG: edge_size=" + str(self._edge_size)) + print("ONNG: outdegree=" + str(self._outdegree)) + 
print("ONNG: indegree=" + str(self._indegree)) + print("ONNG: edge_size_for_search=" + str(self._edge_size_for_search)) + print("ONNG: epsilon=" + str(self._epsilon)) + print("ONNG: metric=" + metric) + print("ONNG: object_type=" + object_type) def fit(self, X): - print('ONNG: start indexing...') + print("ONNG: start indexing...") dim = len(X[0]) - print('ONNG: # of data=' + str(len(X))) - print('ONNG: dimensionality=' + str(dim)) - index_dir = 'indexes' + print("ONNG: # of data=" + str(len(X))) + print("ONNG: dimensionality=" + str(dim)) + index_dir = "indexes" if not os.path.exists(index_dir): os.makedirs(index_dir) - index = os.path.join( - index_dir, - 'ONNG-{}-{}-{}'.format(self._edge_size, self._outdegree, - self._indegree)) - anngIndex = os.path.join(index_dir, 'ANNG-' + str(self._edge_size)) - print('ONNG: index=' + index) + index = os.path.join(index_dir, "ONNG-{}-{}-{}".format(self._edge_size, self._outdegree, self._indegree)) + anngIndex = os.path.join(index_dir, "ANNG-" + str(self._edge_size)) + print("ONNG: index=" + index) if (not os.path.exists(index)) and (not os.path.exists(anngIndex)): - print('ONNG: create ANNG') + print("ONNG: create ANNG") t = time.time() - args = ['ngt', 'create', '-it', '-p8', '-b500', '-ga', '-of', - '-D' + self._metric, '-d' + str(dim), - '-E' + str(self._edge_size), - '-S' + str(self._edge_size_for_search), - '-e' + str(self._epsilon), '-P0', '-B30', - '-T' + str(self._build_time_limit), anngIndex] + args = [ + "ngt", + "create", + "-it", + "-p8", + "-b500", + "-ga", + "-of", + "-D" + self._metric, + "-d" + str(dim), + "-E" + str(self._edge_size), + "-S" + str(self._edge_size_for_search), + "-e" + str(self._epsilon), + "-P0", + "-B30", + "-T" + str(self._build_time_limit), + anngIndex, + ] subprocess.call(args) idx = ngtpy.Index(path=anngIndex) idx.batch_insert(X, num_threads=24, debug=False) - print('ONNG: ANNG construction time(sec)=' + str(time.time() - t)) + print("ONNG: ANNG construction time(sec)=" + str(time.time() - t)) t = time.time() if self._refine_enabled: - idx.refine_anng(epsilon=self._epsilon, num_of_edges=self._edge_size, - num_of_explored_edges=self._edge_size_for_search) - print('ONNG: RNNG construction time(sec)=' + str(time.time() - t)) + idx.refine_anng( + epsilon=self._epsilon, + num_of_edges=self._edge_size, + num_of_explored_edges=self._edge_size_for_search, + ) + print("ONNG: RNNG construction time(sec)=" + str(time.time() - t)) idx.save() idx.close() if not os.path.exists(index): - print('ONNG: degree adjustment') + print("ONNG: degree adjustment") t = time.time() - args = ['ngt', 'reconstruct-graph', '-mS', - '-o ' + str(self._outdegree), - '-i ' + str(self._indegree), anngIndex, index] + args = [ + "ngt", + "reconstruct-graph", + "-mS", + "-o " + str(self._outdegree), + "-i " + str(self._indegree), + anngIndex, + index, + ] subprocess.call(args) - print('ONNG: degree adjustment time(sec)=' + str(time.time() - t)) + print("ONNG: degree adjustment time(sec)=" + str(time.time() - t)) if os.path.exists(index): - print('ONNG: index already exists! ' + str(index)) + print("ONNG: index already exists! 
" + str(index)) t = time.time() print(self._tree_disabled) self.index = ngtpy.Index(index, read_only=True, tree_disabled=self._tree_disabled) self.indexName = index - print('ONNG: open time(sec)=' + str(time.time() - t)) + print("ONNG: open time(sec)=" + str(time.time() - t)) else: - print('ONNG: something wrong.') - print('ONNG: end of fit') + print("ONNG: something wrong.") + print("ONNG: end of fit") def set_query_arguments(self, parameters): epsilon, edge_size = parameters print("ONNG: edge_size=" + str(edge_size)) print("ONNG: epsilon=" + str(epsilon)) - self.name = 'ONNG-NGT(%s, %s, %s, %s, %1.3f)' % ( - self._edge_size, self._outdegree, - self._indegree, edge_size, epsilon) + self.name = "ONNG-NGT(%s, %s, %s, %s, %1.3f)" % ( + self._edge_size, + self._outdegree, + self._indegree, + edge_size, + epsilon, + ) epsilon = epsilon - 1.0 self.index.set(epsilon=epsilon, edge_size=edge_size) @@ -94,4 +116,4 @@ def query(self, v, n): return self.index.search(v, n, with_distance=False) def freeIndex(self): - print('ONNG: free') + print("ONNG: free") diff --git a/ann_benchmarks/algorithms/opensearchknn.py b/ann_benchmarks/algorithms/opensearchknn.py index f1e8ebf6b..2fe636ec5 100644 --- a/ann_benchmarks/algorithms/opensearchknn.py +++ b/ann_benchmarks/algorithms/opensearchknn.py @@ -13,31 +13,27 @@ # Configure the logger. logging.getLogger("elasticsearch").setLevel(logging.WARN) + class OpenSearchKNN(BaseANN): def __init__(self, metric, dimension, method_param): self.metric = {"angular": "cosinesimil", "euclidean": "l2"}[metric] self.dimension = dimension self.method_param = method_param - self.param_string = "-".join(k+"-"+str(v) for k,v in self.method_param.items()).lower() + self.param_string = "-".join(k + "-" + str(v) for k, v in self.method_param.items()).lower() self.name = f"os-{self.param_string}" self.es = Elasticsearch(["http://localhost:9200"]) es_wait() def fit(self, X): body = { - "settings": { - "index": {"knn": True}, - "number_of_shards": 1, - "number_of_replicas": 0, - "refresh_interval": -1 - } + "settings": {"index": {"knn": True}, "number_of_shards": 1, "number_of_replicas": 0, "refresh_interval": -1} } mapping = { "properties": { "id": {"type": "keyword", "store": True}, "vec": { - "type": "knn_vector", + "type": "knn_vector", "dimension": self.dimension, "method": { "name": "hnsw", @@ -45,61 +41,60 @@ def fit(self, X): "engine": "nmslib", "parameters": { "ef_construction": self.method_param["efConstruction"], - "m": self.method_param["M"] - } - } - } + "m": self.method_param["M"], + }, + }, + }, } } - + self.es.indices.create(self.name, body=body) self.es.indices.put_mapping(mapping, self.name) print("Uploading data to the Index:", self.name) + def gen(): for i, vec in enumerate(tqdm(X)): - yield { "_op_type": "index", "_index": self.name, "vec": vec.tolist(), 'id': str(i + 1) } + yield {"_op_type": "index", "_index": self.name, "vec": vec.tolist(), "id": str(i + 1)} (_, errors) = bulk(self.es, gen(), chunk_size=500, max_retries=2, request_timeout=10) assert len(errors) == 0, errors - + print("Force Merge...") self.es.indices.forcemerge(self.name, max_num_segments=1, request_timeout=1000) - + print("Refreshing the Index...") self.es.indices.refresh(self.name, request_timeout=1000) - + print("Running Warmup API...") - res = urlopen(Request("http://localhost:9200/_plugins/_knn/warmup/"+self.name+"?pretty")) + res = urlopen(Request("http://localhost:9200/_plugins/_knn/warmup/" + self.name + "?pretty")) print(res.read().decode("utf-8")) def set_query_arguments(self, ef): - 
body = { - "settings": { - "index": {"knn.algo_param.ef_search": ef} - } - } + body = {"settings": {"index": {"knn.algo_param.ef_search": ef}}} self.es.indices.put_settings(body=body) def query(self, q, n): - body = { - "query": { - "knn": { - "vec": {"vector": q.tolist(), "k": n} - } - } - } + body = {"query": {"knn": {"vec": {"vector": q.tolist(), "k": n}}}} + + res = self.es.search( + index=self.name, + body=body, + size=n, + _source=False, + docvalue_fields=["id"], + stored_fields="_none_", + filter_path=["hits.hits.fields.id"], + request_timeout=10, + ) - res = self.es.search(index=self.name, body=body, size=n, _source=False, docvalue_fields=['id'], - stored_fields="_none_", filter_path=["hits.hits.fields.id"], request_timeout=10) - - return [int(h['fields']['id'][0]) - 1 for h in res['hits']['hits']] + return [int(h["fields"]["id"][0]) - 1 for h in res["hits"]["hits"]] def batch_query(self, X, n): self.batch_res = [self.query(q, n) for q in X] def get_batch_results(self): return self.batch_res - + def freeIndex(self): - self.es.indices.delete(index=self.name) \ No newline at end of file + self.es.indices.delete(index=self.name) diff --git a/ann_benchmarks/algorithms/panng_ngt.py b/ann_benchmarks/algorithms/panng_ngt.py index e3f7bdadb..027305cad 100644 --- a/ann_benchmarks/algorithms/panng_ngt.py +++ b/ann_benchmarks/algorithms/panng_ngt.py @@ -8,69 +8,67 @@ class PANNG(BaseANN): def __init__(self, metric, object_type, param): - metrics = {'euclidean': 'L2', 'angular': 'Cosine'} - self._edge_size = int(param['edge']) - self._pathadj_size = int(param['pathadj']) - self._edge_size_for_search = int(param['searchedge']) + metrics = {"euclidean": "L2", "angular": "Cosine"} + self._edge_size = int(param["edge"]) + self._pathadj_size = int(param["pathadj"]) + self._edge_size_for_search = int(param["searchedge"]) self._metric = metrics[metric] self._object_type = object_type - print('PANNG: edge_size=' + str(self._edge_size)) - print('PANNG: pathadj_size=' + str(self._pathadj_size)) - print('PANNG: edge_size_for_search=' + str(self._edge_size_for_search)) - print('PANNG: metric=' + metric) - print('PANNG: object_type=' + object_type) + print("PANNG: edge_size=" + str(self._edge_size)) + print("PANNG: pathadj_size=" + str(self._pathadj_size)) + print("PANNG: edge_size_for_search=" + str(self._edge_size_for_search)) + print("PANNG: metric=" + metric) + print("PANNG: object_type=" + object_type) def fit(self, X): - print('PANNG: start indexing...') + print("PANNG: start indexing...") dim = len(X[0]) - print('PANNG: # of data=' + str(len(X))) - print('PANNG: Dimensionality=' + str(dim)) - index_dir = 'indexes' + print("PANNG: # of data=" + str(len(X))) + print("PANNG: Dimensionality=" + str(dim)) + index_dir = "indexes" if not os.path.exists(index_dir): os.makedirs(index_dir) - index = os.path.join( - index_dir, - 'PANNG-' + str(self._edge_size) + '-' + str(self._pathadj_size)) + index = os.path.join(index_dir, "PANNG-" + str(self._edge_size) + "-" + str(self._pathadj_size)) print(index) if os.path.exists(index): - print('PANNG: index already exists! ' + str(index)) + print("PANNG: index already exists! 
" + str(index)) else: t0 = time.time() - ngtpy.create(path=index, dimension=dim, - edge_size_for_creation=self._edge_size, - distance_type=self._metric, - object_type=self._object_type) + ngtpy.create( + path=index, + dimension=dim, + edge_size_for_creation=self._edge_size, + distance_type=self._metric, + object_type=self._object_type, + ) idx = ngtpy.Index(path=index) idx.batch_insert(X, num_threads=24, debug=False) idx.save() idx.close() if self._pathadj_size > 0: - print('PANNG: path adjustment') - args = ['ngt', 'prune', '-s ' + str(self._pathadj_size), - index] + print("PANNG: path adjustment") + args = ["ngt", "prune", "-s " + str(self._pathadj_size), index] subprocess.call(args) indexingtime = time.time() - t0 - print('PANNG: indexing, adjustment and saving time(sec)={}' - .format(indexingtime)) + print("PANNG: indexing, adjustment and saving time(sec)={}".format(indexingtime)) t0 = time.time() self.index = ngtpy.Index(path=index, read_only=True) opentime = time.time() - t0 - print('PANNG: open time(sec)=' + str(opentime)) + print("PANNG: open time(sec)=" + str(opentime)) def set_query_arguments(self, epsilon): print("PANNG: epsilon=" + str(epsilon)) self._epsilon = epsilon - 1.0 - self.name = 'PANNG-NGT(%d, %d, %d, %1.3f)' % ( + self.name = "PANNG-NGT(%d, %d, %d, %1.3f)" % ( self._edge_size, self._pathadj_size, self._edge_size_for_search, - self._epsilon + 1.0) + self._epsilon + 1.0, + ) def query(self, v, n): - results = self.index.search( - v, n, self._epsilon, self._edge_size_for_search, - with_distance=False) + results = self.index.search(v, n, self._epsilon, self._edge_size_for_search, with_distance=False) return results def freeIndex(self): - print('PANNG: free') + print("PANNG: free") diff --git a/ann_benchmarks/algorithms/pgvector.py b/ann_benchmarks/algorithms/pgvector.py index 2fa1a9a51..5649fbac7 100644 --- a/ann_benchmarks/algorithms/pgvector.py +++ b/ann_benchmarks/algorithms/pgvector.py @@ -6,6 +6,7 @@ from ann_benchmarks.algorithms.base import BaseANN + class PGVector(BaseANN): def __init__(self, metric, lists): self._metric = metric @@ -24,9 +25,11 @@ def fit(self, X): copy.write_row((i, embedding)) print("creating index...") if self._metric == "angular": - cur.execute('CREATE INDEX ON items USING ivfflat (embedding vector_cosine_ops) WITH (lists = %d)' % self._lists) + cur.execute( + "CREATE INDEX ON items USING ivfflat (embedding vector_cosine_ops) WITH (lists = %d)" % self._lists + ) elif self._metric == "euclidean": - cur.execute('CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = %d)' % self._lists) + cur.execute("CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = %d)" % self._lists) else: raise RuntimeError(f"unknown metric {self._metric}") print("done!") diff --git a/ann_benchmarks/algorithms/puffinn.py b/ann_benchmarks/algorithms/puffinn.py index 5f372abc3..fd58bd7c9 100644 --- a/ann_benchmarks/algorithms/puffinn.py +++ b/ann_benchmarks/algorithms/puffinn.py @@ -6,11 +6,11 @@ from ann_benchmarks.algorithms.base import BaseANN import numpy + class Puffinn(BaseANN): - def __init__(self, metric, space=10**6, hash_function="fht_crosspolytope", hash_source='pool', hash_args=None): - if metric not in ['jaccard', 'angular']: - raise NotImplementedError( - "Puffinn doesn't support metric %s" % metric) + def __init__(self, metric, space=10**6, hash_function="fht_crosspolytope", hash_source="pool", hash_args=None): + if metric not in ["jaccard", "angular"]: + raise NotImplementedError("Puffinn doesn't support metric 
%s" % metric) self.metric = metric self.space = space self.hash_function = hash_function @@ -18,20 +18,26 @@ def __init__(self, metric, space=10**6, hash_function="fht_crosspolytope", hash_ self.hash_args = hash_args def fit(self, X): - if self.metric == 'angular': + if self.metric == "angular": dimensions = len(X[0]) else: dimensions = 0 for x in X: - dimensions = max(dimensions, max(x)+1) + dimensions = max(dimensions, max(x) + 1) if self.hash_args: - self.index = puffinn.Index(self.metric, dimensions, self.space,\ - hash_function=self.hash_function, hash_source=self.hash_source,\ - hash_args=self.hash_args) + self.index = puffinn.Index( + self.metric, + dimensions, + self.space, + hash_function=self.hash_function, + hash_source=self.hash_source, + hash_args=self.hash_args, + ) else: - self.index = puffinn.Index(self.metric, dimensions, self.space,\ - hash_function=self.hash_function, hash_source=self.hash_source) + self.index = puffinn.Index( + self.metric, dimensions, self.space, hash_function=self.hash_function, hash_source=self.hash_source + ) for i, x in enumerate(X): if self.metric == "jaccard" and x.dtype == np.bool_: x = np.flatnonzero(x) @@ -49,4 +55,9 @@ def query(self, v, n): return self.index.search(v, n, self.recall) def __str__(self): - return 'PUFFINN(space=%d, recall=%f, hf=%s, hashsource=%s)' % (self.space, self.recall, self.hash_function, self.hash_source) + return "PUFFINN(space=%d, recall=%f, hf=%s, hashsource=%s)" % ( + self.space, + self.recall, + self.hash_function, + self.hash_source, + ) diff --git a/ann_benchmarks/algorithms/pynndescent.py b/ann_benchmarks/algorithms/pynndescent.py index 92f470538..392e71c76 100644 --- a/ann_benchmarks/algorithms/pynndescent.py +++ b/ann_benchmarks/algorithms/pynndescent.py @@ -13,9 +13,7 @@ def __init__(self, metric, index_param_dict, n_search_trees=1): self._n_neighbors = 30 if "pruning_degree_multiplier" in index_param_dict: - self._pruning_degree_multiplier = float( - index_param_dict["pruning_degree_multiplier"] - ) + self._pruning_degree_multiplier = float(index_param_dict["pruning_degree_multiplier"]) else: self._pruning_degree_multiplier = 1.5 @@ -98,13 +96,13 @@ def query(self, v, n): self._query_matrix.data = np.ones(size, dtype=np.float32) ind, dist = self._index.query(self._query_matrix, k=n, epsilon=self._epsilon) else: - ind, dist = self._index.query( - v.reshape(1, -1).astype("float32"), k=n, epsilon=self._epsilon - ) + ind, dist = self._index.query(v.reshape(1, -1).astype("float32"), k=n, epsilon=self._epsilon) return ind[0] def __str__(self): - str_template = "PyNNDescent(n_neighbors=%d, pruning_mult=%.2f, diversify_prob=%.3f, epsilon=%.3f, leaf_size=%02d)" + str_template = ( + "PyNNDescent(n_neighbors=%d, pruning_mult=%.2f, diversify_prob=%.3f, epsilon=%.3f, leaf_size=%02d)" + ) return str_template % ( self._n_neighbors, self._pruning_degree_multiplier, diff --git a/ann_benchmarks/algorithms/qdrant.py b/ann_benchmarks/algorithms/qdrant.py index a88028b10..1bf34edab 100644 --- a/ann_benchmarks/algorithms/qdrant.py +++ b/ann_benchmarks/algorithms/qdrant.py @@ -4,34 +4,29 @@ import numpy as np from time import sleep + class Qdrant(BaseANN): - - _distances_mapping = { - 'dot': Distance.DOT, - 'angular': Distance.COSINE, - 'euclidean': Distance.EUCLID - } + + _distances_mapping = {"dot": Distance.DOT, "angular": Distance.COSINE, "euclidean": Distance.EUCLID} def __init__(self, metric, grpc): self._metric = metric - self._collection_name = 'ann_benchmarks_test' + self._collection_name = "ann_benchmarks_test" 
self._grpc = grpc - self._search_params = { - 'hnsw_ef': None - } + self._search_params = {"hnsw_ef": None} qdrant_client_params = { - 'host': 'localhost', - 'port': 6333, - 'grpc_port': 6334, - 'prefer_grpc': self._grpc, - 'https': False, - } + "host": "localhost", + "port": 6333, + "grpc_port": 6334, + "prefer_grpc": self._grpc, + "https": False, + } self._client = QdrantClient(**qdrant_client_params) - def fit(self, X): - if X.dtype != np.float32: X = X.astype(np.float32) + if X.dtype != np.float32: + X = X.astype(np.float32) self._client.recreate_collection( collection_name=self._collection_name, @@ -40,63 +35,63 @@ def fit(self, X): # hnsw_config=qdrant_models.HnswConfigDiff( # ef_construct=100, #100 is qdrant default # m=16 #16 is qdrant default - # ), - timeout=30 + # ), + timeout=30, ) self._client.upload_collection( - collection_name=self._collection_name, - vectors=X, - ids=list(range(X.shape[0])), - parallel=1 + collection_name=self._collection_name, vectors=X, ids=list(range(X.shape[0])), parallel=1 ) - #wait for vectors to be fully indexed + # wait for vectors to be fully indexed SECONDS_WAITING_FOR_INDEXING_API_CALL = 5 while True: - collection_info = self._client.http.collections_api.get_collection(self._collection_name).dict()['result'] + collection_info = self._client.http.collections_api.get_collection(self._collection_name).dict()["result"] - vectors_count = collection_info['vectors_count'] - indexed_vectors_count = collection_info['indexed_vectors_count'] - status = collection_info['status'] + vectors_count = collection_info["vectors_count"] + indexed_vectors_count = collection_info["indexed_vectors_count"] + status = collection_info["status"] + + print("Stored vectors: " + str(vectors_count)) + print("Indexed vectors: " + str(indexed_vectors_count)) + print("Collection status: " + str(status)) - print('Stored vectors: ' + str(vectors_count)) - print('Indexed vectors: ' + str(indexed_vectors_count)) - print('Collection status: ' + str(status)) - print(type(status), status) if status == CollectionStatus.GREEN: - print('Vectors indexing finished.') + print("Vectors indexing finished.") break else: - print('Waiting ' + str(SECONDS_WAITING_FOR_INDEXING_API_CALL) + ' seconds to query collection info again...') + print( + "Waiting " + + str(SECONDS_WAITING_FOR_INDEXING_API_CALL) + + " seconds to query collection info again..." 
+ ) sleep(SECONDS_WAITING_FOR_INDEXING_API_CALL) - def set_query_arguments(self, hnsw_ef): - self._search_params['hnsw_ef'] = hnsw_ef + self._search_params["hnsw_ef"] = hnsw_ef def query(self, q, n): - search_params = SearchParams(hnsw_ef=self._search_params['hnsw_ef']) + search_params = SearchParams(hnsw_ef=self._search_params["hnsw_ef"]) search_result = self._client.search( collection_name=self._collection_name, query_vector=q, search_params=search_params, - with_payload=False, #just in case - limit=n + with_payload=False, # just in case + limit=n, ) result_ids = [point.id for point in search_result] return result_ids def batch_query(self, X, n): - search_queries = [SearchRequest(vector=q.tolist(), limit=n, params=SearchParams(hnsw_ef=self._search_params['hnsw_ef'])) for q in X] + search_queries = [ + SearchRequest(vector=q.tolist(), limit=n, params=SearchParams(hnsw_ef=self._search_params["hnsw_ef"])) + for q in X + ] - batch_search_results = self._client.search_batch( - collection_name=self._collection_name, - requests=search_queries - ) + batch_search_results = self._client.search_batch(collection_name=self._collection_name, requests=search_queries) self.batch_results = [] for search_result in batch_search_results: @@ -106,4 +101,4 @@ def get_batch_results(self): return self.batch_results def __str__(self): - return "Qdrant(grpc=%s, hnsw_ef=%s)" % (self._grpc, self._search_params['hnsw_ef']) + return "Qdrant(grpc=%s, hnsw_ef=%s)" % (self._grpc, self._search_params["hnsw_ef"]) diff --git a/ann_benchmarks/algorithms/qg_ngt.py b/ann_benchmarks/algorithms/qg_ngt.py index b097d5793..ddd4b4707 100644 --- a/ann_benchmarks/algorithms/qg_ngt.py +++ b/ann_benchmarks/algorithms/qg_ngt.py @@ -5,98 +5,126 @@ import time from ann_benchmarks.algorithms.base import BaseANN + class QG(BaseANN): def __init__(self, metric, object_type, epsilon, param): - metrics = {'euclidean': '2', 'angular': 'E'} - self._edge_size = int(param['edge']) - self._outdegree = int(param['outdegree']) - self._indegree = int(param['indegree']) - self._max_edge_size = int(param['max_edge']) if 'max_edge' in param.keys() else 128 + metrics = {"euclidean": "2", "angular": "E"} + self._edge_size = int(param["edge"]) + self._outdegree = int(param["outdegree"]) + self._indegree = int(param["indegree"]) + self._max_edge_size = int(param["max_edge"]) if "max_edge" in param.keys() else 128 self._metric = metrics[metric] self._object_type = object_type - self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else -2 - self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False + self._edge_size_for_search = int(param["search_edge"]) if "search_edge" in param.keys() else -2 + self._tree_disabled = (param["tree"] is False) if "tree" in param.keys() else False self._build_time_limit = 4 self._epsilon = epsilon - print('QG: edge_size=' + str(self._edge_size)) - print('QG: outdegree=' + str(self._outdegree)) - print('QG: indegree=' + str(self._indegree)) - print('QG: edge_size_for_search=' + str(self._edge_size_for_search)) - print('QG: epsilon=' + str(self._epsilon)) - print('QG: metric=' + metric) - print('QG: object_type=' + object_type) + print("QG: edge_size=" + str(self._edge_size)) + print("QG: outdegree=" + str(self._outdegree)) + print("QG: indegree=" + str(self._indegree)) + print("QG: edge_size_for_search=" + str(self._edge_size_for_search)) + print("QG: epsilon=" + str(self._epsilon)) + print("QG: metric=" + metric) + print("QG: object_type=" + object_type) def fit(self, 
X): - print('QG: start indexing...') + print("QG: start indexing...") dim = len(X[0]) - print('QG: # of data=' + str(len(X))) - print('QG: dimensionality=' + str(dim)) - index_dir = 'indexes' + print("QG: # of data=" + str(len(X))) + print("QG: dimensionality=" + str(dim)) + index_dir = "indexes" if not os.path.exists(index_dir): os.makedirs(index_dir) - index = os.path.join( - index_dir, - 'ONNG-{}-{}-{}'.format(self._edge_size, self._outdegree, - self._indegree)) - anngIndex = os.path.join(index_dir, 'ANNG-' + str(self._edge_size)) - print('QG: index=' + index) + index = os.path.join(index_dir, "ONNG-{}-{}-{}".format(self._edge_size, self._outdegree, self._indegree)) + anngIndex = os.path.join(index_dir, "ANNG-" + str(self._edge_size)) + print("QG: index=" + index) if (not os.path.exists(index)) and (not os.path.exists(anngIndex)): - print('QG: create ANNG') + print("QG: create ANNG") t = time.time() - args = ['ngt', 'create', '-it', '-p8', '-b500', '-ga', '-of', - '-D' + self._metric, '-d' + str(dim), - '-E' + str(self._edge_size), '-S40', - '-e' + str(self._epsilon), '-P0', '-B30', - '-T' + str(self._build_time_limit), anngIndex] + args = [ + "ngt", + "create", + "-it", + "-p8", + "-b500", + "-ga", + "-of", + "-D" + self._metric, + "-d" + str(dim), + "-E" + str(self._edge_size), + "-S40", + "-e" + str(self._epsilon), + "-P0", + "-B30", + "-T" + str(self._build_time_limit), + anngIndex, + ] subprocess.call(args) idx = ngtpy.Index(path=anngIndex) idx.batch_insert(X, num_threads=24, debug=False) idx.save() idx.close() - print('QG: ANNG construction time(sec)=' + str(time.time() - t)) + print("QG: ANNG construction time(sec)=" + str(time.time() - t)) if not os.path.exists(index): - print('QG: degree adjustment') + print("QG: degree adjustment") t = time.time() - args = ['ngt', 'reconstruct-graph', '-mS', - '-E ' + str(self._outdegree), - '-o ' + str(self._outdegree), - '-i ' + str(self._indegree), anngIndex, index] + args = [ + "ngt", + "reconstruct-graph", + "-mS", + "-E " + str(self._outdegree), + "-o " + str(self._outdegree), + "-i " + str(self._indegree), + anngIndex, + index, + ] subprocess.call(args) - print('QG: degree adjustment time(sec)=' + str(time.time() - t)) - if not os.path.exists(index + '/qg'): - print('QG:create and append...') + print("QG: degree adjustment time(sec)=" + str(time.time() - t)) + if not os.path.exists(index + "/qg"): + print("QG:create and append...") t = time.time() - args = ['qbg', 'create-qg', index] + args = ["qbg", "create-qg", index] subprocess.call(args) - print('QG: create qg time(sec)=' + str(time.time() - t)) - print('QB: build...') + print("QG: create qg time(sec)=" + str(time.time() - t)) + print("QB: build...") t = time.time() - args = ['qbg', 'build-qg', '-o20000', '-M6', '-ib', - '-I400', '-Gz', '-Pn', - '-E' + str(self._max_edge_size), - index] + args = [ + "qbg", + "build-qg", + "-o20000", + "-M6", + "-ib", + "-I400", + "-Gz", + "-Pn", + "-E" + str(self._max_edge_size), + index, + ] subprocess.call(args) - print('QG: build qg time(sec)=' + str(time.time() - t)) - if os.path.exists(index + '/qg/grp'): - print('QG: index already exists! ' + str(index)) + print("QG: build qg time(sec)=" + str(time.time() - t)) + if os.path.exists(index + "/qg/grp"): + print("QG: index already exists! 
" + str(index)) t = time.time() self.index = ngtpy.QuantizedIndex(index, self._max_edge_size) self.index.set_with_distance(False) self.indexName = index - print('QG: open time(sec)=' + str(time.time() - t)) + print("QG: open time(sec)=" + str(time.time() - t)) else: - print('QG: something wrong.') - print('QG: end of fit') + print("QG: something wrong.") + print("QG: end of fit") def set_query_arguments(self, parameters): result_expansion, epsilon = parameters print("QG: result_expansion=" + str(result_expansion)) print("QG: epsilon=" + str(epsilon)) - self.name = 'QG-NGT(%s, %s, %s, %s, %s, %1.3f)' % ( - self._edge_size, self._outdegree, - self._indegree, self._max_edge_size, + self.name = "QG-NGT(%s, %s, %s, %s, %s, %1.3f)" % ( + self._edge_size, + self._outdegree, + self._indegree, + self._max_edge_size, epsilon, - result_expansion) + result_expansion, + ) epsilon = epsilon - 1.0 self.index.set(epsilon=epsilon, result_expansion=result_expansion) @@ -104,4 +132,4 @@ def query(self, v, n): return self.index.search(v, n) def freeIndex(self): - print('QG: free') + print("QG: free") diff --git a/ann_benchmarks/algorithms/qsg_ngt.py b/ann_benchmarks/algorithms/qsg_ngt.py index 8ed27f1a9..700fb4a35 100644 --- a/ann_benchmarks/algorithms/qsg_ngt.py +++ b/ann_benchmarks/algorithms/qsg_ngt.py @@ -10,149 +10,214 @@ class QSG(BaseANN): def __init__(self, metric, object_type, epsilon, param): - metrics = {'euclidean': '2', 'angular': 'E'} - self._edge_size = int(param['edge']) - self._outdegree = int(param['outdegree']) - self._indegree = int(param['indegree']) - self._max_edge_size = int(param['max_edge']) if 'max_edge' in param.keys() else 128 + metrics = {"euclidean": "2", "angular": "E"} + self._edge_size = int(param["edge"]) + self._outdegree = int(param["outdegree"]) + self._indegree = int(param["indegree"]) + self._max_edge_size = int(param["max_edge"]) if "max_edge" in param.keys() else 128 self._metric = metrics[metric] self._object_type = object_type - self._edge_size_for_search = int(param['search_edge']) if 'search_edge' in param.keys() else -2 - self._tree_disabled = (param['tree'] is False) if 'tree' in param.keys() else False + self._edge_size_for_search = int(param["search_edge"]) if "search_edge" in param.keys() else -2 + self._tree_disabled = (param["tree"] is False) if "tree" in param.keys() else False self._build_time_limit = 4 self._epsilon = epsilon - self._paramE = param['paramE'] - self._paramS = param['paramS'] - self._range = int(param['range']) - self._threshold = int(param['threshold']) - self._rangeMax = int(param['rangeMax']) - self._searchA = int(param['searchA']) - self._ifES = int(param['ifES']) - print('QSG: edge_size=' + str(self._edge_size)) - print('QSG: outdegree=' + str(self._outdegree)) - print('QSG: indegree=' + str(self._indegree)) - print('QSG: edge_size_for_search=' + str(self._edge_size_for_search)) - print('QSG: epsilon=' + str(self._epsilon)) - print('QSG: metric=' + metric) - print('QSG: object_type=' + object_type) - print('QG: range=' +str(self._range)) - print('QG: threshold=' + str(self._threshold)) + self._paramE = param["paramE"] + self._paramS = param["paramS"] + self._range = int(param["range"]) + self._threshold = int(param["threshold"]) + self._rangeMax = int(param["rangeMax"]) + self._searchA = int(param["searchA"]) + self._ifES = int(param["ifES"]) + print("QSG: edge_size=" + str(self._edge_size)) + print("QSG: outdegree=" + str(self._outdegree)) + print("QSG: indegree=" + str(self._indegree)) + print("QSG: edge_size_for_search=" + 
str(self._edge_size_for_search)) + print("QSG: epsilon=" + str(self._epsilon)) + print("QSG: metric=" + metric) + print("QSG: object_type=" + object_type) + print("QG: range=" + str(self._range)) + print("QG: threshold=" + str(self._threshold)) def fit(self, X): - print('QSG: start indexing...') + print("QSG: start indexing...") dim = len(X[0]) - print('QSG: # of data=' + str(len(X))) - print('QSG: dimensionality=' + str(dim)) - index_dir = 'indexes' + print("QSG: # of data=" + str(len(X))) + print("QSG: dimensionality=" + str(dim)) + index_dir = "indexes" if not os.path.exists(index_dir): os.makedirs(index_dir) - index = os.path.join( - index_dir, - 'ONNG-{}-{}-{}'.format(self._edge_size, self._outdegree, - self._indegree)) - anngIndex = os.path.join(index_dir, 'ANNG-' + str(self._edge_size)) - print('QSG: index=' + index) + index = os.path.join(index_dir, "ONNG-{}-{}-{}".format(self._edge_size, self._outdegree, self._indegree)) + anngIndex = os.path.join(index_dir, "ANNG-" + str(self._edge_size)) + print("QSG: index=" + index) if (not os.path.exists(index)) and (not os.path.exists(anngIndex)): - print('QSG: create ANNG') + print("QSG: create ANNG") t = time.time() - args = ['ngt', 'create', '-it', '-p8', '-b500', '-ga', '-of', - '-D' + self._metric, '-d' + str(dim), - '-E' + str(self._edge_size), '-S40', - '-e' + str(self._epsilon), '-P0', '-B30', - '-T' + str(self._build_time_limit),'-R' + str(self._range), '-t' + str(self._threshold),'-M' + str(self._rangeMax),'-A' + str(self._searchA),'-H' + str(self._ifES), anngIndex] + args = [ + "ngt", + "create", + "-it", + "-p8", + "-b500", + "-ga", + "-of", + "-D" + self._metric, + "-d" + str(dim), + "-E" + str(self._edge_size), + "-S40", + "-e" + str(self._epsilon), + "-P0", + "-B30", + "-T" + str(self._build_time_limit), + "-R" + str(self._range), + "-t" + str(self._threshold), + "-M" + str(self._rangeMax), + "-A" + str(self._searchA), + "-H" + str(self._ifES), + anngIndex, + ] subprocess.call(args) idx = ngtpy.Index(path=anngIndex) idx.batch_insert(X, num_threads=24, debug=False) idx.save() idx.close() - print('QSG: ANNG construction time(sec)=' + str(time.time() - t)) + print("QSG: ANNG construction time(sec)=" + str(time.time() - t)) if self._ifES == 1: - if self._metric == 'E': - X_normalized = preprocessing.normalize(X, norm='l2') - fvecs_dir = 'fvecs' + if self._metric == "E": + X_normalized = preprocessing.normalize(X, norm="l2") + fvecs_dir = "fvecs" if not os.path.exists(fvecs_dir): os.makedirs(fvecs_dir) - fvecs = os.path.join(fvecs_dir, 'base.fvecs') - with open(fvecs, 'wb') as fp: + fvecs = os.path.join(fvecs_dir, "base.fvecs") + with open(fvecs, "wb") as fp: for y in X_normalized: - d = struct.pack('I', y.size) + d = struct.pack("I", y.size) fp.write(d) for x in y: - a = struct.pack('f', x) + a = struct.pack("f", x) fp.write(a) else: - fvecs_dir = 'fvecs' + fvecs_dir = "fvecs" if not os.path.exists(fvecs_dir): os.makedirs(fvecs_dir) - fvecs = os.path.join(fvecs_dir, 'base.fvecs') - with open(fvecs, 'wb') as fp: + fvecs = os.path.join(fvecs_dir, "base.fvecs") + with open(fvecs, "wb") as fp: for y in X: - d = struct.pack('I', y.size) + d = struct.pack("I", y.size) fp.write(d) for x in y: - a = struct.pack('f', x) + a = struct.pack("f", x) fp.write(a) parmEfanna = self._paramE parmSSG = self._paramS - graph_dir = 'graph' + graph_dir = "graph" if not os.path.exists(graph_dir): os.makedirs(graph_dir) - KNNG = os.path.join(graph_dir, 'KNNG-' + str(parmEfanna[0]) + '-' + str(parmEfanna[1]) + '-' + str( - parmEfanna[2]) + '-' + 
str(parmEfanna[3]) + '-' + str(parmEfanna[4]) + '.graph') - SG = os.path.join(anngIndex, 'grp') - cmds = '/home/app/hwtl_sdu-anns-qsgngtlib/qsgngt-knng ' + str(fvecs) + ' ' + str(KNNG) + ' ' + str( - parmEfanna[0]) + ' ' + str(parmEfanna[1]) + ' ' + str(parmEfanna[2]) + ' ' + str( - parmEfanna[3]) + ' ' + str( - parmEfanna[4]) + \ - '&& /home/app/hwtl_sdu-anns-qsgngtlib/qsgngt-SpaceGraph ' + str(fvecs) + ' ' + str(KNNG) + ' ' + str( - parmSSG[0]) + ' ' + str(parmSSG[1]) + ' ' + str(parmSSG[2]) + ' ' + str(SG) + KNNG = os.path.join( + graph_dir, + "KNNG-" + + str(parmEfanna[0]) + + "-" + + str(parmEfanna[1]) + + "-" + + str(parmEfanna[2]) + + "-" + + str(parmEfanna[3]) + + "-" + + str(parmEfanna[4]) + + ".graph", + ) + SG = os.path.join(anngIndex, "grp") + cmds = ( + "/home/app/hwtl_sdu-anns-qsgngtlib/qsgngt-knng " + + str(fvecs) + + " " + + str(KNNG) + + " " + + str(parmEfanna[0]) + + " " + + str(parmEfanna[1]) + + " " + + str(parmEfanna[2]) + + " " + + str(parmEfanna[3]) + + " " + + str(parmEfanna[4]) + + "&& /home/app/hwtl_sdu-anns-qsgngtlib/qsgngt-SpaceGraph " + + str(fvecs) + + " " + + str(KNNG) + + " " + + str(parmSSG[0]) + + " " + + str(parmSSG[1]) + + " " + + str(parmSSG[2]) + + " " + + str(SG) + ) os.system(cmds) - if not os.path.exists(index): - print('QSG: degree adjustment') + print("QSG: degree adjustment") t = time.time() - args = ['ngt', 'reconstruct-graph', '-mS', - '-E ' + str(self._outdegree), - '-o ' + str(self._outdegree), - '-i ' + str(self._indegree), anngIndex, index] + args = [ + "ngt", + "reconstruct-graph", + "-mS", + "-E " + str(self._outdegree), + "-o " + str(self._outdegree), + "-i " + str(self._indegree), + anngIndex, + index, + ] subprocess.call(args) - print('QSG: degree adjustment time(sec)=' + str(time.time() - t)) - if not os.path.exists(index + '/qg'): - print('QSG:create and append...') + print("QSG: degree adjustment time(sec)=" + str(time.time() - t)) + if not os.path.exists(index + "/qg"): + print("QSG:create and append...") t = time.time() - args = ['qbg', 'create-qg', index] + args = ["qbg", "create-qg", index] subprocess.call(args) - print('QSG: create qg time(sec)=' + str(time.time() - t)) - print('QB: build...') + print("QSG: create qg time(sec)=" + str(time.time() - t)) + print("QB: build...") t = time.time() - args = ['qbg', 'build-qg', '-o20000', '-M6', '-ib', - '-I400', '-Gz', '-Pn', - '-E' + str(self._max_edge_size), - index] + args = [ + "qbg", + "build-qg", + "-o20000", + "-M6", + "-ib", + "-I400", + "-Gz", + "-Pn", + "-E" + str(self._max_edge_size), + index, + ] subprocess.call(args) - print('QSG: build qg time(sec)=' + str(time.time() - t)) - if os.path.exists(index + '/qg/grp'): - print('QSG: index already exists! ' + str(index)) + print("QSG: build qg time(sec)=" + str(time.time() - t)) + if os.path.exists(index + "/qg/grp"): + print("QSG: index already exists! 
" + str(index)) t = time.time() self.index = ngtpy.QuantizedIndex(index, self._max_edge_size) self.index.set_with_distance(False) self.indexName = index - print('QSG: open time(sec)=' + str(time.time() - t)) + print("QSG: open time(sec)=" + str(time.time() - t)) else: - print('QSG: something wrong.') - print('QSG: end of fit') - print('QSG:Successfully Build Index') + print("QSG: something wrong.") + print("QSG: end of fit") + print("QSG:Successfully Build Index") def set_query_arguments(self, parameters): result_expansion, epsilon = parameters print("QSG: result_expansion=" + str(result_expansion)) print("QSG: epsilon=" + str(epsilon)) - self.name = 'QSG-NGT(%s, %s, %s, %s, %s, %1.3f)' % ( - self._edge_size, self._outdegree, - self._indegree, self._max_edge_size, + self.name = "QSG-NGT(%s, %s, %s, %s, %s, %1.3f)" % ( + self._edge_size, + self._outdegree, + self._indegree, + self._max_edge_size, epsilon, - result_expansion) + result_expansion, + ) epsilon = epsilon - 1.0 self.index.set(epsilon=epsilon, result_expansion=result_expansion) @@ -160,4 +225,4 @@ def query(self, v, n): return self.index.search(v, n) def freeIndex(self): - print('QSG: free') + print("QSG: free") diff --git a/ann_benchmarks/algorithms/rpforest.py b/ann_benchmarks/algorithms/rpforest.py index 063a614b8..199ac3cd8 100644 --- a/ann_benchmarks/algorithms/rpforest.py +++ b/ann_benchmarks/algorithms/rpforest.py @@ -6,7 +6,7 @@ class RPForest(BaseANN): def __init__(self, leaf_size, n_trees): - self.name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees) + self.name = "RPForest(leaf_size=%d, n_trees=%d)" % (leaf_size, n_trees) self._model = rpforest.RPForest(leaf_size=leaf_size, no_trees=n_trees) def fit(self, X): diff --git a/ann_benchmarks/algorithms/scann.py b/ann_benchmarks/algorithms/scann.py index d2ceb0db2..0050bb84e 100644 --- a/ann_benchmarks/algorithms/scann.py +++ b/ann_benchmarks/algorithms/scann.py @@ -3,31 +3,35 @@ import scann from ann_benchmarks.algorithms.base import BaseANN -class Scann(BaseANN): - def __init__(self, n_leaves, avq_threshold, dims_per_block, dist): - self.name = "scann n_leaves={} avq_threshold={:.02f} dims_per_block={}".format( - n_leaves, avq_threshold, dims_per_block) - self.n_leaves = n_leaves - self.avq_threshold = avq_threshold - self.dims_per_block = dims_per_block - self.dist = dist +class Scann(BaseANN): + def __init__(self, n_leaves, avq_threshold, dims_per_block, dist): + self.name = "scann n_leaves={} avq_threshold={:.02f} dims_per_block={}".format( + n_leaves, avq_threshold, dims_per_block + ) + self.n_leaves = n_leaves + self.avq_threshold = avq_threshold + self.dims_per_block = dims_per_block + self.dist = dist - def fit(self, X): - if self.dist == "dot_product": - spherical = True - X[np.linalg.norm(X, axis=1) == 0] = 1.0 / np.sqrt(X.shape[1]) - X /= np.linalg.norm(X, axis=1)[:, np.newaxis] - else: - spherical = False + def fit(self, X): + if self.dist == "dot_product": + spherical = True + X[np.linalg.norm(X, axis=1) == 0] = 1.0 / np.sqrt(X.shape[1]) + X /= np.linalg.norm(X, axis=1)[:, np.newaxis] + else: + spherical = False - self.searcher = scann.scann_ops_pybind.builder(X, 10, self.dist).tree( - self.n_leaves, 1, training_sample_size=len(X), spherical=spherical, quantize_centroids=True).score_ah( - self.dims_per_block, anisotropic_quantization_threshold=self.avq_threshold).reorder( - 1).build() + self.searcher = ( + scann.scann_ops_pybind.builder(X, 10, self.dist) + .tree(self.n_leaves, 1, training_sample_size=len(X), spherical=spherical, 
quantize_centroids=True) + .score_ah(self.dims_per_block, anisotropic_quantization_threshold=self.avq_threshold) + .reorder(1) + .build() + ) - def set_query_arguments(self, leaves_reorder): - self.leaves_to_search, self.reorder = leaves_reorder + def set_query_arguments(self, leaves_reorder): + self.leaves_to_search, self.reorder = leaves_reorder - def query(self, v, n): - return self.searcher.search(v, n, self.reorder, self.leaves_to_search)[0] + def query(self, v, n): + return self.searcher.search(v, n, self.reorder, self.leaves_to_search)[0] diff --git a/ann_benchmarks/algorithms/sptag.py b/ann_benchmarks/algorithms/sptag.py index be1f82fb0..399774d3d 100644 --- a/ann_benchmarks/algorithms/sptag.py +++ b/ann_benchmarks/algorithms/sptag.py @@ -6,12 +6,11 @@ class Sptag(BaseANN): def __init__(self, metric, algo): self._algo = str(algo) - self._metric = { - 'angular': 'Cosine', 'euclidean': 'L2'}[metric] + self._metric = {"angular": "Cosine", "euclidean": "L2"}[metric] def fit(self, X): - self._sptag = SPTAG.AnnIndex(self._algo, 'Float', X.shape[1]) - self._sptag.SetBuildParam("NumberOfThreads", '32', "Index") + self._sptag = SPTAG.AnnIndex(self._algo, "Float", X.shape[1]) + self._sptag.SetBuildParam("NumberOfThreads", "32", "Index") self._sptag.SetBuildParam("DistCalcMethod", self._metric, "Index") self._sptag.Build(X, X.shape[0], False) @@ -23,6 +22,4 @@ def query(self, v, k): return self._sptag.Search(v, k)[0] def __str__(self): - return 'Sptag(metric=%s, algo=%s, check=%d)' % (self._metric, - self._algo, self._maxCheck) - + return "Sptag(metric=%s, algo=%s, check=%d)" % (self._metric, self._algo, self._maxCheck) diff --git a/ann_benchmarks/algorithms/subprocess.py b/ann_benchmarks/algorithms/subprocess.py index 22d4728e7..61aa5ae82 100644 --- a/ann_benchmarks/algorithms/subprocess.py +++ b/ann_benchmarks/algorithms/subprocess.py @@ -4,8 +4,7 @@ from types import MethodType import psutil import subprocess -from ann_benchmarks.data import \ - bit_unparse_entry, int_unparse_entry, float_unparse_entry +from ann_benchmarks.data import bit_unparse_entry, int_unparse_entry, float_unparse_entry from ann_benchmarks.algorithms.base import BaseANN @@ -17,12 +16,11 @@ def __init__(self, code): class Subprocess(BaseANN): def _raw_line(self): - return shlex.split( - self._get_program_handle().stdout.readline().strip()) + return shlex.split(self._get_program_handle().stdout.readline().strip()) def _line(self): line = self._raw_line() -# print("<- %s" % (" ".join(line))) + # print("<- %s" % (" ".join(line))) while len(line) < 1 or line[0] != "epbprtv0": line = self._raw_line() return line[1:] @@ -50,23 +48,29 @@ def _get_program_handle(self): bufsize=1, # line buffering stdin=subprocess.PIPE, stdout=subprocess.PIPE, - universal_newlines=True) + universal_newlines=True, + ) for key, value in iter(self._params.items()): - self._write("%s %s" % - (Subprocess._quote(key), Subprocess._quote(value))) - assert self._line()[0] == "ok", """\ -assigning value '%s' to option '%s' failed""" % (value, key) + self._write("%s %s" % (Subprocess._quote(key), Subprocess._quote(value))) + assert ( + self._line()[0] == "ok" + ), """\ +assigning value '%s' to option '%s' failed""" % ( + value, + key, + ) self._configuration_hook() self._write("") - assert self._line()[0] == "ok", """\ + assert ( + self._line()[0] == "ok" + ), """\ transitioning to training mode failed""" return self._program def __init__(self, args, encoder, params): - self.name = "Subprocess(program = %s, %s)" % \ - (basename(args[0]), str(params)) + 
self.name = "Subprocess(program = %s, %s)" % (basename(args[0]), str(params)) self._program = None self._args = args self._encoder = encoder @@ -81,10 +85,15 @@ def fit(self, X): for entry in X: d = Subprocess._quote(self._encoder(entry)) self._write(d) - assert self._line()[0] == "ok", """\ -encoded training point '%s' was rejected""" % d + assert self._line()[0] == "ok", ( + """\ +encoded training point '%s' was rejected""" + % d + ) self._write("") - assert self._line()[0] == "ok", """\ + assert ( + self._line()[0] == "ok" + ), """\ transitioning to query mode failed""" def query(self, v, n): @@ -98,7 +107,9 @@ def _handle_query_response(self): count = int(status[1]) return self._collect_query_response_lines(count) else: - assert status[0] == "fail", """\ + assert ( + status[0] == "fail" + ), """\ query neither succeeded nor failed""" return [] @@ -125,7 +136,9 @@ def __init__(self, args, encoder, params): def _configuration_hook(self): self._write("frontend prepared-queries 1") - assert self._line()[0] == "ok", """\ + assert ( + self._line()[0] == "ok" + ), """\ enabling prepared queries mode failed""" def query(self, v, n): @@ -136,8 +149,11 @@ def query(self, v, n): def prepare_query(self, v, n): d = Subprocess._quote(self._encoder(v)) self._write("%s %d" % (d, n)) - assert self._line()[0] == "ok", """\ -preparing the query '%s' failed""" % d + assert self._line()[0] == "ok", ( + """\ +preparing the query '%s' failed""" + % d + ) def run_prepared_query(self): self._write("query") @@ -145,7 +161,9 @@ def run_prepared_query(self): if status[0] == "ok": self._result_count = int(status[1]) else: - assert status[0] == "fail", """\ + assert ( + status[0] == "fail" + ), """\ query neither succeeded nor failed""" self._result_count = 0 @@ -166,7 +184,9 @@ def __init__(self, args, encoder, params): def _configuration_hook(self): self._write("frontend batch-queries 1") - assert self._line()[0] == "ok", """\ + assert ( + self._line()[0] == "ok" + ), """\ enabling batch queries mode failed""" def query(self, v, n): @@ -178,13 +198,18 @@ def prepare_batch_query(self, X, n): d = " ".join(map(lambda p: Subprocess._quote(self._encoder(p)), X)) self._qp_count = len(X) self._write("%s %d" % (d, n)) - assert self._line()[0] == "ok", """\ -preparing the batch query '%s' failed""" % d + assert self._line()[0] == "ok", ( + """\ +preparing the batch query '%s' failed""" + % d + ) def run_batch_query(self): self._write("query") status = self._line() - assert status[0] == "ok", """\ + assert ( + status[0] == "ok" + ), """\ batch query failed completely""" def get_batch_results(self): @@ -232,15 +257,20 @@ def QueryParamWrapper(constructor, args, params): def _do(self, original=r._configuration_hook): original() self._write("frontend query-parameters 1") - assert self._line()[0] == "ok", """\ + assert ( + self._line()[0] == "ok" + ), """\ enabling query parameter support failed""" + r._configuration_hook = MethodType(_do, r) def _sqa(self, *args): - self._write("query-params %s set" % - (" ".join(map(Subprocess._quote, args)))) - assert self._line()[0] == "ok", """\ + self._write("query-params %s set" % (" ".join(map(Subprocess._quote, args)))) + assert ( + self._line()[0] == "ok" + ), """\ reconfiguring query parameters failed""" print(args) + r.set_query_arguments = MethodType(_sqa, r) return r diff --git a/ann_benchmarks/algorithms/vald.py b/ann_benchmarks/algorithms/vald.py index eb5660bac..2db79b305 100644 --- a/ann_benchmarks/algorithms/vald.py +++ b/ann_benchmarks/algorithms/vald.py @@ -15,106 +15,102 
@@ default_server_config = { - 'version': 'v0.0.0', - 'logging': { - 'logger': 'nop', - 'level': 'fatal', - 'format': 'raw' - }, - 'server_config': { - 'servers': [ + "version": "v0.0.0", + "logging": {"logger": "nop", "level": "fatal", "format": "raw"}, + "server_config": { + "servers": [ { - 'name': 'agent-grpc', - 'host': '127.0.0.1', - 'port': 8082, - 'mode': 'GRPC', - 'probe_wait_time': '3s', + "name": "agent-grpc", + "host": "127.0.0.1", + "port": 8082, + "mode": "GRPC", + "probe_wait_time": "3s", #'grpc': { # 'bidirectional_stream_concurrency': 1 - #}, + # }, "network": "unix", - "socket_path": "/var/run/vald.sock" + "socket_path": "/var/run/vald.sock", } ], - 'health_check_servers': [ + "health_check_servers": [ { - 'name': 'readiness', - 'host': '127.0.0.1', - 'port': 3001, - 'mode': '', - 'probe_wait_time': '3s', - 'http': { - 'shutdown_duration': '5s', - 'handler_timeout': '', - 'idle_timeout': '', - 'read_header_timeout': '', - 'read_timeout': '', - 'write_timeout': '' - } + "name": "readiness", + "host": "127.0.0.1", + "port": 3001, + "mode": "", + "probe_wait_time": "3s", + "http": { + "shutdown_duration": "5s", + "handler_timeout": "", + "idle_timeout": "", + "read_header_timeout": "", + "read_timeout": "", + "write_timeout": "", + }, } ], - 'startup_strategy': ['agent-grpc', 'readiness'], - 'shutdown_strategy': ['readiness', 'agent-grpc'], - 'full_shutdown_duration': '600s', - 'tls': { - 'enabled': False, - } + "startup_strategy": ["agent-grpc", "readiness"], + "shutdown_strategy": ["readiness", "agent-grpc"], + "full_shutdown_duration": "600s", + "tls": { + "enabled": False, + }, }, - 'ngt': { - 'enable_in_memory_mode': True, - 'default_pool_size': 10000, - 'default_epsilon': 0.01, - 'default_radius': -1.0, + "ngt": { + "enable_in_memory_mode": True, + "default_pool_size": 10000, + "default_epsilon": 0.01, + "default_radius": -1.0, #'vqueue': { # 'insert_buffer_size': 100, # 'insert_buffer_pool_size': 1000, # 'delete_buffer_size': 100, # 'delete_buffer_pool_size': 1000 - #} - } + # } + }, } grpc_opts = [ - ('grpc.keepalive_time_ms', 1000 * 10), - ('grpc.keepalive_timeout_ms', 1000 * 10), - ('grpc.max_connection_idle_ms', 1000 * 50) + ("grpc.keepalive_time_ms", 1000 * 10), + ("grpc.keepalive_timeout_ms", 1000 * 10), + ("grpc.max_connection_idle_ms", 1000 * 50), ] -metrics = {'euclidean': 'l2', 'angular': 'cosine'} +metrics = {"euclidean": "l2", "angular": "cosine"} class Vald(BaseANN): def __init__(self, metric, object_type, params): self._param = default_server_config self._ngt_config = { - 'distance_type': metrics[metric], - 'object_type': object_type, - 'search_edge_size': int(params['searchedge']), - 'creation_edge_size': int(params['edge']), - 'bulk_insert_chunk_size': int(params['bulk']) + "distance_type": metrics[metric], + "object_type": object_type, + "search_edge_size": int(params["searchedge"]), + "creation_edge_size": int(params["edge"]), + "bulk_insert_chunk_size": int(params["bulk"]), } - #self._address = 'localhost:8082' - self._address = 'unix:///var/run/vald.sock' + # self._address = 'localhost:8082' + self._address = "unix:///var/run/vald.sock" def fit(self, X): dim = len(X[0]) - self._ngt_config['dimension'] = dim - self._param['ngt'].update(self._ngt_config) - with open('config.yaml', 'w') as f: + self._ngt_config["dimension"] = dim + self._param["ngt"].update(self._ngt_config) + with open("config.yaml", "w") as f: yaml.dump(self._param, f) cfg = payload_pb2.Insert.Config(skip_strict_exist_check=True) vectors = [ - payload_pb2.Insert.Request( - 
vector=payload_pb2.Object.Vector(id=str(i), vector=x.tolist()), - config=cfg) for i, x in enumerate(X)] + payload_pb2.Insert.Request(vector=payload_pb2.Object.Vector(id=str(i), vector=x.tolist()), config=cfg) + for i, x in enumerate(X) + ] - p = subprocess.Popen(['/go/bin/ngt', '-f', 'config.yaml']) + p = subprocess.Popen(["/go/bin/ngt", "-f", "config.yaml"]) atexit.register(lambda: p.kill()) while True: try: - with urllib.request.urlopen('http://localhost:3001/readiness') as response: + with urllib.request.urlopen("http://localhost:3001/readiness") as response: if response.getcode() == 200: break except (urllib.error.HTTPError, urllib.error.URLError): @@ -126,9 +122,7 @@ def fit(self, X): pass astub = agent_pb2_grpc.AgentStub(channel) - astub.CreateIndex( - payload_pb2.Control.CreateIndexRequest( - pool_size=10000)) + astub.CreateIndex(payload_pb2.Control.CreateIndexRequest(pool_size=10000)) def set_query_arguments(self, epsilon): self._epsilon = epsilon - 1.0 @@ -141,9 +135,9 @@ def query(self, v, n): return [int(result.id) for result in response.results] def __str__(self): - return 'Vald(%d, %d, %d, %1.3f)' % ( - self._ngt_config['creation_edge_size'], - self._ngt_config['search_edge_size'], - self._ngt_config['bulk_insert_chunk_size'], - self._epsilon + 1.0 + return "Vald(%d, %d, %d, %1.3f)" % ( + self._ngt_config["creation_edge_size"], + self._ngt_config["search_edge_size"], + self._ngt_config["bulk_insert_chunk_size"], + self._epsilon + 1.0, ) diff --git a/ann_benchmarks/algorithms/vearch.py b/ann_benchmarks/algorithms/vearch.py index bf16dbe5f..6115bdd95 100644 --- a/ann_benchmarks/algorithms/vearch.py +++ b/ann_benchmarks/algorithms/vearch.py @@ -20,6 +20,7 @@ def get_batch_results(self): res.append(single_ids.tolist()) return res + class VearchIndex(Vearch): def __init__(self, metric, nlist, ns_threshold, n_dims_block): self.nlist = nlist @@ -29,9 +30,14 @@ def __init__(self, metric, nlist, ns_threshold, n_dims_block): self.metric = "InnerProduct" self.ns_threshold = ns_threshold self.n_dims_block = n_dims_block - + def __str__(self): - return "VearchIndex(nlist=%d, n_dims_block=%d, nprobe=%d, rerank=%d)" % (self.nlist, self.n_dims_block, self.nprobe, self.rerank) + return "VearchIndex(nlist=%d, n_dims_block=%d, nprobe=%d, rerank=%d)" % ( + self.nlist, + self.n_dims_block, + self.nprobe, + self.rerank, + ) def fit(self, X): if X.dtype != np.float32: @@ -39,38 +45,31 @@ def fit(self, X): if self.metric == "InnerProduct": X[np.linalg.norm(X, axis=1) == 0] = 1.0 / np.sqrt(X.shape[1]) - X /= np.linalg.norm(X, axis=1)[:, np.newaxis] + X /= np.linalg.norm(X, axis=1)[:, np.newaxis] d = X.shape[1] self.nsubvector = int(d / self.n_dims_block) self.engine = vearch.Engine("files", "logs") table = { - "name" : "test_table", - "engine" : { + "name": "test_table", + "engine": { "index_size": X.shape[0], - "retrieval_type": "VEARCH", + "retrieval_type": "VEARCH", "retrieval_param": { "metric_type": self.metric, "ncentroids": self.nlist, "nsubvector": self.nsubvector, "reordering": True, - "ns_threshold": self.ns_threshold - } + "ns_threshold": self.ns_threshold, + }, }, - "properties" : { - "feature": { - "type": "vector", - "index": True, - "dimension": d, - "store_type": "Mmap" - } - } + "properties": {"feature": {"type": "vector", "index": True, "dimension": d, "store_type": "Mmap"}}, } self.engine.create_table(table) self.engine.add2(X) indexed_num = 0 while indexed_num != X.shape[0]: - indexed_num = self.engine.get_status()['min_indexed_num'] + indexed_num = 
self.engine.get_status()["min_indexed_num"] time.sleep(0.5) def set_query_arguments(self, n_probe, k_rerank): diff --git a/ann_benchmarks/algorithms/vespa.py b/ann_benchmarks/algorithms/vespa.py index cf5ba3af7..23393f95a 100644 --- a/ann_benchmarks/algorithms/vespa.py +++ b/ann_benchmarks/algorithms/vespa.py @@ -8,10 +8,9 @@ # see https://docs.vespa.ai/en/approximate-nn-hnsw.html for more details. class VespaHnswBase(BaseANN): def __init__(self, enable_normalize, metric, dimension, param): - if metric not in ('angular', 'euclidean'): - raise NotImplementedError( - "VespaHnsw doesn't support metric %s" % metric) - self.metric = {'angular': DistanceMetric.Angular, 'euclidean': DistanceMetric.Euclidean}[metric] + if metric not in ("angular", "euclidean"): + raise NotImplementedError("VespaHnsw doesn't support metric %s" % metric) + self.metric = {"angular": DistanceMetric.Angular, "euclidean": DistanceMetric.Euclidean}[metric] normalize = False if self.metric == DistanceMetric.Angular and enable_normalize: normalize = True @@ -21,8 +20,12 @@ def __init__(self, enable_normalize, metric, dimension, param): self.max_links_per_node = param.get("M", 8) self.dimension = dimension self.neighbors_to_explore = 200 - self.name = 'VespaHnsw()' - self.index = HnswIndex(dimension, HnswIndexParams(self.max_links_per_node, self.neighbors_to_explore_at_insert, self.metric, False), normalize) + self.name = "VespaHnsw()" + self.index = HnswIndex( + dimension, + HnswIndexParams(self.max_links_per_node, self.neighbors_to_explore_at_insert, self.metric, False), + normalize, + ) def fit(self, X): for i, x in enumerate(X): @@ -38,9 +41,10 @@ def query(self, v, n): def query_with_distances(self, v, n): return self.index.find_top_k(n, v, self.neighbors_to_explore) + class VespaHnsw(VespaHnswBase): def __init__(self, metric, dimension, param): super().__init__(True, metric, dimension, param) def __str__(self): - return 'VespaHnsw ({}, ef: {})'.format(self.param, self.neighbors_to_explore) + return "VespaHnsw ({}, ef: {})".format(self.param, self.neighbors_to_explore) diff --git a/ann_benchmarks/constants.py b/ann_benchmarks/constants.py index 407200b6b..03b3c4a2b 100644 --- a/ann_benchmarks/constants.py +++ b/ann_benchmarks/constants.py @@ -1 +1 @@ -INDEX_DIR = 'indices' +INDEX_DIR = "indices" diff --git a/ann_benchmarks/data.py b/ann_benchmarks/data.py index 14b47ed47..de18ad6d9 100644 --- a/ann_benchmarks/data.py +++ b/ann_benchmarks/data.py @@ -19,9 +19,7 @@ def int_unparse_entry(entry): def bit_parse_entry(line): - return [bool(int(x)) for x in list(line.strip() - .replace(" ", "") - .replace("\t", ""))] + return [bool(int(x)) for x in list(line.strip().replace(" ", "").replace("\t", ""))] def bit_unparse_entry(entry): @@ -33,13 +31,9 @@ def bit_unparse_entry(entry): "type": numpy.float, "parse_entry": float_parse_entry, "unparse_entry": float_unparse_entry, - "finish_entries": numpy.vstack - }, - "bit": { - "type": numpy.bool_, - "parse_entry": bit_parse_entry, - "unparse_entry": bit_unparse_entry + "finish_entries": numpy.vstack, }, + "bit": {"type": numpy.bool_, "parse_entry": bit_parse_entry, "unparse_entry": bit_unparse_entry}, "int": { "type": numpy.object, "parse_entry": int_parse_entry, diff --git a/ann_benchmarks/datasets.py b/ann_benchmarks/datasets.py index d2aa566cc..15cd9f875 100644 --- a/ann_benchmarks/datasets.py +++ b/ann_benchmarks/datasets.py @@ -7,35 +7,34 @@ from urllib.request import urlretrieve - def download(src, dst): if not os.path.exists(dst): # TODO: should be atomic - 
print('downloading %s -> %s...' % (src, dst)) + print("downloading %s -> %s..." % (src, dst)) urlretrieve(src, dst) def get_dataset_fn(dataset): - if not os.path.exists('data'): - os.mkdir('data') - return os.path.join('data', '%s.hdf5' % dataset) + if not os.path.exists("data"): + os.mkdir("data") + return os.path.join("data", "%s.hdf5" % dataset) def get_dataset(which): hdf5_fn = get_dataset_fn(which) try: - url = 'http://ann-benchmarks.com/%s.hdf5' % which + url = "http://ann-benchmarks.com/%s.hdf5" % which download(url, hdf5_fn) except: print("Cannot download %s" % url) if which in DATASETS: print("Creating dataset locally") DATASETS[which](hdf5_fn) - hdf5_f = h5py.File(hdf5_fn, 'r') + hdf5_f = h5py.File(hdf5_fn, "r") # here for backward compatibility, to ensure old datasets can still be used with newer versions # cast to integer because the json parser (later on) cannot interpret numpy integers - dimension = int(hdf5_f.attrs['dimension']) if 'dimension' in hdf5_f.attrs else len(hdf5_f['train'][0]) + dimension = int(hdf5_f.attrs["dimension"]) if "dimension" in hdf5_f.attrs else len(hdf5_f["train"][0]) return hdf5_f, dimension @@ -45,45 +44,48 @@ def get_dataset(which): # just rely on the prepared datasets at http://ann-benchmarks.com -def write_output(train, test, fn, distance, point_type='float', count=100): +def write_output(train, test, fn, distance, point_type="float", count=100): from ann_benchmarks.algorithms.bruteforce import BruteForceBLAS - f = h5py.File(fn, 'w') - f.attrs['type'] = 'dense' - f.attrs['distance'] = distance - f.attrs['dimension'] = len(train[0]) - f.attrs['point_type'] = point_type - print('train size: %9d * %4d' % train.shape) - print('test size: %9d * %4d' % test.shape) - f.create_dataset('train', (len(train), len( - train[0])), dtype=train.dtype)[:] = train - f.create_dataset('test', (len(test), len( - test[0])), dtype=test.dtype)[:] = test - neighbors = f.create_dataset('neighbors', (len(test), count), dtype='i') - distances = f.create_dataset('distances', (len(test), count), dtype='f') + + f = h5py.File(fn, "w") + f.attrs["type"] = "dense" + f.attrs["distance"] = distance + f.attrs["dimension"] = len(train[0]) + f.attrs["point_type"] = point_type + print("train size: %9d * %4d" % train.shape) + print("test size: %9d * %4d" % test.shape) + f.create_dataset("train", (len(train), len(train[0])), dtype=train.dtype)[:] = train + f.create_dataset("test", (len(test), len(test[0])), dtype=test.dtype)[:] = test + neighbors = f.create_dataset("neighbors", (len(test), count), dtype="i") + distances = f.create_dataset("distances", (len(test), count), dtype="f") bf = BruteForceBLAS(distance, precision=train.dtype) bf.fit(train) for i, x in enumerate(test): if i % 1000 == 0: - print('%d/%d...' % (i, len(test))) + print("%d/%d..." % (i, len(test))) res = list(bf.query_with_distances(x, count)) res.sort(key=lambda t: t[-1]) neighbors[i] = [j for j, _ in res] distances[i] = [d for _, d in res] f.close() + """ param: train and test are arrays of arrays of indices. 
""" + + def write_sparse_output(train, test, fn, distance, dimension, count=100): from ann_benchmarks.algorithms.bruteforce import BruteForceBLAS - f = h5py.File(fn, 'w') - f.attrs['type'] = 'sparse' - f.attrs['distance'] = distance - f.attrs['dimension'] = dimension - f.attrs['point_type'] = 'bit' - print('train size: %9d * %4d' % (train.shape[0], dimension)) - print('test size: %9d * %4d' % (test.shape[0], dimension)) + + f = h5py.File(fn, "w") + f.attrs["type"] = "sparse" + f.attrs["distance"] = distance + f.attrs["dimension"] = dimension + f.attrs["point_type"] = "bit" + print("train size: %9d * %4d" % (train.shape[0], dimension)) + print("test size: %9d * %4d" % (test.shape[0], dimension)) # We ensure the sets are sorted train = numpy.array(list(map(sorted, train))) @@ -92,50 +94,50 @@ def write_sparse_output(train, test, fn, distance, dimension, count=100): flat_train = numpy.hstack(train.flatten()) flat_test = numpy.hstack(test.flatten()) - f.create_dataset('train', (len(flat_train),), dtype=flat_train.dtype)[:] = flat_train - f.create_dataset('test', (len(flat_test),), dtype=flat_test.dtype)[:] = flat_test - neighbors = f.create_dataset('neighbors', (len(test), count), dtype='i') - distances = f.create_dataset('distances', (len(test), count), dtype='f') + f.create_dataset("train", (len(flat_train),), dtype=flat_train.dtype)[:] = flat_train + f.create_dataset("test", (len(flat_test),), dtype=flat_test.dtype)[:] = flat_test + neighbors = f.create_dataset("neighbors", (len(test), count), dtype="i") + distances = f.create_dataset("distances", (len(test), count), dtype="f") - f.create_dataset('size_test', (len(test),), dtype='i')[:] = list(map(len, test)) - f.create_dataset('size_train', (len(train),), dtype='i')[:] = list(map(len, train)) + f.create_dataset("size_test", (len(test),), dtype="i")[:] = list(map(len, test)) + f.create_dataset("size_train", (len(train),), dtype="i")[:] = list(map(len, train)) bf = BruteForceBLAS(distance, precision=train.dtype) bf.fit(train) for i, x in enumerate(test): if i % 1000 == 0: - print('%d/%d...' % (i, len(test))) + print("%d/%d..." 
% (i, len(test))) res = list(bf.query_with_distances(x, count)) res.sort(key=lambda t: t[-1]) neighbors[i] = [j for j, _ in res] distances[i] = [d for _, d in res] f.close() + def train_test_split(X, test_size=10000, dimension=None): import sklearn.model_selection + if dimension is None: dimension = X.shape[1] - print('Splitting %d*%d into train/test' % (X.shape[0], dimension)) - return sklearn.model_selection.train_test_split( - X, test_size=test_size, random_state=1) + print("Splitting %d*%d into train/test" % (X.shape[0], dimension)) + return sklearn.model_selection.train_test_split(X, test_size=test_size, random_state=1) def glove(out_fn, d): import zipfile - url = 'http://nlp.stanford.edu/data/glove.twitter.27B.zip' - fn = os.path.join('data', 'glove.twitter.27B.zip') + url = "http://nlp.stanford.edu/data/glove.twitter.27B.zip" + fn = os.path.join("data", "glove.twitter.27B.zip") download(url, fn) with zipfile.ZipFile(fn) as z: - print('preparing %s' % out_fn) - z_fn = 'glove.twitter.27B.%dd.txt' % d + print("preparing %s" % out_fn) + z_fn = "glove.twitter.27B.%dd.txt" % d X = [] for line in z.open(z_fn): v = [float(x) for x in line.strip().split()[1:]] X.append(numpy.array(v)) X_train, X_test = train_test_split(X) - write_output(numpy.array(X_train), numpy.array( - X_test), out_fn, 'angular') + write_output(numpy.array(X_train), numpy.array(X_test), out_fn, "angular") def _load_texmex_vectors(f, n, k): @@ -144,16 +146,17 @@ def _load_texmex_vectors(f, n, k): v = numpy.zeros((n, k)) for i in range(n): f.read(4) # ignore vec length - v[i] = struct.unpack('f' * k, f.read(k * 4)) + v[i] = struct.unpack("f" * k, f.read(k * 4)) return v def _get_irisa_matrix(t, fn): import struct + m = t.getmember(fn) f = t.extractfile(m) - k, = struct.unpack('i', f.read(4)) + (k,) = struct.unpack("i", f.read(4)) n = m.size // (4 + 4 * k) f.seek(0) return _load_texmex_vectors(f, n, k) @@ -162,32 +165,32 @@ def _get_irisa_matrix(t, fn): def sift(out_fn): import tarfile - url = 'ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz' - fn = os.path.join('data', 'sift.tar.tz') + url = "ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz" + fn = os.path.join("data", "sift.tar.tz") download(url, fn) - with tarfile.open(fn, 'r:gz') as t: - train = _get_irisa_matrix(t, 'sift/sift_base.fvecs') - test = _get_irisa_matrix(t, 'sift/sift_query.fvecs') - write_output(train, test, out_fn, 'euclidean') + with tarfile.open(fn, "r:gz") as t: + train = _get_irisa_matrix(t, "sift/sift_base.fvecs") + test = _get_irisa_matrix(t, "sift/sift_query.fvecs") + write_output(train, test, out_fn, "euclidean") def gist(out_fn): import tarfile - url = 'ftp://ftp.irisa.fr/local/texmex/corpus/gist.tar.gz' - fn = os.path.join('data', 'gist.tar.tz') + url = "ftp://ftp.irisa.fr/local/texmex/corpus/gist.tar.gz" + fn = os.path.join("data", "gist.tar.tz") download(url, fn) - with tarfile.open(fn, 'r:gz') as t: - train = _get_irisa_matrix(t, 'gist/gist_base.fvecs') - test = _get_irisa_matrix(t, 'gist/gist_query.fvecs') - write_output(train, test, out_fn, 'euclidean') + with tarfile.open(fn, "r:gz") as t: + train = _get_irisa_matrix(t, "gist/gist_base.fvecs") + test = _get_irisa_matrix(t, "gist/gist_query.fvecs") + write_output(train, test, out_fn, "euclidean") def _load_mnist_vectors(fn): import gzip import struct - print('parsing vectors in %s...' % fn) + print("parsing vectors in %s..." 
% fn) f = gzip.open(fn) type_code_info = { 0x08: (1, "!B"), @@ -195,14 +198,13 @@ def _load_mnist_vectors(fn): 0x0B: (2, "!H"), 0x0C: (4, "!I"), 0x0D: (4, "!f"), - 0x0E: (8, "!d") + 0x0E: (8, "!d"), } magic, type_code, dim_count = struct.unpack("!hBB", f.read(4)) assert magic == 0 assert type_code in type_code_info - dimensions = [struct.unpack("!I", f.read(4))[0] - for i in range(dim_count)] + dimensions = [struct.unpack("!I", f.read(4))[0] for i in range(dim_count)] entry_count = dimensions[0] entry_size = numpy.product(dimensions[1:]) @@ -210,41 +212,46 @@ def _load_mnist_vectors(fn): b, format_string = type_code_info[type_code] vectors = [] for i in range(entry_count): - vectors.append([struct.unpack(format_string, f.read(b))[0] - for j in range(entry_size)]) + vectors.append([struct.unpack(format_string, f.read(b))[0] for j in range(entry_size)]) return numpy.array(vectors) def mnist(out_fn): - download( - 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', 'mnist-train.gz') # noqa - download( - 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', 'mnist-test.gz') # noqa - train = _load_mnist_vectors('mnist-train.gz') - test = _load_mnist_vectors('mnist-test.gz') - write_output(train, test, out_fn, 'euclidean') + download("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", "mnist-train.gz") # noqa + download("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", "mnist-test.gz") # noqa + train = _load_mnist_vectors("mnist-train.gz") + test = _load_mnist_vectors("mnist-test.gz") + write_output(train, test, out_fn, "euclidean") def fashion_mnist(out_fn): - download('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz', # noqa - 'fashion-mnist-train.gz') - download('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz', # noqa - 'fashion-mnist-test.gz') - train = _load_mnist_vectors('fashion-mnist-train.gz') - test = _load_mnist_vectors('fashion-mnist-test.gz') - write_output(train, test, out_fn, 'euclidean') + download( + "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz", # noqa + "fashion-mnist-train.gz", + ) + download( + "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz", # noqa + "fashion-mnist-test.gz", + ) + train = _load_mnist_vectors("fashion-mnist-train.gz") + test = _load_mnist_vectors("fashion-mnist-test.gz") + write_output(train, test, out_fn, "euclidean") + # Creates a 'deep image descriptor' dataset using the 'deep10M.fvecs' sample # from http://sites.skoltech.ru/compvision/noimi/. The download logic is adapted # from the script https://github.com/arbabenko/GNOIMI/blob/master/downloadDeep1B.py. 
def deep_image(out_fn): - yadisk_key = 'https://yadi.sk/d/11eDCm7Dsn9GA' - response = urlopen('https://cloud-api.yandex.net/v1/disk/public/resources/download?public_key=' \ - + yadisk_key + '&path=/deep10M.fvecs') + yadisk_key = "https://yadi.sk/d/11eDCm7Dsn9GA" + response = urlopen( + "https://cloud-api.yandex.net/v1/disk/public/resources/download?public_key=" + + yadisk_key + + "&path=/deep10M.fvecs" + ) response_body = response.read().decode("utf-8") - dataset_url = response_body.split(',')[0][9:-1] - filename = os.path.join('data', 'deep-image.fvecs') + dataset_url = response_body.split(",")[0][9:-1] + filename = os.path.join("data", "deep-image.fvecs") download(dataset_url, filename) # In the fvecs file format, each vector is stored by first writing its @@ -254,14 +261,16 @@ def deep_image(out_fn): fv = fv.reshape(-1, dim + 1)[:, 1:] X_train, X_test = train_test_split(fv) - write_output(X_train, X_test, out_fn, 'angular') + write_output(X_train, X_test, out_fn, "angular") + def transform_bag_of_words(filename, n_dimensions, out_fn): import gzip from scipy.sparse import lil_matrix from sklearn.feature_extraction.text import TfidfTransformer from sklearn import random_projection - with gzip.open(filename, 'rb') as f: + + with gzip.open(filename, "rb") as f: file_content = f.readlines() entries = int(file_content[0]) words = int(file_content[1]) @@ -274,25 +283,23 @@ def transform_bag_of_words(filename, n_dimensions, out_fn): print("normalizing matrix entries with tfidf...") B = TfidfTransformer().fit_transform(A) print("reducing dimensionality...") - C = random_projection.GaussianRandomProjection( - n_components=n_dimensions).fit_transform(B) + C = random_projection.GaussianRandomProjection(n_components=n_dimensions).fit_transform(B) X_train, X_test = train_test_split(C) - write_output(numpy.array(X_train), numpy.array( - X_test), out_fn, 'angular') + write_output(numpy.array(X_train), numpy.array(X_test), out_fn, "angular") def nytimes(out_fn, n_dimensions): - fn = 'nytimes_%s.txt.gz' % n_dimensions - download('https://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/docword.nytimes.txt.gz', fn) # noqa + fn = "nytimes_%s.txt.gz" % n_dimensions + download( + "https://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/docword.nytimes.txt.gz", fn + ) # noqa transform_bag_of_words(fn, n_dimensions, out_fn) def random_float(out_fn, n_dims, n_samples, centers, distance): import sklearn.datasets - X, _ = sklearn.datasets.make_blobs( - n_samples=n_samples, n_features=n_dims, - centers=centers, random_state=1) + X, _ = sklearn.datasets.make_blobs(n_samples=n_samples, n_features=n_dims, centers=centers, random_state=1) X_train, X_test = train_test_split(X, test_size=0.1) write_output(X_train, X_test, out_fn, distance) @@ -300,63 +307,62 @@ def random_float(out_fn, n_dims, n_samples, centers, distance): def random_bitstring(out_fn, n_dims, n_samples, n_queries): import sklearn.datasets - Y, _ = sklearn.datasets.make_blobs( - n_samples=n_samples, n_features=n_dims, - centers=n_queries, random_state=1) + Y, _ = sklearn.datasets.make_blobs(n_samples=n_samples, n_features=n_dims, centers=n_queries, random_state=1) X = numpy.zeros((n_samples, n_dims), dtype=numpy.bool_) for i, vec in enumerate(Y): X[i] = numpy.array([v > 0 for v in vec], dtype=numpy.bool_) X_train, X_test = train_test_split(X, test_size=n_queries) - write_output(X_train, X_test, out_fn, 'hamming', 'bit') + write_output(X_train, X_test, out_fn, "hamming", "bit") def word2bits(out_fn, path, fn): import tarfile 
- local_fn = fn + '.tar.gz' - url = 'http://web.stanford.edu/~maxlam/word_vectors/compressed/%s/%s.tar.gz' % ( # noqa - path, fn) + + local_fn = fn + ".tar.gz" + url = "http://web.stanford.edu/~maxlam/word_vectors/compressed/%s/%s.tar.gz" % (path, fn) # noqa download(url, local_fn) - print('parsing vectors in %s...' % local_fn) - with tarfile.open(local_fn, 'r:gz') as t: + print("parsing vectors in %s..." % local_fn) + with tarfile.open(local_fn, "r:gz") as t: f = t.extractfile(fn) n_words, k = [int(z) for z in next(f).strip().split()] X = numpy.zeros((n_words, k), dtype=numpy.bool_) for i in range(n_words): - X[i] = numpy.array([float(z) > 0 for z in next( - f).strip().split()[1:]], dtype=numpy.bool_) + X[i] = numpy.array([float(z) > 0 for z in next(f).strip().split()[1:]], dtype=numpy.bool_) X_train, X_test = train_test_split(X, test_size=1000) - write_output(X_train, X_test, out_fn, 'hamming', 'bit') + write_output(X_train, X_test, out_fn, "hamming", "bit") def sift_hamming(out_fn, fn): import tarfile - local_fn = fn + '.tar.gz' - url = 'http://sss.projects.itu.dk/ann-benchmarks/datasets/%s.tar.gz' % fn + + local_fn = fn + ".tar.gz" + url = "http://sss.projects.itu.dk/ann-benchmarks/datasets/%s.tar.gz" % fn download(url, local_fn) - print('parsing vectors in %s...' % local_fn) - with tarfile.open(local_fn, 'r:gz') as t: + print("parsing vectors in %s..." % local_fn) + with tarfile.open(local_fn, "r:gz") as t: f = t.extractfile(fn) lines = f.readlines() X = numpy.zeros((len(lines), 256), dtype=numpy.bool_) for i, line in enumerate(lines): - X[i] = numpy.array( - [int(x) > 0 for x in line.decode().strip()], dtype=numpy.bool_) + X[i] = numpy.array([int(x) > 0 for x in line.decode().strip()], dtype=numpy.bool_) X_train, X_test = train_test_split(X, test_size=1000) - write_output(X_train, X_test, out_fn, 'hamming', 'bit') + write_output(X_train, X_test, out_fn, "hamming", "bit") + def kosarak(out_fn): import gzip - local_fn = 'kosarak.dat.gz' + + local_fn = "kosarak.dat.gz" # only consider sets with at least min_elements many elements min_elements = 20 - url = 'http://fimi.uantwerpen.be/data/%s' % local_fn + url = "http://fimi.uantwerpen.be/data/%s" % local_fn download(url, local_fn) X = [] dimension = 0 - with gzip.open('kosarak.dat.gz', 'r') as f: + with gzip.open("kosarak.dat.gz", "r") as f: content = f.readlines() # preprocess data to find sets with more than 20 elements # keep track of used ids for reenumeration @@ -366,7 +372,8 @@ def kosarak(out_fn): dimension = max(dimension, max(X[-1]) + 1) X_train, X_test = train_test_split(numpy.array(X), test_size=500, dimension=dimension) - write_sparse_output(X_train, X_test, out_fn, 'jaccard', dimension) + write_sparse_output(X_train, X_test, out_fn, "jaccard", dimension) + def random_jaccard(out_fn, n=10000, size=50, universe=80): random.seed(1) @@ -376,8 +383,7 @@ def random_jaccard(out_fn, n=10000, size=50, universe=80): X.append(random.sample(l, size)) X_train, X_test = train_test_split(numpy.array(X), test_size=100, dimension=universe) - write_sparse_output(X_train, X_test, out_fn, 'jaccard', universe) - + write_sparse_output(X_train, X_test, out_fn, "jaccard", universe) def lastfm(out_fn, n_dimensions, test_size=50000): @@ -405,15 +411,12 @@ def lastfm(out_fn, n_dimensions, test_size=50000): # train an als model on the lastfm data _, _, play_counts = get_lastfm() model = implicit.als.AlternatingLeastSquares(factors=n_dimensions) - model.fit(implicit.nearest_neighbours.bm25_weight( - play_counts, K1=100, B=0.8)) + 
model.fit(implicit.nearest_neighbours.bm25_weight(play_counts, K1=100, B=0.8)) # transform item factors so that each one has the same norm, # and transform the user factors such by appending a 0 column _, item_factors = augment_inner_product_matrix(model.item_factors) - user_factors = numpy.append(model.user_factors, - numpy.zeros((model.user_factors.shape[0], 1)), - axis=1) + user_factors = numpy.append(model.user_factors, numpy.zeros((model.user_factors.shape[0], 1)), axis=1) # only query the first 50k users (speeds things up signficantly # without changing results) @@ -421,12 +424,13 @@ def lastfm(out_fn, n_dimensions, test_size=50000): # after that transformation a cosine lookup will return the same results # as the inner product on the untransformed data - write_output(item_factors, user_factors, out_fn, 'angular') + write_output(item_factors, user_factors, out_fn, "angular") -def movielens(fn, ratings_file, out_fn, separator='::', ignore_header=False): + +def movielens(fn, ratings_file, out_fn, separator="::", ignore_header=False): import zipfile - url = 'http://files.grouplens.org/datasets/movielens/%s' % fn + url = "http://files.grouplens.org/datasets/movielens/%s" % fn download(url, fn) with zipfile.ZipFile(fn) as z: @@ -434,19 +438,19 @@ def movielens(fn, ratings_file, out_fn, separator='::', ignore_header=False): if ignore_header: file.readline() - print('preparing %s' % out_fn) + print("preparing %s" % out_fn) users = {} X = [] dimension = 0 for line in file: - el = line.decode('UTF-8').split(separator) + el = line.decode("UTF-8").split(separator) userId = el[0] itemId = int(el[1]) rating = float(el[2]) - if rating < 3: # We only keep ratings >= 3 + if rating < 3: # We only keep ratings >= 3 continue if userId not in users: @@ -454,58 +458,50 @@ def movielens(fn, ratings_file, out_fn, separator='::', ignore_header=False): X.append([]) X[users[userId]].append(itemId) - dimension = max(dimension, itemId+1) + dimension = max(dimension, itemId + 1) X_train, X_test = train_test_split(numpy.array(X), test_size=500, dimension=dimension) - write_sparse_output(X_train, X_test, out_fn, 'jaccard', dimension) + write_sparse_output(X_train, X_test, out_fn, "jaccard", dimension) + def movielens1m(out_fn): - movielens('ml-1m.zip', 'ml-1m/ratings.dat', out_fn) + movielens("ml-1m.zip", "ml-1m/ratings.dat", out_fn) + def movielens10m(out_fn): - movielens('ml-10m.zip', 'ml-10M100K/ratings.dat', out_fn) + movielens("ml-10m.zip", "ml-10M100K/ratings.dat", out_fn) + def movielens20m(out_fn): - movielens('ml-20m.zip', 'ml-20m/ratings.csv', out_fn, ',', True) + movielens("ml-20m.zip", "ml-20m/ratings.csv", out_fn, ",", True) + DATASETS = { - 'deep-image-96-angular': deep_image, - 'fashion-mnist-784-euclidean': fashion_mnist, - 'gist-960-euclidean': gist, - 'glove-25-angular': lambda out_fn: glove(out_fn, 25), - 'glove-50-angular': lambda out_fn: glove(out_fn, 50), - 'glove-100-angular': lambda out_fn: glove(out_fn, 100), - 'glove-200-angular': lambda out_fn: glove(out_fn, 200), - 'mnist-784-euclidean': mnist, - 'random-xs-20-euclidean': lambda out_fn: random_float(out_fn, 20, 10000, 100, - 'euclidean'), - 'random-s-100-euclidean': lambda out_fn: random_float(out_fn, 100, 100000, 1000, - 'euclidean'), - 'random-xs-20-angular': lambda out_fn: random_float(out_fn, 20, 10000, 100, - 'angular'), - 'random-s-100-angular': lambda out_fn: random_float(out_fn, 100, 100000, 1000, - 'angular'), - 'random-xs-16-hamming': lambda out_fn: random_bitstring(out_fn, 16, 10000, - 100), - 'random-s-128-hamming': lambda 
out_fn: random_bitstring(out_fn, 128, - 50000, 1000), - 'random-l-256-hamming': lambda out_fn: random_bitstring(out_fn, 256, - 100000, 1000), - 'random-s-jaccard': lambda out_fn: random_jaccard(out_fn, n=10000, - size=20, universe=40), - 'random-l-jaccard': lambda out_fn: random_jaccard(out_fn, n=100000, - size=70, universe=100), - 'sift-128-euclidean': sift, - 'nytimes-256-angular': lambda out_fn: nytimes(out_fn, 256), - 'nytimes-16-angular': lambda out_fn: nytimes(out_fn, 16), - 'word2bits-800-hamming': lambda out_fn: word2bits( - out_fn, '400K', - 'w2b_bitlevel1_size800_vocab400K'), - 'lastfm-64-dot': lambda out_fn: lastfm(out_fn, 64), - 'sift-256-hamming': lambda out_fn: sift_hamming( - out_fn, 'sift.hamming.256'), - 'kosarak-jaccard': lambda out_fn: kosarak(out_fn), - 'movielens1m-jaccard': movielens1m, - 'movielens10m-jaccard': movielens10m, - 'movielens20m-jaccard': movielens20m, + "deep-image-96-angular": deep_image, + "fashion-mnist-784-euclidean": fashion_mnist, + "gist-960-euclidean": gist, + "glove-25-angular": lambda out_fn: glove(out_fn, 25), + "glove-50-angular": lambda out_fn: glove(out_fn, 50), + "glove-100-angular": lambda out_fn: glove(out_fn, 100), + "glove-200-angular": lambda out_fn: glove(out_fn, 200), + "mnist-784-euclidean": mnist, + "random-xs-20-euclidean": lambda out_fn: random_float(out_fn, 20, 10000, 100, "euclidean"), + "random-s-100-euclidean": lambda out_fn: random_float(out_fn, 100, 100000, 1000, "euclidean"), + "random-xs-20-angular": lambda out_fn: random_float(out_fn, 20, 10000, 100, "angular"), + "random-s-100-angular": lambda out_fn: random_float(out_fn, 100, 100000, 1000, "angular"), + "random-xs-16-hamming": lambda out_fn: random_bitstring(out_fn, 16, 10000, 100), + "random-s-128-hamming": lambda out_fn: random_bitstring(out_fn, 128, 50000, 1000), + "random-l-256-hamming": lambda out_fn: random_bitstring(out_fn, 256, 100000, 1000), + "random-s-jaccard": lambda out_fn: random_jaccard(out_fn, n=10000, size=20, universe=40), + "random-l-jaccard": lambda out_fn: random_jaccard(out_fn, n=100000, size=70, universe=100), + "sift-128-euclidean": sift, + "nytimes-256-angular": lambda out_fn: nytimes(out_fn, 256), + "nytimes-16-angular": lambda out_fn: nytimes(out_fn, 16), + "word2bits-800-hamming": lambda out_fn: word2bits(out_fn, "400K", "w2b_bitlevel1_size800_vocab400K"), + "lastfm-64-dot": lambda out_fn: lastfm(out_fn, 64), + "sift-256-hamming": lambda out_fn: sift_hamming(out_fn, "sift.hamming.256"), + "kosarak-jaccard": lambda out_fn: kosarak(out_fn), + "movielens1m-jaccard": movielens1m, + "movielens10m-jaccard": movielens10m, + "movielens20m-jaccard": movielens20m, } diff --git a/ann_benchmarks/distance.py b/ann_benchmarks/distance.py index d649a2769..94c3df434 100644 --- a/ann_benchmarks/distance.py +++ b/ann_benchmarks/distance.py @@ -4,51 +4,59 @@ # Need own implementation of jaccard because scipy's # implementation is different + def jaccard(a, b): if len(a) == 0 or len(b) == 0: - return 0 + return 0 intersect = len(set(a) & set(b)) return intersect / (float)(len(a) + len(b) - intersect) + def norm(a): - return np.sum(a ** 2) ** 0.5 + return np.sum(a**2) ** 0.5 + def euclidean(a, b): return norm(a - b) + metrics = { - 'hamming': { - 'distance': lambda a, b: np.sum(a.astype(np.bool_) ^ b.astype(np.bool_)), - 'distance_valid': lambda a: True, + "hamming": { + "distance": lambda a, b: np.sum(a.astype(np.bool_) ^ b.astype(np.bool_)), + "distance_valid": lambda a: True, }, # return 1 - jaccard similarity, because smaller distances are better. 
- 'jaccard': { - 'distance': lambda a, b: 1 - jaccard(a, b), - 'distance_valid': lambda a: a < 1 - 1e-5, + "jaccard": { + "distance": lambda a, b: 1 - jaccard(a, b), + "distance_valid": lambda a: a < 1 - 1e-5, }, - 'euclidean': { - 'distance': lambda a, b: euclidean(a, b), - 'distance_valid': lambda a: True, + "euclidean": { + "distance": lambda a, b: euclidean(a, b), + "distance_valid": lambda a: True, + }, + "angular": { + "distance": lambda a, b: euclidean(a, b) / (norm(a) * norm(b)), + "distance_valid": lambda a: True, }, - 'angular': { - 'distance': lambda a, b: euclidean(a, b) / (norm(a) * norm(b)), - 'distance_valid': lambda a: True, - } } + def sparse_to_lists(data, lengths): X = [] index = 0 for l in lengths: - X.append(data[index:index+l]) + X.append(data[index : index + l]) index += l return X + def dataset_transform(dataset): - if dataset.attrs.get('type', 'dense') != 'sparse': - return np.array(dataset['train']), np.array(dataset['test']) + if dataset.attrs.get("type", "dense") != "sparse": + return np.array(dataset["train"]), np.array(dataset["test"]) # we store the dataset as a list of integers, accompanied by a list of lengths in hdf5 # so we transform it back to the format expected by the algorithms here (array of array of ints) - return sparse_to_lists(dataset['train'], dataset['size_train']), sparse_to_lists(dataset['test'], dataset['size_test']) + return sparse_to_lists(dataset["train"], dataset["size_train"]), sparse_to_lists( + dataset["test"], dataset["size_test"] + ) diff --git a/ann_benchmarks/main.py b/ann_benchmarks/main.py index 45889143f..fd478d432 100644 --- a/ann_benchmarks/main.py +++ b/ann_benchmarks/main.py @@ -13,10 +13,12 @@ from ann_benchmarks.datasets import get_dataset, DATASETS from ann_benchmarks.constants import INDEX_DIR -from ann_benchmarks.algorithms.definitions import (get_definitions, - list_algorithms, - algorithm_status, - InstantiationStatus) +from ann_benchmarks.algorithms.definitions import ( + get_definitions, + list_algorithms, + algorithm_status, + InstantiationStatus, +) from ann_benchmarks.results import get_result_filename from ann_benchmarks.runner import run, run_docker @@ -44,83 +46,56 @@ def run_worker(cpu, args, queue): if args.batch: cpu_limit = "0-%d" % (multiprocessing.cpu_count() - 1) - run_docker(definition, args.dataset, args.count, - args.runs, args.timeout, args.batch, cpu_limit, mem_limit) + run_docker(definition, args.dataset, args.count, args.runs, args.timeout, args.batch, cpu_limit, mem_limit) def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( - '--dataset', - metavar='NAME', - help='the dataset to load training points from', - default='glove-100-angular', - choices=DATASETS.keys()) + "--dataset", + metavar="NAME", + help="the dataset to load training points from", + default="glove-100-angular", + choices=DATASETS.keys(), + ) parser.add_argument( - "-k", "--count", - default=10, - type=positive_int, - help="the number of near neighbours to search for") - parser.add_argument( - '--definitions', - metavar='FILE', - help='load algorithm definitions from FILE', - default='algos.yaml') - parser.add_argument( - '--algorithm', - metavar='NAME', - help='run only the named algorithm', - default=None) + "-k", "--count", default=10, type=positive_int, help="the number of near neighbours to search for" + ) parser.add_argument( - '--docker-tag', - metavar='NAME', 
- help='run only algorithms in a particular docker image', - default=None) + "--definitions", metavar="FILE", help="load algorithm definitions from FILE", default="algos.yaml" + ) + parser.add_argument("--algorithm", metavar="NAME", help="run only the named algorithm", default=None) parser.add_argument( - '--list-algorithms', - help='print the names of all known algorithms and exit', - action='store_true') + "--docker-tag", metavar="NAME", help="run only algorithms in a particular docker image", default=None + ) parser.add_argument( - '--force', - help='re-run algorithms even if their results already exist', - action='store_true') + "--list-algorithms", help="print the names of all known algorithms and exit", action="store_true" + ) + parser.add_argument("--force", help="re-run algorithms even if their results already exist", action="store_true") parser.add_argument( - '--runs', - metavar='COUNT', + "--runs", + metavar="COUNT", type=positive_int, - help='run each algorithm instance %(metavar)s times and use only' - ' the best result', - default=5) - parser.add_argument( - '--timeout', - type=int, - help='Timeout (in seconds) for each individual algorithm run, or -1' - 'if no timeout should be set', - default=2 * 3600) + help="run each algorithm instance %(metavar)s times and use only" " the best result", + default=5, + ) parser.add_argument( - '--local', - action='store_true', - help='If set, then will run everything locally (inside the same ' - 'process) rather than using Docker') - parser.add_argument( - '--batch', - action='store_true', - help='If set, algorithms get all queries at once') - parser.add_argument( - '--max-n-algorithms', + "--timeout", type=int, - help='Max number of algorithms to run (just used for testing)', - default=-1) + help="Timeout (in seconds) for each individual algorithm run, or -1" "if no timeout should be set", + default=2 * 3600, + ) parser.add_argument( - '--run-disabled', - help='run algorithms that are disabled in algos.yml', - action='store_true') + "--local", + action="store_true", + help="If set, then will run everything locally (inside the same " "process) rather than using Docker", + ) + parser.add_argument("--batch", action="store_true", help="If set, algorithms get all queries at once") parser.add_argument( - '--parallelism', - type=positive_int, - help='Number of Docker containers in parallel', - default=1) + "--max-n-algorithms", type=int, help="Max number of algorithms to run (just used for testing)", default=-1 + ) + parser.add_argument("--run-disabled", help="run algorithms that are disabled in algos.yml", action="store_true") + parser.add_argument("--parallelism", type=positive_int, help="Number of Docker containers in parallel", default=1) args = parser.parse_args() if args.timeout == -1: @@ -139,10 +114,9 @@ def main(): shutil.rmtree(INDEX_DIR) dataset, dimension = get_dataset(args.dataset) - point_type = dataset.attrs.get('point_type', 'float') - distance = dataset.attrs['distance'] - definitions = get_definitions( - args.definitions, dimension, point_type, distance, args.count) + point_type = dataset.attrs.get("point_type", "float") + distance = dataset.attrs["distance"] + definitions = get_definitions(args.definitions, dimension, point_type, distance, args.count) # Filter out, from the loaded definitions, all those query argument groups # that correspond to experiments that have already been run. 
(This might @@ -155,22 +129,19 @@ def main(): query_argument_groups = [[]] not_yet_run = [] for query_arguments in query_argument_groups: - fn = get_result_filename(args.dataset, - args.count, definition, - query_arguments, args.batch) + fn = get_result_filename(args.dataset, args.count, definition, query_arguments, args.batch) if args.force or not os.path.exists(fn): not_yet_run.append(query_arguments) if not_yet_run: if definition.query_argument_groups: - definition = definition._replace( - query_argument_groups=not_yet_run) + definition = definition._replace(query_argument_groups=not_yet_run) filtered_definitions.append(definition) definitions = filtered_definitions random.shuffle(definitions) if args.algorithm: - logger.info(f'running only {args.algorithm}') + logger.info(f"running only {args.algorithm}") definitions = [d for d in definitions if d.algorithm == args.algorithm] if not args.local: @@ -179,67 +150,70 @@ def main(): docker_tags = set() for image in docker_client.images.list(): for tag in image.tags: - tag = tag.split(':')[0] + tag = tag.split(":")[0] docker_tags.add(tag) if args.docker_tag: - logger.info(f'running only {args.docker_tag}') - definitions = [ - d for d in definitions if d.docker_tag == args.docker_tag] + logger.info(f"running only {args.docker_tag}") + definitions = [d for d in definitions if d.docker_tag == args.docker_tag] if set(d.docker_tag for d in definitions).difference(docker_tags): - logger.info(f'not all docker images available, only: {set(docker_tags)}') - logger.info(f'missing docker images: ' - f'{str(set(d.docker_tag for d in definitions).difference(docker_tags))}') - definitions = [ - d for d in definitions if d.docker_tag in docker_tags] + logger.info(f"not all docker images available, only: {set(docker_tags)}") + logger.info( + f"missing docker images: " f"{str(set(d.docker_tag for d in definitions).difference(docker_tags))}" + ) + definitions = [d for d in definitions if d.docker_tag in docker_tags] else: + def _test(df): status = algorithm_status(df) # If the module was loaded but doesn't actually have a constructor # of the right name, then the definition is broken if status == InstantiationStatus.NO_CONSTRUCTOR: - raise Exception("%s.%s(%s): error: the module '%s' does not" - " expose the named constructor" % ( - df.module, df.constructor, - df.arguments, df.module)) + raise Exception( + "%s.%s(%s): error: the module '%s' does not" + " expose the named constructor" % (df.module, df.constructor, df.arguments, df.module) + ) if status == InstantiationStatus.NO_MODULE: # If the module couldn't be loaded (presumably because # of a missing dependency), print a warning and remove # this definition from the list of things to be run - logging.warning("%s.%s(%s): the module '%s' could not be " - "loaded; skipping" % (df.module, df.constructor, - df.arguments, df.module)) + logging.warning( + "%s.%s(%s): the module '%s' could not be " + "loaded; skipping" % (df.module, df.constructor, df.arguments, df.module) + ) return False else: return True + definitions = [d for d in definitions if _test(d)] if not args.run_disabled: if len([d for d in definitions if d.disabled]): - logger.info(f'Not running disabled algorithms {[d for d in definitions if d.disabled]}') + logger.info(f"Not running disabled algorithms {[d for d in definitions if d.disabled]}") definitions = [d for d in definitions if not d.disabled] if args.max_n_algorithms >= 0: - definitions = definitions[:args.max_n_algorithms] + definitions = definitions[: args.max_n_algorithms] if 
len(definitions) == 0: - raise Exception('Nothing to run') + raise Exception("Nothing to run") else: - logger.info(f'Order: {definitions}') + logger.info(f"Order: {definitions}") if args.parallelism > multiprocessing.cpu_count() - 1: - raise Exception('Parallelism larger than %d! (CPU count minus one)' % (multiprocessing.cpu_count() - 1)) + raise Exception("Parallelism larger than %d! (CPU count minus one)" % (multiprocessing.cpu_count() - 1)) # Multiprocessing magic to farm this out to all CPUs queue = multiprocessing.Queue() for definition in definitions: queue.put(definition) if args.batch and args.parallelism > 1: - raise Exception(f"Batch mode uses all available CPU resources, --parallelism should be set to 1. (Was: {args.parallelism})") - workers = [multiprocessing.Process(target=run_worker, args=(i+1, args, queue)) - for i in range(args.parallelism)] + raise Exception( + f"Batch mode uses all available CPU resources, --parallelism should be set to 1. (Was: {args.parallelism})" + ) + workers = [multiprocessing.Process(target=run_worker, args=(i + 1, args, queue)) for i in range(args.parallelism)] [worker.start() for worker in workers] [worker.join() for worker in workers] diff --git a/ann_benchmarks/plotting/metrics.py b/ann_benchmarks/plotting/metrics.py index e14fe5056..6d038e8dd 100644 --- a/ann_benchmarks/plotting/metrics.py +++ b/ann_benchmarks/plotting/metrics.py @@ -10,8 +10,7 @@ def epsilon_threshold(data, count, epsilon): return data[count - 1] * (1 + epsilon) -def get_recall_values(dataset_distances, run_distances, count, threshold, - epsilon=1e-3): +def get_recall_values(dataset_distances, run_distances, count, threshold, epsilon=1e-3): recalls = np.zeros(len(run_distances)) for i in range(len(run_distances)): t = threshold(dataset_distances[i], count, epsilon) @@ -20,76 +19,73 @@ def get_recall_values(dataset_distances, run_distances, count, threshold, if d <= t: actual += 1 recalls[i] = actual - return (np.mean(recalls) / float(count), - np.std(recalls) / float(count), - recalls) + return (np.mean(recalls) / float(count), np.std(recalls) / float(count), recalls) def knn(dataset_distances, run_distances, count, metrics, epsilon=1e-3): - if 'knn' not in metrics: - print('Computing knn metrics') - knn_metrics = metrics.create_group('knn') - mean, std, recalls = get_recall_values(dataset_distances, - run_distances, count, - knn_threshold, epsilon) - knn_metrics.attrs['mean'] = mean - knn_metrics.attrs['std'] = std - knn_metrics['recalls'] = recalls + if "knn" not in metrics: + print("Computing knn metrics") + knn_metrics = metrics.create_group("knn") + mean, std, recalls = get_recall_values(dataset_distances, run_distances, count, knn_threshold, epsilon) + knn_metrics.attrs["mean"] = mean + knn_metrics.attrs["std"] = std + knn_metrics["recalls"] = recalls else: print("Found cached result") - return metrics['knn'] + return metrics["knn"] def epsilon(dataset_distances, run_distances, count, metrics, epsilon=0.01): - s = 'eps' + str(epsilon) + s = "eps" + str(epsilon) if s not in metrics: - print('Computing epsilon metrics') + print("Computing epsilon metrics") epsilon_metrics = metrics.create_group(s) - mean, std, recalls = get_recall_values(dataset_distances, - run_distances, count, - epsilon_threshold, epsilon) - epsilon_metrics.attrs['mean'] = mean - epsilon_metrics.attrs['std'] = std - epsilon_metrics['recalls'] = recalls + mean, std, recalls = get_recall_values(dataset_distances, run_distances, count, epsilon_threshold, epsilon) + epsilon_metrics.attrs["mean"] = mean + 
epsilon_metrics.attrs["std"] = std + epsilon_metrics["recalls"] = recalls else: print("Found cached result") return metrics[s] def rel(dataset_distances, run_distances, metrics): - if 'rel' not in metrics.attrs: - print('Computing rel metrics') + if "rel" not in metrics.attrs: + print("Computing rel metrics") total_closest_distance = 0.0 total_candidate_distance = 0.0 - for true_distances, found_distances in zip(dataset_distances, - run_distances): + for true_distances, found_distances in zip(dataset_distances, run_distances): total_closest_distance += np.sum(true_distances) total_candidate_distance += np.sum(found_distances) if total_closest_distance < 0.01: - metrics.attrs['rel'] = float("inf") + metrics.attrs["rel"] = float("inf") else: - metrics.attrs['rel'] = total_candidate_distance / \ - total_closest_distance + metrics.attrs["rel"] = total_candidate_distance / total_closest_distance else: print("Found cached result") - return metrics.attrs['rel'] + return metrics.attrs["rel"] def queries_per_second(queries, attrs): return 1.0 / attrs["best_search_time"] + def percentile_50(times): return np.percentile(times, 50.0) * 1000.0 + def percentile_95(times): return np.percentile(times, 95.0) * 1000.0 + def percentile_99(times): return np.percentile(times, 99.0) * 1000.0 + def percentile_999(times): return np.percentile(times, 99.9) * 1000.0 + def index_size(queries, attrs): # TODO(erikbern): should replace this with peak memory usage or something return attrs.get("index_size", 0) @@ -104,79 +100,106 @@ def candidates(queries, attrs): def dist_computations(queries, attrs): - return attrs.get("dist_comps", 0) / (attrs['run_count'] * len(queries)) + return attrs.get("dist_comps", 0) / (attrs["run_count"] * len(queries)) all_metrics = { "k-nn": { "description": "Recall", - "function": lambda true_distances, run_distances, metrics, times, run_attrs: knn(true_distances, run_distances, run_attrs["count"], metrics).attrs['mean'], # noqa + "function": lambda true_distances, run_distances, metrics, times, run_attrs: knn( + true_distances, run_distances, run_attrs["count"], metrics + ).attrs[ + "mean" + ], # noqa "worst": float("-inf"), - "lim": [0.0, 1.03] + "lim": [0.0, 1.03], }, "epsilon": { "description": "Epsilon 0.01 Recall", - "function": lambda true_distances, run_distances, metrics, times, run_attrs: epsilon(true_distances, run_distances, run_attrs["count"], metrics).attrs['mean'], # noqa - "worst": float("-inf") + "function": lambda true_distances, run_distances, metrics, times, run_attrs: epsilon( + true_distances, run_distances, run_attrs["count"], metrics + ).attrs[ + "mean" + ], # noqa + "worst": float("-inf"), }, "largeepsilon": { "description": "Epsilon 0.1 Recall", - "function": lambda true_distances, run_distances, metrics, times, run_attrs: epsilon(true_distances, run_distances, run_attrs["count"], metrics, 0.1).attrs['mean'], # noqa - "worst": float("-inf") + "function": lambda true_distances, run_distances, metrics, times, run_attrs: epsilon( + true_distances, run_distances, run_attrs["count"], metrics, 0.1 + ).attrs[ + "mean" + ], # noqa + "worst": float("-inf"), }, "rel": { "description": "Relative Error", - "function": lambda true_distances, run_distances, metrics, times, run_attrs: rel(true_distances, run_distances, metrics), # noqa - "worst": float("inf") + "function": lambda true_distances, run_distances, metrics, times, run_attrs: rel( + true_distances, run_distances, metrics + ), # noqa + "worst": float("inf"), }, "qps": { "description": "Queries per second (1/s)", - 
"function": lambda true_distances, run_distances, metrics, times, run_attrs: queries_per_second(true_distances, run_attrs), # noqa - "worst": float("-inf") + "function": lambda true_distances, run_distances, metrics, times, run_attrs: queries_per_second( + true_distances, run_attrs + ), # noqa + "worst": float("-inf"), }, "p50": { "description": "Percentile 50 (millis)", "function": lambda true_distances, run_distances, metrics, times, run_attrs: percentile_50(times), # noqa - "worst": float("inf") + "worst": float("inf"), }, "p95": { "description": "Percentile 95 (millis)", "function": lambda true_distances, run_distances, metrics, times, run_attrs: percentile_95(times), # noqa - "worst": float("inf") + "worst": float("inf"), }, "p99": { "description": "Percentile 99 (millis)", "function": lambda true_distances, run_distances, metrics, times, run_attrs: percentile_99(times), # noqa - "worst": float("inf") + "worst": float("inf"), }, "p999": { "description": "Percentile 99.9 (millis)", "function": lambda true_distances, run_distances, metrics, times, run_attrs: percentile_999(times), # noqa - "worst": float("inf") + "worst": float("inf"), }, "distcomps": { "description": "Distance computations", - "function": lambda true_distances, run_distances, metrics, times, run_attrs: dist_computations(true_distances, run_attrs), # noqa - "worst": float("inf") + "function": lambda true_distances, run_distances, metrics, times, run_attrs: dist_computations( + true_distances, run_attrs + ), # noqa + "worst": float("inf"), }, "build": { "description": "Build time (s)", - "function": lambda true_distances, run_distances, metrics, times, run_attrs: build_time(true_distances, run_attrs), # noqa - "worst": float("inf") + "function": lambda true_distances, run_distances, metrics, times, run_attrs: build_time( + true_distances, run_attrs + ), # noqa + "worst": float("inf"), }, "candidates": { "description": "Candidates generated", - "function": lambda true_distances, run_distances, metrics, times, run_attrs: candidates(true_distances, run_attrs), # noqa - "worst": float("inf") + "function": lambda true_distances, run_distances, metrics, times, run_attrs: candidates( + true_distances, run_attrs + ), # noqa + "worst": float("inf"), }, "indexsize": { "description": "Index size (kB)", - "function": lambda true_distances, run_distances, metrics, times, run_attrs: index_size(true_distances, run_attrs), # noqa - "worst": float("inf") + "function": lambda true_distances, run_distances, metrics, times, run_attrs: index_size( + true_distances, run_attrs + ), # noqa + "worst": float("inf"), }, "queriessize": { "description": "Index size (kB)/Queries per second (s)", - "function": lambda true_distances, run_distances, metrics, times, run_attrs: index_size(true_distances, run_attrs) / queries_per_second(true_distances, run_attrs), # noqa - "worst": float("inf") - } + "function": lambda true_distances, run_distances, metrics, times, run_attrs: index_size( + true_distances, run_attrs + ) + / queries_per_second(true_distances, run_attrs), # noqa + "worst": float("inf"), + }, } diff --git a/ann_benchmarks/plotting/plot_variants.py b/ann_benchmarks/plotting/plot_variants.py index e8777ee47..dcba83ad9 100644 --- a/ann_benchmarks/plotting/plot_variants.py +++ b/ann_benchmarks/plotting/plot_variants.py @@ -1,4 +1,3 @@ - all_plot_variants = { "recall/time": ("k-nn", "qps"), "recall/buildtime": ("k-nn", "build"), diff --git a/ann_benchmarks/plotting/utils.py b/ann_benchmarks/plotting/utils.py index a49d795db..035ae290e 100644 --- 
a/ann_benchmarks/plotting/utils.py +++ b/ann_benchmarks/plotting/utils.py @@ -6,9 +6,9 @@ def get_or_create_metrics(run): - if 'metrics' not in run: - run.create_group('metrics') - return run['metrics'] + if "metrics" not in run: + run.create_group("metrics") + return run["metrics"] def create_pointset(data, xn, yn): @@ -21,8 +21,7 @@ def create_pointset(data, xn, yn): # Generate Pareto frontier xs, ys, ls = [], [], [] last_x = xm["worst"] - comparator = ((lambda xv, lx: xv > lx) - if last_x < 0 else (lambda xv, lx: xv < lx)) + comparator = (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) for algo, algo_name, xv, yv in data: if not xv or not yv: continue @@ -37,32 +36,29 @@ def create_pointset(data, xn, yn): return xs, ys, ls, axs, ays, als -def compute_metrics(true_nn_distances, res, metric_1, metric_2, - recompute=False): +def compute_metrics(true_nn_distances, res, metric_1, metric_2, recompute=False): all_results = {} for i, (properties, run) in enumerate(res): - algo = properties['algo'] - algo_name = properties['name'] + algo = properties["algo"] + algo_name = properties["name"] # cache distances to avoid access to hdf5 file - run_distances = np.array(run['distances']) + run_distances = np.array(run["distances"]) # cache times to avoid access to hdf5 file - times = np.array(run['times']) - if recompute and 'metrics' in run: - del run['metrics'] + times = np.array(run["times"]) + if recompute and "metrics" in run: + del run["metrics"] metrics_cache = get_or_create_metrics(run) - metric_1_value = metrics[metric_1]['function']( - true_nn_distances, - run_distances, metrics_cache, times, properties) - metric_2_value = metrics[metric_2]['function']( - true_nn_distances, - run_distances, metrics_cache, times, properties) + metric_1_value = metrics[metric_1]["function"]( + true_nn_distances, run_distances, metrics_cache, times, properties + ) + metric_2_value = metrics[metric_2]["function"]( + true_nn_distances, run_distances, metrics_cache, times, properties + ) - print('%3d: %80s %12.3f %12.3f' % - (i, algo_name, metric_1_value, metric_2_value)) + print("%3d: %80s %12.3f %12.3f" % (i, algo_name, metric_1_value, metric_2_value)) - all_results.setdefault(algo, []).append( - (algo, algo_name, metric_1_value, metric_2_value)) + all_results.setdefault(algo, []).append((algo, algo_name, metric_1_value, metric_2_value)) return all_results @@ -70,47 +66,43 @@ def compute_metrics(true_nn_distances, res, metric_1, metric_2, def compute_all_metrics(true_nn_distances, run, properties, recompute=False): algo = properties["algo"] algo_name = properties["name"] - print('--') + print("--") print(algo_name) results = {} # cache distances to avoid access to hdf5 file run_distances = np.array(run["distances"]) # cache times to avoid access to hdf5 file - times = np.array(run['times']) - if recompute and 'metrics' in run: - del run['metrics'] + times = np.array(run["times"]) + if recompute and "metrics" in run: + del run["metrics"] metrics_cache = get_or_create_metrics(run) for name, metric in metrics.items(): - v = metric["function"]( - true_nn_distances, run_distances, metrics_cache, times, properties) + v = metric["function"](true_nn_distances, run_distances, metrics_cache, times, properties) results[name] = v if v: - print('%s: %g' % (name, v)) + print("%s: %g" % (name, v)) return (algo, algo_name, results) + def compute_metrics_all_runs(dataset, res, recompute=False): - true_nn_distances=list(dataset['distances']) + true_nn_distances = list(dataset["distances"]) for i, (properties, 
run) in enumerate(res): - algo = properties['algo'] - algo_name = properties['name'] + algo = properties["algo"] + algo_name = properties["name"] # cache distances to avoid access to hdf5 file # print('Load distances and times') - run_distances = np.array(run['distances']) - times = np.array(run['times']) + run_distances = np.array(run["distances"]) + times = np.array(run["times"]) # print('... done') - if recompute and 'metrics' in run: - print('Recomputing metrics, clearing cache') - del run['metrics'] + if recompute and "metrics" in run: + print("Recomputing metrics, clearing cache") + del run["metrics"] metrics_cache = get_or_create_metrics(run) - - dataset = properties['dataset'] - - run_result = { - 'algorithm': algo, - 'parameters': algo_name, - 'count': properties['count'] - } + + dataset = properties["dataset"] + + run_result = {"algorithm": algo, "parameters": algo_name, "count": properties["count"]} for name, metric in metrics.items(): v = metric["function"](true_nn_distances, run_distances, metrics_cache, times, properties) run_result[name] = v @@ -119,29 +111,23 @@ def compute_metrics_all_runs(dataset, res, recompute=False): def generate_n_colors(n): vs = np.linspace(0.3, 0.9, 7) - colors = [(.9, .4, .4, 1.)] + colors = [(0.9, 0.4, 0.4, 1.0)] def euclidean(a, b): - return sum((x - y)**2 for x, y in zip(a, b)) + return sum((x - y) ** 2 for x, y in zip(a, b)) + while len(colors) < n: - new_color = max(itertools.product(vs, vs, vs), - key=lambda a: min(euclidean(a, b) for b in colors)) - colors.append(new_color + (1.,)) + new_color = max(itertools.product(vs, vs, vs), key=lambda a: min(euclidean(a, b) for b in colors)) + colors.append(new_color + (1.0,)) return colors def create_linestyles(unique_algorithms): - colors = dict( - zip(unique_algorithms, generate_n_colors(len(unique_algorithms)))) - linestyles = dict((algo, ['--', '-.', '-', ':'][i % 4]) - for i, algo in enumerate(unique_algorithms)) - markerstyles = dict((algo, ['+', '<', 'o', '*', 'x'][i % 5]) - for i, algo in enumerate(unique_algorithms)) - faded = dict((algo, (r, g, b, 0.3)) - for algo, (r, g, b, a) in colors.items()) - return dict((algo, (colors[algo], faded[algo], - linestyles[algo], markerstyles[algo])) - for algo in unique_algorithms) + colors = dict(zip(unique_algorithms, generate_n_colors(len(unique_algorithms)))) + linestyles = dict((algo, ["--", "-.", "-", ":"][i % 4]) for i, algo in enumerate(unique_algorithms)) + markerstyles = dict((algo, ["+", "<", "o", "*", "x"][i % 5]) for i, algo in enumerate(unique_algorithms)) + faded = dict((algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items()) + return dict((algo, (colors[algo], faded[algo], linestyles[algo], markerstyles[algo])) for algo in unique_algorithms) def get_up_down(metric): @@ -157,9 +143,10 @@ def get_left_right(metric): def get_plot_label(xm, ym): - template = ("%(xlabel)s-%(ylabel)s tradeoff - %(updown)s and" - " to the %(leftright)s is better") - return template % {"xlabel": xm["description"], - "ylabel": ym["description"], - "updown": get_up_down(ym), - "leftright": get_left_right(xm)} + template = "%(xlabel)s-%(ylabel)s tradeoff - %(updown)s and" " to the %(leftright)s is better" + return template % { + "xlabel": xm["description"], + "ylabel": ym["description"], + "updown": get_up_down(ym), + "leftright": get_left_right(xm), + } diff --git a/ann_benchmarks/results.py b/ann_benchmarks/results.py index 5ad801e51..e11540275 100644 --- a/ann_benchmarks/results.py +++ b/ann_benchmarks/results.py @@ -7,55 +7,51 @@ import traceback -def 
get_result_filename(dataset=None, count=None, definition=None, - query_arguments=None, batch_mode=False): - d = ['results'] +def get_result_filename(dataset=None, count=None, definition=None, query_arguments=None, batch_mode=False): + d = ["results"] if dataset: d.append(dataset) if count: d.append(str(count)) if definition: - d.append(definition.algorithm + ('-batch' if batch_mode else '')) + d.append(definition.algorithm + ("-batch" if batch_mode else "")) data = definition.arguments + query_arguments - d.append(re.sub(r'\W+', '_', json.dumps(data, sort_keys=True)) - .strip('_') + ".hdf5") + d.append(re.sub(r"\W+", "_", json.dumps(data, sort_keys=True)).strip("_") + ".hdf5") return os.path.join(*d) -def store_results(dataset, count, definition, query_arguments, attrs, results, - batch): - fn = get_result_filename( - dataset, count, definition, query_arguments, batch) +def store_results(dataset, count, definition, query_arguments, attrs, results, batch): + fn = get_result_filename(dataset, count, definition, query_arguments, batch) head, tail = os.path.split(fn) if not os.path.isdir(head): os.makedirs(head) - f = h5py.File(fn, 'w') + f = h5py.File(fn, "w") for k, v in attrs.items(): f.attrs[k] = v - times = f.create_dataset('times', (len(results),), 'f') - neighbors = f.create_dataset('neighbors', (len(results), count), 'i') - distances = f.create_dataset('distances', (len(results), count), 'f') + times = f.create_dataset("times", (len(results),), "f") + neighbors = f.create_dataset("neighbors", (len(results), count), "i") + distances = f.create_dataset("distances", (len(results), count), "f") for i, (time, ds) in enumerate(results): times[i] = time neighbors[i] = [n for n, d in ds] + [-1] * (count - len(ds)) - distances[i] = [d for n, d in ds] + [float('inf')] * (count - len(ds)) + distances[i] = [d for n, d in ds] + [float("inf")] * (count - len(ds)) f.close() def load_all_results(dataset=None, count=None, batch_mode=False): for root, _, files in os.walk(get_result_filename(dataset, count)): for fn in files: - if os.path.splitext(fn)[-1] != '.hdf5': + if os.path.splitext(fn)[-1] != ".hdf5": continue try: - f = h5py.File(os.path.join(root, fn), 'r+') + f = h5py.File(os.path.join(root, fn), "r+") properties = dict(f.attrs) - if batch_mode != properties['batch_mode']: + if batch_mode != properties["batch_mode"]: continue yield properties, f f.close() except: - print('Was unable to read', fn) + print("Was unable to read", fn) traceback.print_exc() @@ -63,5 +59,5 @@ def get_unique_algorithms(): algorithms = set() for batch_mode in [False, True]: for properties, _ in load_all_results(batch_mode=batch_mode): - algorithms.add(properties['algo']) + algorithms.add(properties["algo"]) return algorithms diff --git a/ann_benchmarks/runner.py b/ann_benchmarks/runner.py index 8aaddc5cd..a0ab5f457 100644 --- a/ann_benchmarks/runner.py +++ b/ann_benchmarks/runner.py @@ -11,22 +11,20 @@ import numpy import psutil -from ann_benchmarks.algorithms.definitions import (Definition, - instantiate_algorithm) +from ann_benchmarks.algorithms.definitions import Definition, instantiate_algorithm from ann_benchmarks.datasets import get_dataset, DATASETS from ann_benchmarks.distance import metrics, dataset_transform from ann_benchmarks.results import store_results -def run_individual_query(algo, X_train, X_test, distance, count, run_count, - batch): - prepared_queries = \ - (batch and hasattr(algo, "prepare_batch_query")) or \ - ((not batch) and hasattr(algo, "prepare_query")) +def run_individual_query(algo, X_train, 
X_test, distance, count, run_count, batch): + prepared_queries = (batch and hasattr(algo, "prepare_batch_query")) or ( + (not batch) and hasattr(algo, "prepare_query") + ) - best_search_time = float('inf') + best_search_time = float("inf") for i in range(run_count): - print('Run %d/%d...' % (i + 1, run_count)) + print("Run %d/%d..." % (i + 1, run_count)) # a bit dumb but can't be a scalar since of Python's scoping rules n_items_processed = [0] @@ -35,20 +33,23 @@ def single_query(v): algo.prepare_query(v, count) start = time.time() algo.run_prepared_query() - total = (time.time() - start) + total = time.time() - start candidates = algo.get_prepared_query_results() else: start = time.time() candidates = algo.query(v, count) - total = (time.time() - start) - candidates = [(int(idx), float(metrics[distance]['distance'](v, X_train[idx]))) # noqa - for idx in candidates] + total = time.time() - start + candidates = [ + (int(idx), float(metrics[distance]["distance"](v, X_train[idx]))) for idx in candidates # noqa + ] n_items_processed[0] += 1 if n_items_processed[0] % 1000 == 0: - print('Processed %d/%d queries...' % (n_items_processed[0], len(X_test))) + print("Processed %d/%d queries..." % (n_items_processed[0], len(X_test))) if len(candidates) > count: - print('warning: algorithm %s returned %d results, but count' - ' is only %d)' % (algo, len(candidates), count)) + print( + "warning: algorithm %s returned %d results, but count" + " is only %d)" % (algo, len(candidates), count) + ) return (total, candidates) def batch_query(X): @@ -56,15 +57,16 @@ def batch_query(X): algo.prepare_batch_query(X, count) start = time.time() algo.run_batch_query() - total = (time.time() - start) + total = time.time() - start else: start = time.time() algo.batch_query(X, count) - total = (time.time() - start) + total = time.time() - start results = algo.get_batch_results() - candidates = [[(int(idx), float(metrics[distance]['distance'](v, X_train[idx]))) # noqa - for idx in single_results] - for v, single_results in zip(X, results)] + candidates = [ + [(int(idx), float(metrics[distance]["distance"](v, X_train[idx]))) for idx in single_results] # noqa + for v, single_results in zip(X, results) + ] return [(total / float(len(X)), v) for v in candidates] if batch: @@ -87,7 +89,7 @@ def batch_query(X): "name": str(algo), "run_count": run_count, "distance": distance, - "count": int(count) + "count": int(count), } additional = algo.get_additional() for k in additional: @@ -97,18 +99,23 @@ def batch_query(X): def run(definition, dataset, count, run_count, batch): algo = instantiate_algorithm(definition) - assert not definition.query_argument_groups \ - or hasattr(algo, "set_query_arguments"), """\ + assert not definition.query_argument_groups or hasattr( + algo, "set_query_arguments" + ), """\ error: query argument groups have been specified for %s.%s(%s), but the \ algorithm instantiated from it does not implement the set_query_arguments \ -function""" % (definition.module, definition.constructor, definition.arguments) +function""" % ( + definition.module, + definition.constructor, + definition.arguments, + ) D, dimension = get_dataset(dataset) - X_train = numpy.array(D['train']) - X_test = numpy.array(D['test']) - distance = D.attrs['distance'] - print('got a train set of size (%d * %d)' % (X_train.shape[0], dimension)) - print('got %d queries' % len(X_test)) + X_train = numpy.array(D["train"]) + X_test = numpy.array(D["test"]) + distance = D.attrs["distance"] + print("got a train set of size (%d * %d)" % 
(X_train.shape[0], dimension)) + print("got %d queries" % len(X_test)) X_train, X_test = dataset_transform(D) @@ -121,8 +128,8 @@ def run(definition, dataset, count, run_count, batch): algo.fit(X_train) build_time = time.time() - t0 index_size = algo.get_memory_usage() - memory_usage_before - print('Built index in', build_time) - print('Index size: ', index_size) + print("Built index in", build_time) + print("Index size: ", index_size) query_argument_groups = definition.query_argument_groups # Make sure that algorithms with no query argument groups still get run @@ -131,68 +138,49 @@ def run(definition, dataset, count, run_count, batch): query_argument_groups = [[]] for pos, query_arguments in enumerate(query_argument_groups, 1): - print("Running query argument group %d of %d..." % - (pos, len(query_argument_groups))) + print("Running query argument group %d of %d..." % (pos, len(query_argument_groups))) if query_arguments: algo.set_query_arguments(*query_arguments) - descriptor, results = run_individual_query( - algo, X_train, X_test, distance, count, run_count, batch) + descriptor, results = run_individual_query(algo, X_train, X_test, distance, count, run_count, batch) descriptor["build_time"] = build_time descriptor["index_size"] = index_size descriptor["algo"] = definition.algorithm descriptor["dataset"] = dataset - store_results(dataset, count, definition, - query_arguments, descriptor, results, batch) + store_results(dataset, count, definition, query_arguments, descriptor, results, batch) finally: algo.done() def run_from_cmdline(): - parser = argparse.ArgumentParser(''' + parser = argparse.ArgumentParser( + """ NOTICE: You probably want to run.py rather than this script. -''') - parser.add_argument( - '--dataset', - choices=DATASETS.keys(), - help='Dataset to benchmark on.', - required=True) - parser.add_argument( - '--algorithm', - help='Name of algorithm for saving the results.', - required=True) - parser.add_argument( - '--module', - help='Python module containing algorithm. E.g. "ann_benchmarks.algorithms.annoy"', - required=True) +""" + ) + parser.add_argument("--dataset", choices=DATASETS.keys(), help="Dataset to benchmark on.", required=True) + parser.add_argument("--algorithm", help="Name of algorithm for saving the results.", required=True) parser.add_argument( - '--constructor', - help='Constructer to load from modulel. E.g. "Annoy"', - required=True) + "--module", help='Python module containing algorithm. E.g. "ann_benchmarks.algorithms.annoy"', required=True + ) + parser.add_argument("--constructor", help='Constructer to load from modulel. E.g. "Annoy"', required=True) parser.add_argument( - '--count', - help='K: Number of nearest neighbours for the algorithm to return.', - required=True, - type=int) + "--count", help="K: Number of nearest neighbours for the algorithm to return.", required=True, type=int + ) parser.add_argument( - '--runs', - help='Number of times to run the algorihm. Will use the fastest run-time over the bunch.', + "--runs", + help="Number of times to run the algorihm. Will use the fastest run-time over the bunch.", required=True, - type=int) + type=int, + ) parser.add_argument( - '--batch', + "--batch", help='If flag included, algorithms will be run in batch mode, rather than "individual query" mode.', - action='store_true') - parser.add_argument( - 'build', - help='JSON of arguments to pass to the constructor. E.g. ["angular", 100]' - ) - parser.add_argument( - 'queries', - help='JSON of arguments to pass to the queries. E.g. 
[100]', - nargs='*', - default=[]) + action="store_true", + ) + parser.add_argument("build", help='JSON of arguments to pass to the constructor. E.g. ["angular", 100]') + parser.add_argument("queries", help="JSON of arguments to pass to the queries. E.g. [100]", nargs="*", default=[]) args = parser.parse_args() algo_args = json.loads(args.build) print(algo_args) @@ -205,21 +193,28 @@ def run_from_cmdline(): constructor=args.constructor, arguments=algo_args, query_argument_groups=query_args, - disabled=False + disabled=False, ) run(definition, args.dataset, args.count, args.runs, args.batch) -def run_docker(definition, dataset, count, runs, timeout, batch, cpu_limit, - mem_limit=None): - cmd = ['--dataset', dataset, - '--algorithm', definition.algorithm, - '--module', definition.module, - '--constructor', definition.constructor, - '--runs', str(runs), - '--count', str(count)] +def run_docker(definition, dataset, count, runs, timeout, batch, cpu_limit, mem_limit=None): + cmd = [ + "--dataset", + dataset, + "--algorithm", + definition.algorithm, + "--module", + definition.module, + "--constructor", + definition.constructor, + "--runs", + str(runs), + "--count", + str(count), + ] if batch: - cmd += ['--batch'] + cmd += ["--batch"] cmd.append(json.dumps(definition.arguments)) cmd += [json.dumps(qag) for qag in definition.query_argument_groups] @@ -231,24 +226,24 @@ def run_docker(definition, dataset, count, runs, timeout, batch, cpu_limit, definition.docker_tag, cmd, volumes={ - os.path.abspath('ann_benchmarks'): - {'bind': '/home/app/ann_benchmarks', 'mode': 'ro'}, - os.path.abspath('data'): - {'bind': '/home/app/data', 'mode': 'ro'}, - os.path.abspath('results'): - {'bind': '/home/app/results', 'mode': 'rw'}, + os.path.abspath("ann_benchmarks"): {"bind": "/home/app/ann_benchmarks", "mode": "ro"}, + os.path.abspath("data"): {"bind": "/home/app/data", "mode": "ro"}, + os.path.abspath("results"): {"bind": "/home/app/results", "mode": "rw"}, }, cpuset_cpus=cpu_limit, mem_limit=mem_limit, - detach=True) + detach=True, + ) logger = logging.getLogger(f"annb.{container.short_id}") - logger.info('Created container %s: CPU limit %s, mem limit %s, timeout %d, command %s' % \ - (container.short_id, cpu_limit, mem_limit, timeout, cmd)) + logger.info( + "Created container %s: CPU limit %s, mem limit %s, timeout %d, command %s" + % (container.short_id, cpu_limit, mem_limit, timeout, cmd) + ) def stream_logs(): for line in container.logs(stream=True): - logger.info(colors.color(line.decode().rstrip(), fg='blue')) + logger.info(colors.color(line.decode().rstrip(), fg="blue")) t = threading.Thread(target=stream_logs, daemon=True) t.start() @@ -257,21 +252,22 @@ def stream_logs(): return_value = container.wait(timeout=timeout) _handle_container_return_value(return_value, container, logger) except: - logger.error('Container.wait for container %s failed with exception' % container.short_id) + logger.error("Container.wait for container %s failed with exception" % container.short_id) traceback.print_exc() finally: container.remove(force=True) + def _handle_container_return_value(return_value, container, logger): - base_msg = 'Child process for container %s' % (container.short_id) - if type(return_value) is dict: # The return value from container.wait changes from int to dict in docker 3.0.0 - error_msg = return_value['Error'] - exit_code = return_value['StatusCode'] - msg = base_msg + 'returned exit code %d with message %s' %(exit_code, error_msg) - else: + base_msg = "Child process for container %s" % 
(container.short_id) + if type(return_value) is dict: # The return value from container.wait changes from int to dict in docker 3.0.0 + error_msg = return_value["Error"] + exit_code = return_value["StatusCode"] + msg = base_msg + "returned exit code %d with message %s" % (exit_code, error_msg) + else: exit_code = return_value - msg = base_msg + 'returned exit code %d' % (exit_code) + msg = base_msg + "returned exit code %d" % (exit_code) if exit_code not in [0, None]: - logger.error(colors.color(container.logs().decode(), fg='red')) + logger.error(colors.color(container.logs().decode(), fg="red")) logger.error(msg) diff --git a/create_dataset.py b/create_dataset.py index b9463a8e0..0726b470f 100644 --- a/create_dataset.py +++ b/create_dataset.py @@ -3,10 +3,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument( - '--dataset', - choices=DATASETS.keys(), - required=True) + parser.add_argument("--dataset", choices=DATASETS.keys(), required=True) args = parser.parse_args() fn = get_dataset_fn(args.dataset) DATASETS[args.dataset](fn) diff --git a/create_website.py b/create_website.py index 88094775d..7cc6481e1 100644 --- a/create_website.py +++ b/create_website.py @@ -1,23 +1,16 @@ import matplotlib as mpl -mpl.use('Agg') # noqa + +mpl.use("Agg") # noqa import argparse import os -import json -import pickle -import yaml -import numpy import hashlib from jinja2 import Environment, FileSystemLoader from ann_benchmarks import results from ann_benchmarks.datasets import get_dataset -from ann_benchmarks.plotting.plot_variants import (all_plot_variants - as plot_variants) +from ann_benchmarks.plotting.plot_variants import all_plot_variants as plot_variants from ann_benchmarks.plotting.metrics import all_metrics as metrics -from ann_benchmarks.plotting.utils import (get_plot_label, compute_metrics, - compute_all_metrics, - create_pointset, - create_linestyles) +from ann_benchmarks.plotting.utils import get_plot_label, compute_all_metrics, create_pointset, create_linestyles import plot colors = [ @@ -29,7 +22,7 @@ "rgba(227,26,28,1)", "rgba(253,191,111,1)", "rgba(255,127,0,1)", - "rgba(202,178,214,1)" + "rgba(202,178,214,1)", ] point_styles = { @@ -43,17 +36,19 @@ def convert_color(color): r, g, b, a = color - return "rgba(%(r)d, %(g)d, %(b)d, %(a)d)" % { - "r": r * 255, "g": g * 255, "b": b * 255, "a": a} + return "rgba(%(r)d, %(g)d, %(b)d, %(a)d)" % {"r": r * 255, "g": g * 255, "b": b * 255, "a": a} def convert_linestyle(ls): new_ls = {} for algo in ls.keys(): algostyle = ls[algo] - new_ls[algo] = (convert_color(algostyle[0]), - convert_color(algostyle[1]), - algostyle[2], point_styles[algostyle[3]]) + new_ls[algo] = ( + convert_color(algostyle[0]), + convert_color(algostyle[1]), + algostyle[2], + point_styles[algostyle[3]], + ) return new_ls @@ -74,8 +69,7 @@ def get_distance_from_desc(desc): def get_dataset_label(desc): - return "{} (k = {})".format(get_dataset_from_desc(desc), - get_count_from_desc(desc)) + return "{} (k = {})".format(get_dataset_from_desc(desc), get_count_from_desc(desc)) def directory_path(s): @@ -94,132 +88,115 @@ def prepare_data(data, xn, yn): parser = argparse.ArgumentParser() parser.add_argument( - '--plottype', - help='Generate only the plots specified', - nargs='*', + "--plottype", + help="Generate only the plots specified", + nargs="*", choices=plot_variants.keys(), - default=plot_variants.keys()) -parser.add_argument( - '--outputdir', - help='Select output directory', - default='.', - type=directory_path, - action='store') 
-parser.add_argument( - '--latex', - help='generates latex code for each plot', - action='store_true') -parser.add_argument( - '--scatter', - help='create scatterplot for data', - action='store_true') -parser.add_argument( - '--recompute', - help='Clears the cache and recomputes the metrics', - action='store_true') + default=plot_variants.keys(), +) +parser.add_argument("--outputdir", help="Select output directory", default=".", type=directory_path, action="store") +parser.add_argument("--latex", help="generates latex code for each plot", action="store_true") +parser.add_argument("--scatter", help="create scatterplot for data", action="store_true") +parser.add_argument("--recompute", help="Clears the cache and recomputes the metrics", action="store_true") args = parser.parse_args() def get_lines(all_data, xn, yn, render_all_points): - """ For each algorithm run on a dataset, obtain its performance + """For each algorithm run on a dataset, obtain its performance curve coords.""" plot_data = [] for algo in sorted(all_data.keys(), key=lambda x: x.lower()): - xs, ys, ls, axs, ays, als = \ - create_pointset(prepare_data(all_data[algo], xn, yn), xn, yn) + xs, ys, ls, axs, ays, als = create_pointset(prepare_data(all_data[algo], xn, yn), xn, yn) if render_all_points: xs, ys, ls = axs, ays, als - plot_data.append({"name": algo, "coords": zip(xs, ys), "labels": ls, - "scatter": render_all_points}) + plot_data.append({"name": algo, "coords": zip(xs, ys), "labels": ls, "scatter": render_all_points}) return plot_data -def create_plot(all_data, xn, yn, linestyle, j2_env, additional_label="", - plottype="line"): +def create_plot(all_data, xn, yn, linestyle, j2_env, additional_label="", plottype="line"): xm, ym = (metrics[xn], metrics[yn]) render_all_points = plottype == "bubble" plot_data = get_lines(all_data, xn, yn, render_all_points) - latex_code = j2_env.get_template("latex.template").\ - render(plot_data=plot_data, caption=get_plot_label(xm, ym), - xlabel=xm["description"], ylabel=ym["description"]) + latex_code = j2_env.get_template("latex.template").render( + plot_data=plot_data, caption=get_plot_label(xm, ym), xlabel=xm["description"], ylabel=ym["description"] + ) plot_data = get_lines(all_data, xn, yn, render_all_points) - button_label = hashlib.sha224((get_plot_label(xm, ym) + additional_label) - .encode("utf-8")).hexdigest() - return j2_env.get_template("chartjs.template").\ - render(args=args, latex_code=latex_code, button_label=button_label, - data_points=plot_data, - xlabel=xm["description"], ylabel=ym["description"], - plottype=plottype, plot_label=get_plot_label(xm, ym), - label=additional_label, linestyle=linestyle, - render_all_points=render_all_points) + button_label = hashlib.sha224((get_plot_label(xm, ym) + additional_label).encode("utf-8")).hexdigest() + return j2_env.get_template("chartjs.template").render( + args=args, + latex_code=latex_code, + button_label=button_label, + data_points=plot_data, + xlabel=xm["description"], + ylabel=ym["description"], + plottype=plottype, + plot_label=get_plot_label(xm, ym), + label=additional_label, + linestyle=linestyle, + render_all_points=render_all_points, + ) def build_detail_site(data, label_func, j2_env, linestyles, batch=False): for (name, runs) in data.items(): print("Building '%s'" % name) - all_runs = runs.keys() + runs.keys() label = label_func(name) data = {"normal": [], "scatter": []} for plottype in args.plottype: xn, yn = plot_variants[plottype] - data["normal"].append(create_plot( - runs, xn, yn, convert_linestyle(linestyles), 
j2_env)) + data["normal"].append(create_plot(runs, xn, yn, convert_linestyle(linestyles), j2_env)) if args.scatter: data["scatter"].append( - create_plot(runs, xn, yn, convert_linestyle(linestyles), - j2_env, "Scatterplot ", "bubble")) + create_plot(runs, xn, yn, convert_linestyle(linestyles), j2_env, "Scatterplot ", "bubble") + ) # create png plot for summary page data_for_plot = {} for k in runs.keys(): - data_for_plot[k] = prepare_data(runs[k], 'k-nn', 'qps') + data_for_plot[k] = prepare_data(runs[k], "k-nn", "qps") plot.create_plot( - data_for_plot, False, - 'linear', 'log', 'k-nn', 'qps', - args.outputdir + name + '.png', - linestyles, batch) - output_path = \ - args.outputdir + name + '.html' + data_for_plot, False, "linear", "log", "k-nn", "qps", args.outputdir + name + ".png", linestyles, batch + ) + output_path = args.outputdir + name + ".html" with open(output_path, "w") as text_file: - text_file.write(j2_env.get_template("detail_page.html"). - render(title=label, plot_data=data, - args=args, batch=batch)) + text_file.write( + j2_env.get_template("detail_page.html").render(title=label, plot_data=data, args=args, batch=batch) + ) def build_index_site(datasets, algorithms, j2_env, file_name): - dataset_data = {'batch': [], 'non-batch': []} - for mode in ['batch', 'non-batch']: - distance_measures = sorted( - set([get_distance_from_desc(e) for e in datasets[mode].keys()])) - sorted_datasets = sorted( - set([get_dataset_from_desc(e) for e in datasets[mode].keys()])) + dataset_data = {"batch": [], "non-batch": []} + for mode in ["batch", "non-batch"]: + distance_measures = sorted(set([get_distance_from_desc(e) for e in datasets[mode].keys()])) + sorted_datasets = sorted(set([get_dataset_from_desc(e) for e in datasets[mode].keys()])) for dm in distance_measures: d = {"name": dm.capitalize(), "entries": []} for ds in sorted_datasets: - matching_datasets = [e for e in datasets[mode].keys() - if get_dataset_from_desc(e) == ds and # noqa - get_distance_from_desc(e) == dm] - sorted_matches = sorted( - matching_datasets, - key=lambda e: int(get_count_from_desc(e))) + matching_datasets = [ + e + for e in datasets[mode].keys() + if get_dataset_from_desc(e) == ds and get_distance_from_desc(e) == dm # noqa + ] + sorted_matches = sorted(matching_datasets, key=lambda e: int(get_count_from_desc(e))) for idd in sorted_matches: - d["entries"].append( - {"name": idd, "desc": get_dataset_label(idd)}) + d["entries"].append({"name": idd, "desc": get_dataset_label(idd)}) dataset_data[mode].append(d) with open(args.outputdir + "index.html", "w") as text_file: - text_file.write(j2_env.get_template("summary.html"). 
- render(title="ANN-Benchmarks", - dataset_with_distances=dataset_data, - algorithms=algorithms)) + text_file.write( + j2_env.get_template("summary.html").render( + title="ANN-Benchmarks", dataset_with_distances=dataset_data, algorithms=algorithms + ) + ) def load_all_results(): """Read all result files and compute all metrics""" - all_runs_by_dataset = {'batch': {}, 'non-batch': {}} - all_runs_by_algorithm = {'batch': {}, 'non-batch': {}} + all_runs_by_dataset = {"batch": {}, "non-batch": {}} + all_runs_by_algorithm = {"batch": {}, "non-batch": {}} cached_true_dist = [] old_sdn = None for mode in ["non-batch", "batch"]: @@ -230,15 +207,12 @@ def load_all_results(): cached_true_dist = list(dataset["distances"]) old_sdn = sdn algo_ds = get_dataset_label(sdn) - desc_suffix = ("-batch" if mode == "batch" else "") + desc_suffix = "-batch" if mode == "batch" else "" algo = properties["algo"] + desc_suffix sdn += desc_suffix - ms = compute_all_metrics( - cached_true_dist, f, properties, args.recompute) - all_runs_by_algorithm[mode].setdefault( - algo, {}).setdefault(algo_ds, []).append(ms) - all_runs_by_dataset[mode].setdefault( - sdn, {}).setdefault(algo, []).append(ms) + ms = compute_all_metrics(cached_true_dist, f, properties, args.recompute) + all_runs_by_algorithm[mode].setdefault(algo, {}).setdefault(algo_ds, []).append(ms) + all_runs_by_dataset[mode].setdefault(sdn, {}).setdefault(algo, []).append(ms) return (all_runs_by_dataset, all_runs_by_algorithm) @@ -246,27 +220,17 @@ def load_all_results(): j2_env = Environment(loader=FileSystemLoader("./templates/"), trim_blocks=True) j2_env.globals.update(zip=zip, len=len) runs_by_ds, runs_by_algo = load_all_results() -dataset_names = [get_dataset_label(x) for x in list( - runs_by_ds['batch'].keys()) + list(runs_by_ds['non-batch'].keys())] -algorithm_names = list(runs_by_algo['batch'].keys( -)) + list(runs_by_algo['non-batch'].keys()) +dataset_names = [get_dataset_label(x) for x in list(runs_by_ds["batch"].keys()) + list(runs_by_ds["non-batch"].keys())] +algorithm_names = list(runs_by_algo["batch"].keys()) + list(runs_by_algo["non-batch"].keys()) -linestyles = {**create_linestyles(dataset_names), - **create_linestyles(algorithm_names)} +linestyles = {**create_linestyles(dataset_names), **create_linestyles(algorithm_names)} -build_detail_site( - runs_by_ds['non-batch'], - lambda label: get_dataset_label(label), j2_env, linestyles, False) +build_detail_site(runs_by_ds["non-batch"], lambda label: get_dataset_label(label), j2_env, linestyles, False) -build_detail_site( - runs_by_ds['batch'], - lambda label: get_dataset_label(label), j2_env, linestyles, True) +build_detail_site(runs_by_ds["batch"], lambda label: get_dataset_label(label), j2_env, linestyles, True) -build_detail_site( - runs_by_algo['non-batch'], - lambda x: x, j2_env, linestyles, False) +build_detail_site(runs_by_algo["non-batch"], lambda x: x, j2_env, linestyles, False) -build_detail_site( - runs_by_algo['batch'], lambda x: x, j2_env, linestyles, True) +build_detail_site(runs_by_algo["batch"], lambda x: x, j2_env, linestyles, True) build_index_site(runs_by_ds, runs_by_algo, j2_env, "index.html") diff --git a/data_export.py b/data_export.py index d253219b6..343f3acc3 100644 --- a/data_export.py +++ b/data_export.py @@ -2,19 +2,13 @@ import csv from ann_benchmarks.datasets import DATASETS, get_dataset -from ann_benchmarks.plotting.utils import compute_metrics_all_runs +from ann_benchmarks.plotting.utils import compute_metrics_all_runs from ann_benchmarks.results import load_all_results 
if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument( - '--output', - help='Path to the output file', - required=True) - parser.add_argument( - '--recompute', - action='store_true', - help='Recompute metrics') + parser.add_argument("--output", help="Path to the output file", required=True) + parser.add_argument("--recompute", action="store_true", help="Recompute metrics") args = parser.parse_args() datasets = DATASETS.keys() @@ -26,13 +20,12 @@ dataset, _ = get_dataset(dataset_name) results = compute_metrics_all_runs(dataset, results, args.recompute) for res in results: - res['dataset'] = dataset_name + res["dataset"] = dataset_name dfs.append(res) if len(dfs) > 0: - with open(args.output, 'w', newline='') as csvfile: + with open(args.output, "w", newline="") as csvfile: names = list(dfs[0].keys()) writer = csv.DictWriter(csvfile, fieldnames=names) writer.writeheader() for res in dfs: writer.writerow(res) - diff --git a/install.py b/install.py index 4e41c0892..178cb81a9 100644 --- a/install.py +++ b/install.py @@ -7,19 +7,20 @@ def build(library, args): - print('Building %s...' % library) + print("Building %s..." % library) if args is not None and len(args) != 0: q = " ".join(["--build-arg " + x.replace(" ", "\\ ") for x in args]) else: q = "" - + try: subprocess.check_call( - 'docker build %s --rm -t ann-benchmarks-%s -f' - ' install/Dockerfile.%s .' % (q, library, library), shell=True) - return {library: 'success'} + "docker build %s --rm -t ann-benchmarks-%s -f" " install/Dockerfile.%s ." % (q, library, library), + shell=True, + ) + return {library: "success"} except subprocess.CalledProcessError: - return {library: 'fail'} + return {library: "fail"} def build_multiprocess(args): @@ -27,37 +28,27 @@ def build_multiprocess(args): if __name__ == "__main__": - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - "--proc", - default=1, - type=positive_int, - help="the number of process to build docker images") - parser.add_argument( - '--algorithm', - metavar='NAME', - help='build only the named algorithm image', - default=None) - parser.add_argument( - '--build-arg', - help='pass given args to all docker builds', - nargs="+") + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("--proc", default=1, type=positive_int, help="the number of process to build docker images") + parser.add_argument("--algorithm", metavar="NAME", help="build only the named algorithm image", default=None) + parser.add_argument("--build-arg", help="pass given args to all docker builds", nargs="+") args = parser.parse_args() - print('Building base image...') + print("Building base image...") subprocess.check_call( - 'docker build \ - --rm -t ann-benchmarks -f install/Dockerfile .', shell=True) + "docker build \ + --rm -t ann-benchmarks -f install/Dockerfile .", + shell=True, + ) if args.algorithm: tags = [args.algorithm] - elif os.getenv('LIBRARY'): - tags = [os.getenv('LIBRARY')] + elif os.getenv("LIBRARY"): + tags = [os.getenv("LIBRARY")] else: - tags = [fn.split('.')[-1] for fn in os.listdir('install') if fn.startswith('Dockerfile.')] + tags = [fn.split(".")[-1] for fn in os.listdir("install") if fn.startswith("Dockerfile.")] - print('Building algorithm images... with (%d) processes' % args.proc) + print("Building algorithm images... 
with (%d) processes" % args.proc) if args.proc == 1: install_status = [build(tag, args.build_arg) for tag in tags] @@ -67,10 +58,10 @@ def build_multiprocess(args): pool.close() pool.join() - print('\n\nInstall Status:\n' + '\n'.join(str(algo) for algo in install_status)) + print("\n\nInstall Status:\n" + "\n".join(str(algo) for algo in install_status)) # Exit 1 if any of the installations fail. for x in install_status: - for (k,v) in x.items(): - if v == 'fail': + for (k, v) in x.items(): + if v == "fail": sys.exit(1) diff --git a/plot.py b/plot.py index f9784dbb6..660060d4e 100644 --- a/plot.py +++ b/plot.py @@ -1,21 +1,17 @@ -import os import matplotlib as mpl -mpl.use('Agg') # noqa + +mpl.use("Agg") # noqa import matplotlib.pyplot as plt import numpy as np import argparse from ann_benchmarks.datasets import get_dataset -from ann_benchmarks.algorithms.definitions import get_definitions from ann_benchmarks.plotting.metrics import all_metrics as metrics -from ann_benchmarks.plotting.utils import (get_plot_label, compute_metrics, - create_linestyles, create_pointset) -from ann_benchmarks.results import (store_results, load_all_results, - get_unique_algorithms) +from ann_benchmarks.plotting.utils import get_plot_label, compute_metrics, create_linestyles, create_pointset +from ann_benchmarks.results import load_all_results, get_unique_algorithms -def create_plot(all_data, raw, x_scale, y_scale, xn, yn, fn_out, linestyles, - batch): +def create_plot(all_data, raw, x_scale, y_scale, xn, yn, fn_out, linestyles, batch): xm, ym = (metrics[xn], metrics[yn]) # Now generate each plot handles = [] @@ -26,134 +22,119 @@ def create_plot(all_data, raw, x_scale, y_scale, xn, yn, fn_out, linestyles, def mean_y(algo): xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn) return -np.log(np.array(ys)).mean() + # Find range for logit x-scale min_x, max_x = 1, 0 for algo in sorted(all_data.keys(), key=mean_y): xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn) - min_x = min([min_x]+[x for x in xs if x > 0]) - max_x = max([max_x]+[x for x in xs if x < 1]) + min_x = min([min_x] + [x for x in xs if x > 0]) + max_x = max([max_x] + [x for x in xs if x < 1]) color, faded, linestyle, marker = linestyles[algo] - handle, = plt.plot(xs, ys, '-', label=algo, color=color, - ms=7, mew=3, lw=3, linestyle=linestyle, - marker=marker) + (handle,) = plt.plot( + xs, ys, "-", label=algo, color=color, ms=7, mew=3, lw=3, linestyle=linestyle, marker=marker + ) handles.append(handle) if raw: - handle2, = plt.plot(axs, ays, '-', label=algo, color=faded, - ms=5, mew=2, lw=2, linestyle=linestyle, - marker=marker) + (handle2,) = plt.plot( + axs, ays, "-", label=algo, color=faded, ms=5, mew=2, lw=2, linestyle=linestyle, marker=marker + ) labels.append(algo) ax = plt.gca() - ax.set_ylabel(ym['description']) - ax.set_xlabel(xm['description']) + ax.set_ylabel(ym["description"]) + ax.set_xlabel(xm["description"]) # Custom scales of the type --x-scale a3 - if x_scale[0] == 'a': + if x_scale[0] == "a": alpha = float(x_scale[1:]) - fun = lambda x: 1-(1-x)**(1/alpha) - inv_fun = lambda x: 1-(1-x)**alpha - ax.set_xscale('function', functions=(fun, inv_fun)) + + def fun(x): + return 1 - (1 - x) ** (1 / alpha) + + def inv_fun(x): + return 1 - (1 - x) ** alpha + + ax.set_xscale("function", functions=(fun, inv_fun)) if alpha <= 3: - ticks = [inv_fun(x) for x in np.arange(0,1.2,.2)] + ticks = [inv_fun(x) for x in np.arange(0, 1.2, 0.2)] plt.xticks(ticks) if alpha > 3: from matplotlib import ticker + 
ax.xaxis.set_major_formatter(ticker.LogitFormatter()) - #plt.xticks(ticker.LogitLocator().tick_values(min_x, max_x)) - plt.xticks([0, 1/2, 1-1e-1, 1-1e-2, 1-1e-3, 1-1e-4, 1]) + # plt.xticks(ticker.LogitLocator().tick_values(min_x, max_x)) + plt.xticks([0, 1 / 2, 1 - 1e-1, 1 - 1e-2, 1 - 1e-3, 1 - 1e-4, 1]) # Other x-scales else: ax.set_xscale(x_scale) ax.set_yscale(y_scale) ax.set_title(get_plot_label(xm, ym)) - box = plt.gca().get_position() + plt.gca().get_position() # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height]) - ax.legend(handles, labels, loc='center left', - bbox_to_anchor=(1, 0.5), prop={'size': 9}) - plt.grid(b=True, which='major', color='0.65', linestyle='-') + ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1, 0.5), prop={"size": 9}) + plt.grid(b=True, which="major", color="0.65", linestyle="-") plt.setp(ax.get_xminorticklabels(), visible=True) # Logit scale has to be a subset of (0,1) - if 'lim' in xm and x_scale != 'logit': - x0, x1 = xm['lim'] - plt.xlim(max(x0,0), min(x1,1)) - elif x_scale == 'logit': + if "lim" in xm and x_scale != "logit": + x0, x1 = xm["lim"] + plt.xlim(max(x0, 0), min(x1, 1)) + elif x_scale == "logit": plt.xlim(min_x, max_x) - if 'lim' in ym: - plt.ylim(ym['lim']) + if "lim" in ym: + plt.ylim(ym["lim"]) # Workaround for bug https://github.com/matplotlib/matplotlib/issues/6789 - ax.spines['bottom']._adjust_location() + ax.spines["bottom"]._adjust_location() - plt.savefig(fn_out, bbox_inches='tight') + plt.savefig(fn_out, bbox_inches="tight") plt.close() if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("--dataset", metavar="DATASET", default="glove-100-angular") + parser.add_argument("--count", default=10) parser.add_argument( - '--dataset', - metavar="DATASET", - default='glove-100-angular') + "--definitions", metavar="FILE", help="load algorithm definitions from FILE", default="algos.yaml" + ) + parser.add_argument("--limit", default=-1) + parser.add_argument("-o", "--output") parser.add_argument( - '--count', - default=10) + "-x", "--x-axis", help="Which metric to use on the X-axis", choices=metrics.keys(), default="k-nn" + ) parser.add_argument( - '--definitions', - metavar='FILE', - help='load algorithm definitions from FILE', - default='algos.yaml') + "-y", "--y-axis", help="Which metric to use on the Y-axis", choices=metrics.keys(), default="qps" + ) parser.add_argument( - '--limit', - default=-1) + "-X", "--x-scale", help="Scale to use when drawing the X-axis. Typically linear, logit or a2", default="linear" + ) parser.add_argument( - '-o', '--output') - parser.add_argument( - '-x', '--x-axis', - help='Which metric to use on the X-axis', - choices=metrics.keys(), - default="k-nn") - parser.add_argument( - '-y', '--y-axis', - help='Which metric to use on the Y-axis', - choices=metrics.keys(), - default="qps") - parser.add_argument( - '-X', '--x-scale', - help='Scale to use when drawing the X-axis. 
Typically linear, logit or a2', - default='linear') - parser.add_argument( - '-Y', '--y-scale', - help='Scale to use when drawing the Y-axis', + "-Y", + "--y-scale", + help="Scale to use when drawing the Y-axis", choices=["linear", "log", "symlog", "logit"], - default='linear') - parser.add_argument( - '--raw', - help='Show raw results (not just Pareto frontier) in faded colours', - action='store_true') - parser.add_argument( - '--batch', - help='Plot runs in batch mode', - action='store_true') + default="linear", + ) parser.add_argument( - '--recompute', - help='Clears the cache and recomputes the metrics', - action='store_true') + "--raw", help="Show raw results (not just Pareto frontier) in faded colours", action="store_true" + ) + parser.add_argument("--batch", help="Plot runs in batch mode", action="store_true") + parser.add_argument("--recompute", help="Clears the cache and recomputes the metrics", action="store_true") args = parser.parse_args() if not args.output: - args.output = 'results/%s.png' % (args.dataset + ('-batch' if args.batch else '')) - print('writing output to %s' % args.output) + args.output = "results/%s.png" % (args.dataset + ("-batch" if args.batch else "")) + print("writing output to %s" % args.output) dataset, _ = get_dataset(args.dataset) count = int(args.count) unique_algorithms = get_unique_algorithms() results = load_all_results(args.dataset, count, args.batch) linestyles = create_linestyles(sorted(unique_algorithms)) - runs = compute_metrics(np.array(dataset["distances"]), - results, args.x_axis, args.y_axis, args.recompute) + runs = compute_metrics(np.array(dataset["distances"]), results, args.x_axis, args.y_axis, args.recompute) if not runs: - raise Exception('Nothing to plot') + raise Exception("Nothing to plot") - create_plot(runs, args.raw, args.x_scale, - args.y_scale, args.x_axis, args.y_axis, args.output, - linestyles, args.batch) + create_plot( + runs, args.raw, args.x_scale, args.y_scale, args.x_axis, args.y_axis, args.output, linestyles, args.batch + )
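
A hypothetical usage sketch, not part of the patch itself: it assumes the ann_benchmarks package from this repository is importable and that a populated results/ directory already exists (for example after benchmark runs have completed); the dataset name "glove-100-angular" and count 10 are illustrative placeholders. It shows how the result files written by store_results() in ann_benchmarks/results.py can be read back with load_all_results(), which yields the stored attributes together with the open HDF5 file holding the "times", "neighbors" and "distances" datasets.

import numpy as np

from ann_benchmarks.results import get_result_filename, load_all_results

# With no arguments, get_result_filename() returns just the top-level
# "results" directory; with arguments it composes
# results/<dataset>/<count>/<algorithm>[-batch]/<sanitised args>.hdf5.
print(get_result_filename())

# Each yielded pair is (attrs dict, open HDF5 file); the file holds the
# per-query "times", "neighbors" and "distances" datasets written by
# store_results(), and the attrs carry e.g. "algo", "build_time" and
# "best_search_time" as set in runner.py.
for properties, f in load_all_results(dataset="glove-100-angular", count=10):
    times = np.array(f["times"])
    print(
        "%s: build %.1fs, best search %.4fs, mean query %.6fs"
        % (properties["algo"], properties["build_time"], properties["best_search_time"], times.mean())
    )

One caveat visible in results.py above: load_all_results() opens each file in "r+" mode, so the results directory must be writable while iterating.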