Skip to content

Commit bcbcb5d

Browse files
committed
add performance test for commits
1 parent cca297a commit bcbcb5d

File tree

2 files changed

+78
-0
lines changed

2 files changed

+78
-0
lines changed

examples/git_tester.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""Benchmark hnswlib query speed commit by commit.

Walks the commits that pydriller yields for ``from_tag="v0.5.2"`` in the
current repository. For each one it checks the commit out, removes the old
``build`` directory, reinstalls the package with pip and runs a copy of
``examples/speedtest.py`` once per (dimension, thread-count) configuration.

NOTE: the script leaves the repository checked out at the last processed
commit; it does not restore the branch that was active when it started.
"""
from pydriller import Repository
import os
import datetime
import shutil
import subprocess
import sys

# (dim, threads) pairs forwarded to the speed test as ``-d`` / ``-t``.
BENCHMARK_CONFIGS = [(4, 1), (64, 1), (128, 1), (4, 24), (128, 24)]

SPEEDTEST_SRC = os.path.join("examples", "speedtest.py")
SPEEDTEST_COPY = os.path.join("examples", "speedtest2.py")

# The benchmark is run from a copy of the script (presumably so that it is
# still present after older commits, which may not contain it, are checked
# out -- confirm).
shutil.copyfile(SPEEDTEST_SRC, SPEEDTEST_COPY)

for commit in Repository('.', from_tag="v0.5.2").traverse_commits():
    print(commit.hash)
    print(commit.msg)

    # Collapse the (possibly multi-line) message into a one-line label so
    # every benchmark result occupies a single line of the log.
    label = " ".join(commit.msg.split())

    # Commands are passed as argument lists (no shell), so quotes, `$`,
    # backticks or `;` inside a commit message are never interpreted.
    # A failed checkout aborts the run: continuing would benchmark the
    # wrong code under this commit's label.
    subprocess.run(["git", "checkout", commit.hash], check=True)
    shutil.rmtree("build", ignore_errors=True)

    install = subprocess.run([sys.executable, "-m", "pip", "install", "."])
    if install.returncode != 0:
        # Skip instead of timing whatever build is still installed from a
        # previous commit.
        print(f"pip install failed for {commit.hash}; skipping benchmarks")
        continue

    for dim, threads in BENCHMARK_CONFIGS:
        # Benchmark failures are tolerated (best effort), as before.
        subprocess.run([
            sys.executable, SPEEDTEST_COPY,
            "-n", label,
            "-d", str(dim),
            "-t", str(threads),
        ])
16+

examples/speedtest.py

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
"""Time hnswlib ``knn_query`` on random data and append a summary to a log.

Usage::

    python speedtest.py -d DIM -t THREADS [-n NAME]

The index for a given DIM is built once and cached in
``speed_index{DIM}.bin``; later runs reuse that file. The summary line is
printed and appended to ``log_{DIM}_t{THREADS}.txt``.
"""
import hnswlib
import numpy as np
import os.path
import time
import argparse

ap = argparse.ArgumentParser(
    description="Measure hnswlib knn_query time for a given dimension and thread count."
)
ap.add_argument('-d', type=int, required=True, help="vector dimensionality")
ap.add_argument('-n', help="free-form label appended to the logged summary line")
ap.add_argument('-t', type=int, required=True, help="number of threads used for querying")
args = ap.parse_args()
dim = args.d
name = args.n
threads = args.t
if dim <= 0:
    # dim is used as a divisor and as an array dimension below.
    ap.error("-d must be a positive integer")

# Scale the element count inversely with dim so the total number of floats
# stays at roughly 4,000,000 for every dimension.
num_elements = 1000000 * 4 // dim

# Generating sample data (fixed seed so every run sees the same vectors)
np.random.seed(1)
data = np.float32(np.random.random((num_elements, dim)))

index_path = f'speed_index{dim}.bin'
# Declaring index
p = hnswlib.Index(space='l2', dim=dim)  # possible options are l2, cosine or ip

if not os.path.isfile(index_path):
    # No cached index for this dimension yet: build it and save it.
    p.init_index(max_elements=num_elements, ef_construction=100, M=16)

    # Controlling the recall by setting ef:
    # higher ef leads to better accuracy, but slower search
    p.set_ef(10)

    # Thread count used while building the index; the count used for the
    # timed queries is set separately below.
    p.set_num_threads(12)

    p.add_items(data)

    print("Saving index to '%s'" % index_path)
    p.save_index(index_path)

p.set_num_threads(threads)
times = []
time.sleep(10)
# NOTE(review): ef is set before load_index(); confirm that the installed
# hnswlib version keeps this value across the load.
p.set_ef(100)
for _ in range(3):
    p.load_index(index_path)
    for _ in range(10):
        # perf_counter is monotonic and high-resolution, unlike time.time().
        t0 = time.perf_counter()
        p.knn_query(data, k=1)
        tt = time.perf_counter() - t0
        times.append(tt)
        print(f"{tt} seconds")

str_out = f"mean time:{np.mean(times)}, median time:{np.median(times)}, std time {np.std(times)} {name}"
print(str_out)
# Append, so results from successive runs accumulate in the same file;
# leaving the with-block flushes and closes the file.
with open(f"log_{dim}_t{threads}.txt", "a") as f:
    f.write(str_out + "\n")
62+

0 commit comments

Comments
 (0)