@@ -18,7 +18,6 @@ def awesome_cossim_topn(
18
18
lower_bound = 0 ,
19
19
use_threads = False ,
20
20
n_jobs = 1 ,
21
- ntop_is_flexible = False ,
22
21
mem_manager_is_C = False ,
23
22
return_best_topn = False
24
23
):
@@ -35,13 +34,9 @@ def awesome_cossim_topn(
35
34
lower_bound: a threshold that each element of A*B must be greater than
36
35
use_threads: whether to use multiple threads
37
36
n_jobs: number of threads, must be >= 1
38
- ntop_is_flexible: (default: False) if True, memory management will be handed
39
- over to C/C++ whenever python's attempt at allocating
40
- memory fails.
41
37
mem_manager_is_C: (default: False) this is mainly for testing purposes. If
42
38
True, will force memory management to be handed over to
43
- C/C++. Should be used only when ntop >= number of columns
44
- of B or ntop_is_flexible=True.
39
+ C/C++.
45
40
return_best_topn: (default: False) if True, will return best_topn together
46
41
with C as a tuple: (C, best_topn)
47
42
@@ -82,58 +77,46 @@ def awesome_cossim_topn(
82
77
return output
83
78
84
79
# filled matrices from here on
85
- indptr = np .empty (M + 1 , dtype = idx_dtype )
80
+ indptr = np .empty (M + 1 , dtype = idx_dtype )
86
81
try :
87
82
indices = np .empty (nnz_max , dtype = idx_dtype )
88
83
data = np .empty (nnz_max , dtype = A .dtype )
89
-
90
84
if mem_manager_is_C : raise MemoryError # This is mainly for testing purposes
91
-
92
85
except MemoryError :
93
86
# if mem_manager_is_C: print('Exception raised! Continuing ...', flush=True)
94
- if ntop_is_flexible or ntop >= N :
95
87
# It is likely you are here because nnz_max is too large. But don't give up just yet!
96
88
# sparse_dot_topn will hand over the memory allocation/management to C++. C++ will
97
89
# grow the memory allocations for these arrays as needed without any need for nnz_max.
98
90
# Note that reallocations could occur, causing data to be copied to other locations
99
91
# in memory thus impacting performance
100
- indices = np .empty (0 , dtype = idx_dtype )
101
- data = np .empty (0 , dtype = A .dtype )
102
- if not use_threads :
103
-
104
- indices , data , best_topn = ct .sparse_dot_free (
105
- M , N , np .asarray (A .indptr , dtype = idx_dtype ),
106
- np .asarray (A .indices , dtype = idx_dtype ),
107
- A .data ,
108
- np .asarray (B .indptr , dtype = idx_dtype ),
109
- np .asarray (B .indices , dtype = idx_dtype ),
110
- B .data ,
111
- lower_bound ,
112
- indptr
113
- )
114
- else :
115
-
116
- indices , data , best_topn = ct_thread .sparse_dot_free_threaded (
117
- M , N , np .asarray (A .indptr , dtype = idx_dtype ),
118
- np .asarray (A .indices , dtype = idx_dtype ),
119
- A .data ,
120
- np .asarray (B .indptr , dtype = idx_dtype ),
121
- np .asarray (B .indices , dtype = idx_dtype ),
122
- B .data ,
123
- lower_bound ,
124
- indptr , n_jobs
125
- )
92
+ indices = np .empty (0 , dtype = idx_dtype )
93
+ data = np .empty (0 , dtype = A .dtype )
94
+ if not use_threads :
95
+
96
+ indices , data , best_topn = ct .sparse_dot_free (
97
+ M , N , np .asarray (A .indptr , dtype = idx_dtype ),
98
+ np .asarray (A .indices , dtype = idx_dtype ),
99
+ A .data ,
100
+ np .asarray (B .indptr , dtype = idx_dtype ),
101
+ np .asarray (B .indices , dtype = idx_dtype ),
102
+ B .data ,
103
+ ntop , lower_bound ,
104
+ indptr
105
+ )
106
+
126
107
else :
127
108
128
- if mem_manager_is_C :
129
- raise Exception (
130
- 'When mem_manager_is_C=True, set ntop >= B.shape[1], or set ntop_is_flexible=True'
131
- )
132
- else :
133
- raise Exception (
134
- 'Not enough memory! Data array is too large. Try reducing the value of ntop.'
135
- 'or set ntop_is_flexible=True'
136
- )
109
+ indices , data , best_topn = ct_thread .sparse_dot_free_threaded (
110
+ M , N , np .asarray (A .indptr , dtype = idx_dtype ),
111
+ np .asarray (A .indices , dtype = idx_dtype ),
112
+ A .data ,
113
+ np .asarray (B .indptr , dtype = idx_dtype ),
114
+ np .asarray (B .indices , dtype = idx_dtype ),
115
+ B .data ,
116
+ ntop , lower_bound ,
117
+ indptr , n_jobs
118
+ )
119
+
137
120
else :
138
121
# no exception was raised, so use the old function (as it is expected to be the fastest)
139
122
@@ -152,6 +135,7 @@ def awesome_cossim_topn(
152
135
lower_bound ,
153
136
indptr , indices , data , best_topn_arr
154
137
)
138
+
155
139
else :
156
140
if n_jobs < 1 :
157
141
err_str = 'Whenever you select the multi-thread mode, n_job must be greater than or equal to 1!'
@@ -168,6 +152,7 @@ def awesome_cossim_topn(
168
152
lower_bound ,
169
153
indptr , indices , data , best_topn_arr , n_jobs
170
154
)
155
+
171
156
best_topn = best_topn_arr [0 ]
172
157
173
158
# prepare and return the output:
0 commit comments