-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathutils.py
More file actions
225 lines (170 loc) · 6.71 KB
/
utils.py
File metadata and controls
225 lines (170 loc) · 6.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
import streamlit as st
import numpy as np
import base64
import sys
import os
from urllib import request
def radec_string_to_degrees(ra_str, dec_str, ra_unit_formats, dec_unit_formats):
"""convert from weird astronomer units to useful ones (degrees)"""
ra_err_str = "The RA entered is not in the proper form: {:s}".format(ra_unit_formats)
dec_err_str = "The Dec entered is not in the proper form: {:s}".format(dec_unit_formats)
if ':' in ra_str:
try:
HH, MM, SS = [float(i) for i in ra_str.split(':')]
except ValueError:
st.write(ra_err_str)
sys.exit(ra_err_str)
ra_str = 360./24 * (HH + MM/60 + SS/3600)
if ':' in dec_str:
try:
DD, MM, SS = [float(i) for i in dec_str.split(':')]
except ValueError:
st.write(dec_err_str)
sys.exit(dec_err_str)
dec_str = DD/abs(DD) * (abs(DD) + MM/60 + SS/3600)
try:
ra = float(ra_str)
except ValueError:
st.write(ra_err_str)
sys.exit(ra_err_str)
try:
dec = float(dec_str)
except ValueError:
st.write(dec_err_str)
sys.exit(dec_err_str)
return ra, dec
def angular_separation(ra1, dec1, ra2, dec2):
"""
Angular separation between two points on a sphere.
Parameters
----------
ra1, dec1, ra2, dec2, : ra and dec in degrees
Returns
-------
angular separation in degrees
Notes
-----
see https://en.wikipedia.org/wiki/Great-circle_distance
Adapted from Astropy https://github.com/astropy/astropy/blob/main/astropy/coordinates/angle_utilities.py. I am avoiding Astropy as it can be very slow
"""
ra1 = np.radians(ra1)
ra2 = np.radians(ra2)
dec1 = np.radians(dec1)
dec2 = np.radians(dec2)
dsin_ra = np.sin(ra2 - ra1)
dcos_ra = np.cos(ra2 - ra1)
sin_dec1 = np.sin(dec1)
sin_dec2 = np.sin(dec2)
cos_dec1 = np.cos(dec1)
cos_dec2 = np.cos(dec2)
num1 = cos_dec2 * dsin_ra
num2 = cos_dec1 * sin_dec2 - sin_dec1 * cos_dec2 * dcos_ra
denominator = sin_dec1 * sin_dec2 + cos_dec1 * cos_dec2 * dcos_ra
return np.degrees(np.arctan2(np.hypot(num1, num2), denominator))
def calculate_similarity_faiss(rep, query_ind, metric='IP', nnearest=10):
#import faiss
"""
Uses Facebook's Faiss library. Has the option to reduce dimensionality
but right now it is only using simply matrix operations (dot product)
Parameters
----------
rep: representations
Metric:
IP: Inner Product
L2: L2norm
Returns
-------
indices of similar, similarity measure
"""
if not isinstance(metric, str):
sys.exit('Metric {0} must be a string'.format(metric))
dim = rep.shape[-1] # assuming 2D array (N_rep, N_dim)
if metric=='IP':
index = faiss.IndexFlatIP(dim) # distance
elif metric=='L2':
index = faiss.IndexFlatL2(dim) # distance
else:
sys.exit('Metric {0} does not exist'.format(metric))
# add all representations to index
index.add(rep)
# search for nearest instances, and return distance and indices
dist, similar_inds = index.search(rep[query_ind][None, ...], nnearest)
return similar_inds[0], dist[0]
def calculate_similarity(rep, query_ind, nnearest=10, similarity_inv=False):
"""
Calculate cosine similarity of query feature vector with all image representations
Vectors are already normalized, so cosine similarity becomes dot product
Parameters
----------
rep: array
Image representations
Returns
-------
indices of similar, similarity measure
"""
# Search for nearest instances, and return distance and indices
dist = rep @ rep[query_ind]
if similarity_inv:
# Smallest values first
similar_inds = np.argsort(dist)
else:
# Largest values first
similar_inds = np.argsort(dist)[::-1]
dist = dist[similar_inds][:nnearest]
similar_inds = similar_inds[:nnearest]
return similar_inds, dist
def retrieve_similarity(query_ind, model_version='v1'):
"""
Retreives precalculated similarity indices and distance values.
Indices and values are saved in binary files of size (sim_chunksize, nnearest),
of dtype=np.int32 and np.float32, respectively
"""
sim_chunksize = 10000
nnearest = 1000
bytes_per_dtype = 4
if model_version=='v1':
model_string = '8hour_south'
if model_version=='v2':
model_string = '8hour_south_torgb'
url_head = 'https://portal.nersc.gov/project/cusp/ssl_galaxy_surveys/galaxy_search/data/similarity_arrays/{:s}/small_chunks/'.format(model_string)
ichunk = query_ind // sim_chunksize
istart = ichunk*sim_chunksize
iend = (ichunk+1)*sim_chunksize
ngal_tot = 42272646
iend = min(iend, ngal_tot)
url_dist = os.path.join(url_head, 'dist_knearest1000_{:09d}_{:09d}.bin'.format(istart, iend))
url_inds = os.path.join(url_head, 'inds_knearest1000_{:09d}_{:09d}.bin'.format(istart, iend))
query_line = query_ind % sim_chunksize
skip_bytes = query_line*nnearest*bytes_per_dtype
with request.urlopen(request.Request(url_dist, headers={'Range': 'bytes={:d}-'.format(skip_bytes)})) as f:
dist = np.frombuffer(f.read(nnearest*bytes_per_dtype), dtype=np.float32)
with request.urlopen(request.Request(url_inds, headers={'Range': 'bytes={:d}-'.format(skip_bytes)})) as f:
similar_inds = np.frombuffer(f.read(nnearest*bytes_per_dtype), dtype=np.int32)
return similar_inds, dist
def urls_from_coordinates(catalogue, pixscale=0.262, npix=256):
"""
gets url for image cutout from https://www.legacysurvey.org/
e.g. https://www.legacysurvey.org/viewer/jpeg-cutout?ra=190.1086&dec=1.2005&width=96&layer=dr-9-south&pixscale=0.262&bands=grz
"""
urls = []
url_head = 'https://www.legacysurvey.org/viewer/jpeg-cutout?'
for i in range(catalogue['ra'].shape[0]):
urls += [url_head + 'ra={:.4f}&dec={:.4f}&size={:d}&layer=ls-dr9-south&pixscale={:.3f}&bands=grz'.format(catalogue['ra'][i], catalogue['dec'][i], npix, pixscale)]
return urls
def get_table_download_link(df):
"""
Generates a link allowing the data in a given panda dataframe to be downloaded
Parameters
----------
df: Pandas dataframe
Returns
-------
href string
Notes
-----
From https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806/2
"""
csv = df.to_csv(index=True)
b64 = base64.b64encode(csv.encode()).decode() # some strings <-> bytes conversions necessary here
href = f'<a href="data:file/csv;base64,{b64}" download="most_similar_galaxies.csv">Download table as csv file</a>'
return href