Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions example/minecraft.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
metrics: [SNR_3x2]
bands: riz
training_file: data_buzzard/training.hdf5
validation_file: data_buzzard/validation.hdf5
output_file: example/minecraft_output.txt

run:
MineCraft:
run_0:
blocks: 62
zbins: 11

159 changes: 159 additions & 0 deletions tomo_challenge/classifiers/minecraft.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
"""
Trivial classifiers. Used as example and testing.

Every classifier module needs to:
- have construction of the type
__init__ (self, bands, options) (see examples below)
- implement two functions:
train (self, training_data,training_z)
apply (self, data).
- define valid_options class varible.

See Classifier Documentation below.


"""
import numpy as np
import matplotlib.pyplot as plt
from .base import Tomographer

class MineCraft(Tomographer):
"""Completely random classifier.

Every object goes into a random bin.
"""

## see constructor below
valid_options = ['blocks','zbins']

def __init__ (self, bands, options):
"""Constructor

Parameters:
-----------
bands: str
string containg valid bands, like 'riz' or 'griz'
options: dict
options come through here. Valid keys are listed as valid_options
class variable.

Note:
-----
Valiad options are:
'bins' - number of blocks per in direction

"""
self.opt = {'blocks':40, 'zbins':10}
self.bands = bands
self.Nb = len(bands)
self.opt.update(options)


def train (self,training_data, training_z):
"""Trains the classifier

Parameters:
-----------
training_data: numpy array, size Ngalaxes x Nbands
training data, each row is a galaxy, each column is a band as per
band defined above
training_z: numpy array, size Ngalaxies
true redshift for the training sample

"""
#data = np.vstack([training_data[band] for band in self.bands]).T
bins=[]
self.bmin,self.bmax = [],[]
for band in self.bands:
da=training_data[band]
mn,mx = da.min(), da.max()*1.00001
self.bmin.append(mn)
self.bmax.append(mx)
step = (mx-mn)/self.opt['blocks']
bins.append(((da-mn)/step).astype(int))
bins = np.array(bins).T
print (bins[:4])
zmin,zmax = training_z.min(), training_z.max()
dz= (zmax-zmin)/self.opt['zbins']
self.target = {}
for i in range(self.opt['zbins']):
czmin = zmin+i*dz
czmax = czmin+dz
print (czmin,czmax)
w = np.where((training_z>czmin) & (training_z<=czmax))
print (bins.shape)
xbins = bins[w[0],:]
xbins = set([tuple(b) for b in xbins])
## now that we have unique elements, let's add
for bin in xbins:
if bin in self.target:
self.target[bin].append(i)
else:
self.target[bin] = [i]
## now reorganize
for key in self.target.keys():
self.target[key] = tuple(self.target[key])#[# key])
# if len(l)<6:
# self.target[key] = tuple(self.target[key])
# else:
# self.target[key] = ()
self.idmap={}
for binid,vals in enumerate(set(self.target.values())):
self.idmap[vals]=binid
print ('id = ',binid,':',vals)
self.idmap[()] = len(self.target.keys()) #last +1
print (self.idmap)

if False:
bincount=self.apply(training_data)
for i in range(bincount.max()):
a,b=np.histogram (training_z[bincount==i],bins=30)
b=0.5*(b[:1]+b[:-1])
plt.plot(b,a)
plt.xlabel('z')
plt.ylabel('prob')
plt.show()

#stop()


def apply (self,data):
"""Applies training to the data.

Parameters:
-----------
Data: numpy array, size Ngalaxes x Nbands
testing data, each row is a galaxy, each column is a band as per
band defined above

Returns:
tomographic_selections: numpy array, int, size Ngalaxies
tomographic selection for galaxies return as bin number for
each galaxy.
"""
bins=[]
for band,mn,mx in zip(self.bands,self.bmin,self.bmax):
da=data[band]
step = (mx-mn)/self.opt['blocks']
bins.append(((da-mn)/step).astype(int))
bins = [tuple(b) for b in np.array(bins).T]
print (bins[:4])
tomo_bin = np.array([self.idmap[dict.get(self.target,b,())] for b in bins])
No = len(tomo_bin)
binc=np.bincount(tomo_bin)
print ('bincount before=',binc/No)
minobj = int(No*0.03)
## now combine all the samples that are less than 3%:
w=np.where(binc<minobj)[0]
for j in w[1:]:
tomo_bin [tomo_bin==j] = w[0]
## now reindex, again, to get rid of intermediate zeros:
for i,j in enumerate(sorted(set(tomo_bin))):
if (i!=j):
tomo_bin[tomo_bin==j]=i

binc=np.bincount(tomo_bin)
## now get rid of zeros:

print ('bincount after=',binc/No)
return tomo_bin