Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions bin/challenge.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
sys.path.append(root_dir)
import tomo_challenge as tc

from tomo_challenge.jax_metrics import compute_scores as jc_compute_scores

@click.command()
@click.argument('config_yaml', type=str)
def main(config_yaml):
Expand Down Expand Up @@ -62,7 +64,7 @@ def main(config_yaml):
config['metrics'])

output_file.write (f"{classifier_name} {run} {settings} {scores} \n")

print("scores= ",scores)


def run_one(classifier_name, bands, settings, train_data, train_z, valid_data,
Expand All @@ -75,12 +77,22 @@ def run_one(classifier_name, bands, settings, train_data, train_z, valid_data,
train_data = tc.dict_to_array(train_data, bands, errors=errors, colors=colors)
valid_data = tc.dict_to_array(valid_data, bands, errors=errors, colors=colors)


#JEC 23/7/2020 restrict data
train_data=train_data[:1000000,:]
valid_data=valid_data[:50000,:]


#JEC 23/7/2020 restrict data
train_z = train_z[:1000000]
valid_z = valid_z[:50000]

print ("Executing: ", classifier_name, bands, settings)

## first check if options are valid
for key in settings.keys():
if key not in classifier.valid_options and key not in ['errors', 'colors']:
raise ValueError(f"Key {key} is not recognized by classifier {name}")
raise ValueError(f"Key {key} is not recognized by classifier {classifier_name}")

print ("Initializing classifier...")
C=classifier(bands, settings)
Expand All @@ -92,7 +104,9 @@ def run_one(classifier_name, bands, settings, train_data, train_z, valid_data,
results = C.apply(valid_data)

print ("Getting metric...")
scores = tc.compute_scores(results, valid_z, metrics=metrics)
# scores = tc.compute_scores(results, valid_z, metrics=metrics)
#Use JAX code 23/7/2020
scores = jc_compute_scores(results, valid_z, metrics="SNR_3x2,FOM_3x2,FOM_DETF_3x2")

return scores

Expand Down
Binary file not shown.
7 changes: 4 additions & 3 deletions example/my_GB.yaml → example/jec_GB.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
metrics: [SNR_ww, SNR_3x2, FOM_3x2]
bands: riz
bands: griz
training_file: data/training.hdf5
validation_file: data/validation.hdf5
output_file: example/my_GB_output.txt
output_file: example/my_GB_output-6bins-50estim-griz-testDump.txt

run:
# This is a class name which will be looked up
myGradientBosster:
run_3:
# my new arg for the classifier
# to save file
savefile: /Users/campagne/Travail/Software/tomo_challenge-orig/data/jec_GB-6bins-50estim-griz-testDump.joblib
n_estimators: 50
# This setting is sent to the classifier
bins: 6
Expand Down
1 change: 1 addition & 0 deletions example/jec_MultiClf_10bins_50x2est_JAX_opt9_BEST_DETF.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
myCombinedClassifiers run_3 {'savefile': '/sps/lsst/users/campagne/tomo_challenge/data/jec__MultiClf-GB+RFNoDepthLimit-StdScaler-10bins-50estim-griz-JAX-Opt9_BEST_DETF.joblib', 'n_estimators': 50, 'bins': 10, 'colors': True, 'errors': False} {'SNR_3x2': 1770.5667724609375, 'FOM_3x2': 11953.8701171875, 'FOM__DETF_3x2': 176.66757202148438}
19 changes: 19 additions & 0 deletions example/jec_multiclf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
metrics: [SNR_3x2]
bands: griz
training_file: data/training.hdf5
validation_file: data/validation.hdf5
output_file: example/jec_MultiClf_10bins_50x2est_JAX_opt9_BEST_DETF.txt

run:
# This is a class name which will be looked up
myCombinedClassifiers:
run_3:
savefile: /sps/lsst/users/campagne/tomo_challenge/data/jec__MultiClf-GB+RFNoDepthLimit-StdScaler-10bins-50estim-griz-JAX-Opt9_BEST_DETF.joblib
n_estimators: 50
# This setting is sent to the classifier
bins: 10
# These special settings decide whether the
# color and error colums are passed to the classifier
# as well as the magnitudes
colors: True
errors: False
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
scikit-learn
sacc
git+https://github.com/LSSTDESC/firecrown/
#git+https://github.com/LSSTDESC/firecrown/
sacc
h5py
matplotlib
git+https://github.com/cmbant/camb
cosmosis-standalone
pyccl
#cosmosis-standalone
#pyccl
camb
click
progressbar
200 changes: 200 additions & 0 deletions tomo_challenge/classifiers/jec_CombineClassifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
"""
This is an example tomographic bin generator using a random forest.

Every classifier module needs to:
- have construction of the type
__init__ (self, bands, options) (see examples below)
- implement two functions:
train (self, training_data,training_z)
apply (self, data).
- define valid_options class varible.

See Classifier Documentation below.
"""

from .base import Tomographer
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
#JEC 15/7/20 use joblib to save the model
from joblib import dump, load


class myCombinedClassifiers(Tomographer):
    """Stacked ensemble tomographic classifier.

    Combines a gradient-boosting classifier and a random forest (each behind
    a StandardScaler pipeline) via a StackingClassifier whose final estimator
    is a logistic regression.
    """

    # Valid keys for the `options` dict -- see __init__.
    # 'savefile' (optional) dumps the fitted model with joblib.
    valid_options = ['bins', 'n_estimators', 'savefile']

    # Arrays (not dicts) will be sent to train() and apply().
    wants_arrays = True

    # Hand-tuned redshift bin edges for the 10-bin configuration, obtained by
    # iteratively adjusting the equal-count percentile edges (11 edges = 10 bins).
    _TUNED_10BIN_EDGES = np.array(
        [0.02450252, 0.15961642, 0.37035, 0.475175,
         0.58955, 0.700775, 0.91746771, 1.03793607,
         1.352945, 1.8, 3.03609133])

    def __init__(self, bands, options):
        """Constructor.

        Parameters
        ----------
        bands : str
            String containing valid bands, like 'riz' or 'griz'.
        options : dict
            Settings; valid keys are listed in the `valid_options`
            class variable:
              'bins'         - number of tomographic bins
              'n_estimators' - number of trees per base estimator
              'savefile'     - optional path to joblib-dump the fitted model
        """
        self.bands = bands
        self.opt = options

    def train(self, training_data, training_z):
        """Train the stacked classifier.

        Parameters
        ----------
        training_data : numpy array, size Ngalaxies x Nfeatures
            Training data; each row is a galaxy, each column a feature
            (bands/colors/errors as prepared by the caller).
        training_z : numpy array, size Ngalaxies
            True redshift for the training sample.
        """
        # Default to sklearn's usual 100 trees if not specified, instead of
        # raising KeyError for a key that is merely optional in valid_options.
        n_estimators = self.opt.get('n_estimators', 100)
        n_bin = self.opt['bins']

        print("Finding bins for training data")
        # Zero-initialise so the single object with the minimum redshift in
        # the whole survey still lands in the lowest bin.
        training_bin = np.zeros(training_z.size)

        # Default edges: split the redshifts into n_bin bins with equal
        # number counts in each.
        p = np.linspace(0, 100, n_bin + 1)
        z_edges = np.percentile(training_z, p)

        # For the 10-bin configuration, use the hand-tuned edges instead.
        # Guarded by the bin count: the tuned array has exactly 11 edges, so
        # applying it for any other `bins` value would mis-bin or go out of
        # range; those cases keep the percentile edges.
        if n_bin == 10:
            z_edges = self._TUNED_10BIN_EDGES
            print("new set: ", z_edges)

        # Assign each training galaxy to its redshift bin.
        # NOTE(review): strict inequalities leave galaxies exactly on an
        # interior edge in the zero-init bin -- kept as-is to preserve the
        # original binning behaviour.
        for i in range(n_bin):
            z_low = z_edges[i]
            z_high = z_edges[i + 1]
            training_bin[(training_z > z_low) & (training_z < z_high)] = i

        # For speed, cut down to ~5% of the original size.
        cut = np.random.uniform(0, 1, training_z.size) < 0.05
        training_bin = training_bin[cut]
        training_data = training_data[cut]

        # Stack GB + RF base learners; logistic regression combines them.
        estimators = [
            ('gd', make_pipeline(
                StandardScaler(),
                GradientBoostingClassifier(n_estimators=n_estimators,
                                           verbose=1))),
            ('rf', make_pipeline(
                StandardScaler(),
                RandomForestClassifier(n_estimators=n_estimators,
                                       verbose=1))),
        ]
        classifier = StackingClassifier(
            estimators=estimators,
            final_estimator=LogisticRegression(max_iter=5000))

        print("Fitting classifier")
        # Lots of data, so this will take some time.
        classifier.fit(training_data, training_bin)

        self.classifier = classifier
        self.z_edges = z_edges

        # Optionally persist the fitted model with joblib; previously a
        # missing 'savefile' key raised KeyError here.
        savefile = self.opt.get('savefile')
        if savefile:
            dump(classifier, savefile)

    def apply(self, data):
        """Apply the trained classifier to the data.

        Parameters
        ----------
        data : numpy array, size Ngalaxies x Nfeatures
            Testing data; each row is a galaxy, each column a feature as in
            train().

        Returns
        -------
        tomographic_selections : numpy array, int, size Ngalaxies
            Tomographic selection for galaxies, returned as a bin number for
            each galaxy.
        """
        tomo_bin = self.classifier.predict(data)
        return tomo_bin

Loading