Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions bin/challenge.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
sys.path.append(root_dir)
import tomo_challenge as tc

from tomo_challenge.jax_metrics import compute_scores as jc_compute_scores

@click.command()
@click.argument('config_yaml', type=str)
def main(config_yaml):
Expand Down Expand Up @@ -62,7 +64,7 @@ def main(config_yaml):
config['metrics'])

output_file.write (f"{classifier_name} {run} {settings} {scores} \n")

print("scores= ",scores)


def run_one(classifier_name, bands, settings, train_data, train_z, valid_data,
Expand All @@ -75,12 +77,22 @@ def run_one(classifier_name, bands, settings, train_data, train_z, valid_data,
train_data = tc.dict_to_array(train_data, bands, errors=errors, colors=colors)
valid_data = tc.dict_to_array(valid_data, bands, errors=errors, colors=colors)


#JEC 23/7/2020 restrict data
train_data=train_data[:1000000,:]
valid_data=valid_data[:50000,:]


#JEC 23/7/2020 restrict data
train_z = train_z[:1000000]
valid_z = valid_z[:50000]

print ("Executing: ", classifier_name, bands, settings)

## first check if options are valid
for key in settings.keys():
if key not in classifier.valid_options and key not in ['errors', 'colors']:
raise ValueError(f"Key {key} is not recognized by classifier {name}")
raise ValueError(f"Key {key} is not recognized by classifier {classifier_name}")

print ("Initializing classifier...")
C=classifier(bands, settings)
Expand All @@ -92,7 +104,9 @@ def run_one(classifier_name, bands, settings, train_data, train_z, valid_data,
results = C.apply(valid_data)

print ("Getting metric...")
scores = tc.compute_scores(results, valid_z, metrics=metrics)
# scores = tc.compute_scores(results, valid_z, metrics=metrics)
#Use JAX code 23/7/2020
scores = jc_compute_scores(results, valid_z, metrics="SNR_3x2,FOM_3x2,FOM_DETF_3x2")

return scores

Expand Down
Binary file not shown.
7 changes: 4 additions & 3 deletions example/my_GB.yaml → example/jec_GB.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
metrics: [SNR_ww, SNR_3x2, FOM_3x2]
bands: riz
bands: griz
training_file: data/training.hdf5
validation_file: data/validation.hdf5
output_file: example/my_GB_output.txt
output_file: example/my_GB_output-6bins-50estim-griz-testDump.txt

run:
# This is a class name which will be looked up
myGradientBosster:
run_3:
# my new arg for the classifier
# to save file
savefile: /Users/campagne/Travail/Software/tomo_challenge-orig/data/jec_GB-6bins-50estim-griz-testDump.joblib
n_estimators: 50
# This setting is sent to the classifier
bins: 6
Expand Down
1 change: 1 addition & 0 deletions example/jec_MultiClf_10bins_50x2est_JAX_opt9_BEST_DETF.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
myCombinedClassifiers run_3 {'savefile': '/sps/lsst/users/campagne/tomo_challenge/data/jec__MultiClf-GB+RFNoDepthLimit-StdScaler-10bins-50estim-griz-JAX-Opt9_BEST_DETF.joblib', 'n_estimators': 50, 'bins': 10, 'colors': True, 'errors': False} {'SNR_3x2': 1770.5667724609375, 'FOM_3x2': 11953.8701171875, 'FOM__DETF_3x2': 176.66757202148438}
19 changes: 19 additions & 0 deletions example/jec_multiclf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
metrics: [SNR_3x2]
bands: griz
training_file: data/training.hdf5
validation_file: data/validation.hdf5
output_file: example/jec_MultiClf_10bins_50x2est_JAX_opt9_BEST_DETF.txt

run:
# This is a class name which will be looked up
myCombinedClassifiers:
run_3:
savefile: /sps/lsst/users/campagne/tomo_challenge/data/jec__MultiClf-GB+RFNoDepthLimit-StdScaler-10bins-50estim-griz-JAX-Opt9_BEST_DETF.joblib
n_estimators: 50
# This setting is sent to the classifier
bins: 10
# These special settings decide whether the
# color and error colums are passed to the classifier
# as well as the magnitudes
colors: True
errors: False
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
scikit-learn
sacc
git+https://github.com/LSSTDESC/firecrown/
#git+https://github.com/LSSTDESC/firecrown/
sacc
h5py
matplotlib
git+https://github.com/cmbant/camb
cosmosis-standalone
pyccl
#cosmosis-standalone
#pyccl
camb
click
progressbar
200 changes: 200 additions & 0 deletions tomo_challenge/classifiers/jec_CombineClassifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
"""
This is an example tomographic bin generator using a random forest.

Every classifier module needs to:
- have construction of the type
__init__ (self, bands, options) (see examples below)
- implement two functions:
train (self, training_data,training_z)
apply (self, data).
- define valid_options class varible.

See Classifier Documentation below.
"""

from .base import Tomographer
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
#JEC 15/7/20 use joblib to save the model
from joblib import dump, load


class myCombinedClassifiers(Tomographer):
    """Stacked ensemble tomographic classifier.

    Combines a gradient-boosting classifier and a random forest (each behind
    a StandardScaler pipeline) via a StackingClassifier whose final estimator
    is a logistic regression.
    """

    # Valid keys for the `options` dict -- see __init__.
    # 'savefile' (optional) dumps the fitted model with joblib.
    valid_options = ['bins', 'n_estimators', 'savefile']

    # Arrays (not dicts) will be sent to train() and apply().
    wants_arrays = True

    # Hand-tuned redshift bin edges for the 10-bin configuration, obtained by
    # iteratively adjusting the equal-count percentile edges (11 edges = 10 bins).
    _TUNED_10BIN_EDGES = np.array(
        [0.02450252, 0.15961642, 0.37035, 0.475175,
         0.58955, 0.700775, 0.91746771, 1.03793607,
         1.352945, 1.8, 3.03609133])

    def __init__(self, bands, options):
        """Constructor.

        Parameters
        ----------
        bands : str
            String containing valid bands, like 'riz' or 'griz'.
        options : dict
            Settings; valid keys are listed in the `valid_options`
            class variable:
              'bins'         - number of tomographic bins
              'n_estimators' - number of trees per base estimator
              'savefile'     - optional path to joblib-dump the fitted model
        """
        self.bands = bands
        self.opt = options

    def train(self, training_data, training_z):
        """Train the stacked classifier.

        Parameters
        ----------
        training_data : numpy array, size Ngalaxies x Nfeatures
            Training data; each row is a galaxy, each column a feature
            (bands/colors/errors as prepared by the caller).
        training_z : numpy array, size Ngalaxies
            True redshift for the training sample.
        """
        # Default to sklearn's usual 100 trees if not specified, instead of
        # raising KeyError for a key that is merely optional in valid_options.
        n_estimators = self.opt.get('n_estimators', 100)
        n_bin = self.opt['bins']

        print("Finding bins for training data")
        # Zero-initialise so the single object with the minimum redshift in
        # the whole survey still lands in the lowest bin.
        training_bin = np.zeros(training_z.size)

        # Default edges: split the redshifts into n_bin bins with equal
        # number counts in each.
        p = np.linspace(0, 100, n_bin + 1)
        z_edges = np.percentile(training_z, p)

        # For the 10-bin configuration, use the hand-tuned edges instead.
        # Guarded by the bin count: the tuned array has exactly 11 edges, so
        # applying it for any other `bins` value would mis-bin or go out of
        # range; those cases keep the percentile edges.
        if n_bin == 10:
            z_edges = self._TUNED_10BIN_EDGES
            print("new set: ", z_edges)

        # Assign each training galaxy to its redshift bin.
        # NOTE(review): strict inequalities leave galaxies exactly on an
        # interior edge in the zero-init bin -- kept as-is to preserve the
        # original binning behaviour.
        for i in range(n_bin):
            z_low = z_edges[i]
            z_high = z_edges[i + 1]
            training_bin[(training_z > z_low) & (training_z < z_high)] = i

        # For speed, cut down to ~5% of the original size.
        cut = np.random.uniform(0, 1, training_z.size) < 0.05
        training_bin = training_bin[cut]
        training_data = training_data[cut]

        # Stack GB + RF base learners; logistic regression combines them.
        estimators = [
            ('gd', make_pipeline(
                StandardScaler(),
                GradientBoostingClassifier(n_estimators=n_estimators,
                                           verbose=1))),
            ('rf', make_pipeline(
                StandardScaler(),
                RandomForestClassifier(n_estimators=n_estimators,
                                       verbose=1))),
        ]
        classifier = StackingClassifier(
            estimators=estimators,
            final_estimator=LogisticRegression(max_iter=5000))

        print("Fitting classifier")
        # Lots of data, so this will take some time.
        classifier.fit(training_data, training_bin)

        self.classifier = classifier
        self.z_edges = z_edges

        # Optionally persist the fitted model with joblib; previously a
        # missing 'savefile' key raised KeyError here.
        savefile = self.opt.get('savefile')
        if savefile:
            dump(classifier, savefile)

    def apply(self, data):
        """Apply the trained classifier to the data.

        Parameters
        ----------
        data : numpy array, size Ngalaxies x Nfeatures
            Testing data; each row is a galaxy, each column a feature as in
            train().

        Returns
        -------
        tomographic_selections : numpy array, int, size Ngalaxies
            Tomographic selection for galaxies, returned as a bin number for
            each galaxy.
        """
        tomo_bin = self.classifier.predict(data)
        return tomo_bin

Loading