Source code for rail.estimation.algos.trainZ

"""
Implementation of the 'pathological photo-z PDF estimator,
as used in arXiv:2001.03621 (see section 3.3). It assigns each test set galaxy
a photo-z PDF equal to the normalized redshift distribution
N (z) of the training set.
"""

import numpy as np
from ceci.config import StageParameter as Param
from rail.estimation.estimator import CatEstimator, CatInformer
from rail.core.common_params import SHARED_PARAMS
import qp


[docs]class trainZmodel: """ Temporary class to store the single trainZ pdf for trained model. Given how simple this is to compute, this seems like overkill. """ def __init__(self, zgrid, pdf, zmode): self.zgrid = zgrid self.pdf = pdf self.zmode = zmode
[docs]class Inform_trainZ(CatInformer): """Train an Estimator which returns a global PDF for all galaxies """ name = 'Inform_trainZ' config_options = CatInformer.config_options.copy() config_options.update(zmin=SHARED_PARAMS, zmax=SHARED_PARAMS, nzbins=SHARED_PARAMS, redshift_col=SHARED_PARAMS) def __init__(self, args, comm=None): CatInformer.__init__(self, args, comm=comm)
[docs] def run(self): if self.config.hdf5_groupname: training_data = self.get_data('input')[self.config.hdf5_groupname] else: # pragma: no cover training_data = self.get_data('input') zbins = np.linspace(self.config.zmin, self.config.zmax, self.config.nzbins + 1) speczs = np.sort(training_data[self.config.redshift_col]) train_pdf, _ = np.histogram(speczs, zbins) midpoints = zbins[:-1] + np.diff(zbins) / 2 zmode = midpoints[np.argmax(train_pdf)] cdf = np.cumsum(train_pdf) cdf = cdf / cdf[-1] norm = cdf[-1] * (zbins[2] - zbins[1]) train_pdf = train_pdf / norm zgrid = midpoints self.model = trainZmodel(zgrid, train_pdf, zmode) self.add_data('model', self.model)
[docs]class TrainZ(CatEstimator): """CatEstimator which returns a global PDF for all galaxies """ name = 'TrainZ' config_options = CatEstimator.config_options.copy() config_options.update(zmin=SHARED_PARAMS, zmax=SHARED_PARAMS, nzbins=SHARED_PARAMS) def __init__(self, args, comm=None): self.zgrid = None self.train_pdf = None self.zmode = None CatEstimator.__init__(self, args, comm=comm)
[docs] def open_model(self, **kwargs): CatEstimator.open_model(self, **kwargs) if self.model is None: # pragma: no cover return self.zgrid = self.model.zgrid self.train_pdf = self.model.pdf self.zmode = self.model.zmode
def _process_chunk(self, start, end, data, first): test_size = len(data['mag_i_lsst']) zmode = np.repeat(self.zmode, test_size) qp_d = qp.Ensemble(qp.interp, data=dict(xvals=self.zgrid, yvals=np.tile(self.train_pdf, (test_size, 1)))) qp_d.set_ancil(dict(zmode=zmode)) self._do_chunk_output(qp_d, start, end, first)