"""
Abstract base classes defining a Modeler, which fits a model of redshift and
photometry, a Creator, which will create synthetic photometric data from a model,
and a PosteriorCalculator, which can calculate posteriors for the data with respect
to the distribution defined by the creator.
"""
import pandas as pd
import qp
from rail.core.data import DataHandle, ModelHandle, QPHandle, TableHandle
from rail.core.stage import RailStage
class Modeler(RailStage):
    """Base stage for building a model of redshift and photometry.

    The stage declares one input, tagged ``input`` (a `DataHandle`), and one
    output, tagged ``model`` (a `ModelHandle`).
    """

    name = "Modeler"

    # Start from the shared stage options and add a default random seed.
    config_options = RailStage.config_options.copy()
    config_options.update(seed=12345)

    inputs = [("input", DataHandle)]
    outputs = [("model", ModelHandle)]

    def __init__(self, args, comm=None):
        """Set up the stage; no model is attached until one is fitted."""
        RailStage.__init__(self, args, comm=comm)
        self.model = None

    def fit_model(self):
        """Fit a creation model usable for generating photometry and redshifts.

        Parameters
        ----------
        [The parameters depend entirely on the modeling approach!]

        Returns
        -------
        The handle registered under this stage's ``model`` tag.
        [The underlying file type and format depend entirely on the
        modeling approach!]

        Notes
        -----
        Calls ``self.run()`` and then ``self.finalize()`` before looking up
        the ``model`` handle. ``run`` is not defined in this class --
        presumably it is supplied by a concrete subclass.
        """
        self.run()
        self.finalize()
        model_handle = self.get_handle("model")
        return model_handle
class Creator(RailStage):
    """Base class for Creators that generate synthetic photometric data from a model.

    `Creator` will output a table of photometric data. The details
    will depend on the particular engine.
    """

    name = "Creator"
    config_options = RailStage.config_options.copy()
    config_options.update(n_samples=int, seed=12345)
    inputs = [("model", ModelHandle)]
    outputs = [("output", TableHandle)]

    def __init__(self, args, comm=None):
        """Initialize the Creator and attach the model given in ``args``, if any.

        Parameters
        ----------
        args : dict or object
            Stage arguments. Anything that is not a `dict` is converted with
            `vars` before being forwarded to `open_model`.
        comm : optional
            Passed through unchanged to `RailStage.__init__`.
        """
        RailStage.__init__(self, args, comm=comm)
        self.model = None
        if not isinstance(args, dict):  # pragma: no cover
            # open_model takes keywords, so a non-dict argument object is
            # turned into its attribute dictionary first.
            args = vars(args)
        self.open_model(**args)

    def open_model(self, **kwargs):
        """Load the model and/or attach it to this Creator.

        Keywords
        --------
        model : `object`, `str` or `ModelHandle`
            Either an object with a trained model,
            a path pointing to a file that can be read to obtain the trained model,
            or a `ModelHandle` providing access to the trained model.

        Returns
        -------
        self.model : `object`
            The object encapsulating the trained model, or `None` when no
            model was supplied (missing, `None`, or the string ``"None"``).
        """
        model = kwargs.get("model", None)
        # Compare against the "None" placeholder only when `model` really is
        # a string: `==` on an arbitrary trained-model object (for example an
        # array-like) is not guaranteed to produce a plain bool.
        if model is None or (isinstance(model, str) and model == "None"):  # pragma: no cover
            self.model = None
            return self.model
        if isinstance(model, str):  # pragma: no cover
            # A string is treated as a path: register it without data and
            # record it in the configuration.
            self.model = self.set_data("model", data=None, path=model)
            self.config["model"] = model
            return self.model
        if isinstance(model, ModelHandle):  # pragma: no cover
            # Keep the configuration in sync with the handle's path, if any.
            if model.has_path:
                self.config["model"] = model.path
        # Handles and plain model objects are both attached directly.
        self.model = self.set_data("model", model)
        return self.model

    def sample(self, n_samples: int, seed: int = None, **kwargs):
        """Draw samples from the model specified in the configuration.

        This is a method for running a Creator in interactive mode.
        In pipeline mode, the subclass `run` method will be called by itself.

        Parameters
        ----------
        n_samples: int
            The number of samples to draw
        seed: int
            The random seed to control sampling. The value is always written
            to the configuration, so leaving it as `None` replaces any
            previously configured seed with `None`.
        **kwargs
            Additional entries merged into the stage configuration.

        Returns
        -------
        The handle associated with the `output` tag (declared as a
        `TableHandle` in `outputs`), giving access to the samples.

        Notes
        -----
        This method puts `n_samples` and `seed` into the stage configuration
        data, which makes them available to other methods.
        It then calls the `run` method, which must be defined by a subclass,
        followed by `finalize`.
        Finally, the `DataHandle` associated to the `output` tag is returned.
        """
        self.config["n_samples"] = n_samples
        self.config["seed"] = seed
        self.config.update(**kwargs)
        self.run()
        self.finalize()
        return self.get_handle("output")
class PosteriorCalculator(RailStage):
    """Base class for object that calculates the posterior distribution of a
    particular field in a table of photometric data (typically the redshift).

    The posteriors will be contained in a qp Ensemble.
    """

    name = "PosteriorCalculator"
    config_options = RailStage.config_options.copy()
    config_options.update(column=str)
    inputs = [
        ("model", ModelHandle),
        ("input", TableHandle),
    ]
    outputs = [("output", QPHandle)]

    def __init__(self, args, comm=None):
        """Initialize the PosteriorCalculator and attach the model given in ``args``, if any.

        Parameters
        ----------
        args : dict or object
            Stage arguments. Anything that is not a `dict` is converted with
            `vars` before being forwarded to `open_model`.
        comm : optional
            Passed through unchanged to `RailStage.__init__`.
        """
        RailStage.__init__(self, args, comm=comm)
        self.model = None
        if not isinstance(args, dict):  # pragma: no cover
            # open_model takes keywords, so a non-dict argument object is
            # turned into its attribute dictionary first.
            args = vars(args)
        self.open_model(**args)

    def open_model(self, **kwargs):
        """Load the model and/or attach it to this PosteriorCalculator.

        Keywords
        --------
        model : `object`, `str` or `ModelHandle`
            Either an object with a trained model,
            a path pointing to a file that can be read to obtain the trained model,
            or a `ModelHandle` providing access to the trained model.

        Returns
        -------
        self.model : `object`
            The object encapsulating the trained model, or `None` when no
            model was supplied (missing, `None`, or the string ``"None"``).
        """
        model = kwargs.get("model", None)
        # Compare against the "None" placeholder only when `model` really is
        # a string: `==` on an arbitrary trained-model object (for example an
        # array-like) is not guaranteed to produce a plain bool.
        if model is None or (isinstance(model, str) and model == "None"):  # pragma: no cover
            self.model = None
            return self.model
        if isinstance(model, str):  # pragma: no cover
            # A string is treated as a path: register it without data and
            # record it in the configuration.
            self.model = self.set_data("model", data=None, path=model)
            self.config["model"] = model
            return self.model
        if isinstance(model, ModelHandle):  # pragma: no cover
            # Keep the configuration in sync with the handle's path, if any.
            if model.has_path:
                self.config["model"] = model.path
        # Handles and plain model objects are both attached directly.
        self.model = self.set_data("model", model)
        return self.model

    def get_posterior(self, input_data: pd.DataFrame, **kwargs) -> DataHandle:
        """Return posteriors for the given column.

        This is a method for running a PosteriorCalculator in interactive mode.
        In pipeline mode, the subclass `run` method will be called by itself.

        Parameters
        ----------
        input_data: pd.DataFrame
            A Pandas DataFrame of the galaxies for which posteriors are calculated
        **kwargs
            Additional entries merged into the stage configuration.

        Returns
        -------
        The handle associated with the `output` tag (declared as a
        `QPHandle` in `outputs`), giving access to the posteriors.

        Notes
        -----
        This will put the `input_data` argument into the DataStore under this
        stage's `input` tag.
        This will put the additional keyword arguments into this stage's
        configuration data.
        It will then call `self.run()` and `self.finalize()`, and return the
        `DataHandle` associated to the `output` tag.
        """
        self.set_data("input", input_data)
        self.config.update(**kwargs)
        self.run()
        self.finalize()
        return self.get_handle("output")