from typing import TypeVar, Type, Union, Dict
import pathlib
import yaml
import numpy as np
from .utils import check_2d_input, rotate_data, get_angle
# Type variables bound to the template classes defined in this module.
# They annotate the alternate constructors (``from_yaml`` and ``fit``), whose
# ``cls`` argument is typed ``Type[T2]`` / ``Type[T3]`` and whose return value
# is typed ``T2`` / ``T3``.
T2 = TypeVar("T2", bound="TwoStateClassifier")
T3 = TypeVar("T3", bound="ThreeStateClassifier")
# Template class for two-state classifiers.
class TwoStateClassifier:
    """Template for creating two-state classifiers.

    The elements to be rewritten for each specific classifier are:

    * ``_pdf_func_...``, which specify the PDFs
    * ``_param_names``, which specify the parameter names of the PDFs
    * ``statistics``, which computes the relevant statistics
    * ``fit``, which performs the fit

    NB: if the classifier does not use max-likelihood classification,
    then ``predict`` needs to be overwritten.
    """

    _pdf_func_0 = None
    _pdf_func_1 = None
    # parameter name ordering must match the ordering in the pdf functions
    _param_names = {
        0: [],
        1: [],
    }
    _num_states = 2

    def __init__(self, params: Dict[int, Dict[str, Union[float, np.ndarray]]]):
        """Loads params to this ``TwoStateClassifier``.

        Parameters
        ----------
        params : dict
            The structure of the dictionary must be

            .. code-block:: python

                {
                    0: {"param1": float, ...},
                    1: {"param1": float, ...}
                }

        Raises
        ------
        TypeError, ValueError
            If ``params`` does not pass ``_check_params``.
        """
        self._check_params(params)

        # param values are stored in a vector to run `curve_fit` easily
        # because it uses `args` and not `kargs` to fit the functions
        self._param_values = {
            state: [params[state][name] for name in self._param_names[state]]
            for state in range(self._num_states)
        }

    @staticmethod
    def _scalar_to_builtin(scalar) -> Union[int, float]:
        """Converts a numpy scalar (or 0-dimensional array) to ``int``/``float``.

        The decision is based on the dtype of the value itself: integer
        dtypes give an ``int``, everything else goes through ``float``.
        """
        if np.issubdtype(np.asarray(scalar).dtype, np.integer):
            return int(scalar)
        return float(scalar)

    def to_yaml(self, filename: Union[str, pathlib.Path]):
        """Stores parameters in a YAML file.

        The file contains ``self.params`` under the key ``"params"`` and
        ``self.statistics`` under the key ``"extra"``.

        NB: the numpy representers are registered with ``yaml.add_representer``,
        so they remain registered in PyYAML after this call.

        Parameters
        ----------
        filename
            Path of the file to write.
        """
        data = {"params": self.params, "extra": self.statistics}

        # convert data to lists or floats to avoid having numpy objects
        # inside the YAML file, which do not render correctly
        to_builtin = self._scalar_to_builtin

        def scalar_representer(dumper: yaml.Dumper, scalar) -> yaml.Node:
            # int-vs-float is decided from the dumped value, not from a loop
            # variable captured by the closure: the representers only run
            # inside `yaml.dump`, i.e. after the registration loop finished
            value = to_builtin(scalar)
            if isinstance(value, int):
                return dumper.represent_int(value)
            return dumper.represent_float(value)

        def ndarray_representer(dumper: yaml.Dumper, array: np.ndarray) -> yaml.Node:
            if array.shape == ():  # corresponds to a scalar
                return scalar_representer(dumper, array)
            return dumper.represent_list(array.tolist())

        yaml.add_representer(np.ndarray, ndarray_representer)

        # the values in "self.params" are np.core.multiarray.scalars,
        # not "np.ndarray".
        for np_type in (np.int64, np.int32, np.float64, np.float32):
            yaml.add_representer(np_type, scalar_representer)

        with open(filename, "w") as file:
            yaml.dump(data, file, default_flow_style=False)

    @classmethod
    def from_yaml(cls: Type[T2], filename: Union[str, pathlib.Path]) -> T2:
        """
        Load the `TwoStateClassifier` from YAML file.

        NB: this function does not load any extra data stored in the YAML file
        apart from the ``params`` item.

        Parameters
        ----------
        filename
            Path of the YAML file to read.

        Returns
        -------
        Classifier built from the ``params`` item of the file.
        """
        with open(filename, "r") as file:
            data = yaml.safe_load(file)

        # transform all parameters to np.arrays
        params = {
            state: {name: np.array(value) for name, value in state_params.items()}
            for state, state_params in data["params"].items()
        }
        return cls(params)

    @property
    def params(self) -> Dict[int, Dict[str, Union[float, np.ndarray]]]:
        """Returns the parameters required to set up the classifier.

        The structure of the output dictionary is:

        .. code-block:: python

            {
                0: {"param1": float, ...},
                1: {"param1": float, ...},
            }
        """
        return {
            state: dict(zip(self._param_names[state], self._param_values[state]))
            for state in range(self._num_states)
        }

    @property
    def statistics(self) -> Dict[str, np.ndarray]:
        """Returns dictionary with general statistical data:

        * ``mu_0``: ``np.array([float, float])``
        * ``mu_1``: ``np.array([float, float])``
        * ``cov_0``: ``np.array([[float, float], [float, float]])``
        * ``cov_1``: ``np.array([[float, float], [float, float]])``

        It can also include other information such as rot_angle, rot_shift, ...

        NB: this property is used for plotting and for storing useful
        information in the YAML file. In this template it is empty.
        """
        return {}

    @classmethod
    def fit(cls: Type[T2], shots_0: np.ndarray, shots_1: np.ndarray, **kargs) -> T2:
        """
        Runs fit to the given data.

        NB: this template does not perform any fit, it passes an empty
        parameter dictionary to the constructor. Specific classifiers
        must overwrite this method.

        Parameters
        ----------
        shots_0 : np.array(N, 2)
            IQ data when preparing state 0.
        shots_1 : np.array(M, 2)
            IQ data when preparing state 1.

        Returns
        -------
        Loaded `TwoStateClassifier`.
        """
        check_2d_input(shots_0, axis=1)
        check_2d_input(shots_1, axis=1)

        # perform fit ...
        params = {}

        return cls(params)

    def predict(self, z: np.ndarray, p_0: float = 1 / 2) -> np.ndarray:
        """
        Classifies the given data to 0 or 1 using maximum-likelihood
        classification, which is defined by

        * 0 if :math:`p(0|z) > p(1|z)`
        * 1 otherwise

        Parameters
        ----------
        z : np.array(..., 2)
            Points to classify.
        p_0
            Probability to measure outcome 0.
            By default 1/2, which in this case :math:`p(0|z) > p(1|z)` is
            equivalent to :math:`p(z|0) > p(z|1)`.

        Returns
        -------
        prediction : np.array(...)
            Classification of the given data. It only contains 0s and 1s.

        Raises
        ------
        ValueError
            If ``p_0`` is not in the interval [0, 1].
        """
        if (p_0 > 1) or (p_0 < 0):
            raise ValueError(
                "The speficied 'p_0' must be a physical probability, "
                f"but p_0={p_0} (and p1={1-p_0}) were given"
            )

        # does not compute p(z) for p(i|z) = p(z|i) * p(i) / p(z)
        # because it is the same for all states and we are
        # only selecting the one with highest probability
        probs = [self.pdf_0(z) * p_0, self.pdf_1(z) * (1 - p_0)]
        return np.argmax(probs, axis=0)

    def pdf_0(self, z: np.ndarray) -> np.ndarray:
        """
        Returns :math:`p(z|0)`.

        Parameters
        ----------
        z : np.array(..., 2)
            IQ points.

        Returns
        -------
        prob : np.array(...)
            Probability of the input IQ points given that the state is 0.
        """
        check_2d_input(z)
        # the pdf functions are class variables (as opposed to instance variables)
        # thus they are available in the class of `self`, not the instance of `self`
        return self.__class__._pdf_func_0(z, *self._param_values[0])

    def pdf_1(self, z: np.ndarray) -> np.ndarray:
        """
        Returns :math:`p(z|1)`.

        Parameters
        ----------
        z : np.array(..., 2)
            IQ points.

        Returns
        -------
        prob : np.array(...)
            Probability of the input IQ points given that the state is 1.
        """
        check_2d_input(z)
        # the pdf functions are class variables (as opposed to instance variables)
        # thus they are available in the class of `self`, not the instance of `self`
        return self.__class__._pdf_func_1(z, *self._param_values[1])

    def _check_params(self, params: Dict[int, Dict[str, Union[float, np.ndarray]]]):
        """Checks if the given params are valid to initialize this classifier.

        Parameter values can be ``float``, ``int``, ``np.ndarray`` or numpy
        integer/floating scalars (e.g. ``np.float32``, ``np.int64``).

        Raises
        ------
        TypeError
            If ``params`` or any of its items is not a dictionary, or if a
            parameter value has an unsupported type.
        ValueError
            If the states are not ``0`` and ``1``, or if the parameter names
            of a state do not match ``self._param_names``.
        """
        if not isinstance(params, dict):
            raise TypeError(f"'params' must be a dict, but {type(params)} was given")
        if set(params) != {0, 1}:
            raise ValueError(
                f"'params' must have keys [0,1], but {list(params)} were given"
            )

        # numpy scalars are accepted because they are what numpy reductions
        # return and what `to_yaml` knows how to serialize
        valid_types = (float, int, np.ndarray, np.integer, np.floating)

        for state, state_params in params.items():
            if not isinstance(state_params, dict):
                raise TypeError(
                    f"'params[{state}]' must be a dict, but {type(state_params)} was given"
                )
            if set(state_params) != set(self._param_names[state]):
                raise ValueError(
                    f"'params[{state}]' must have keys {self._param_names[state]}, "
                    f" but {list(state_params)} were given"
                )

            for key, value in state_params.items():
                if not isinstance(value, valid_types):
                    raise TypeError(
                        f"'params[{state}][{key}]' must be a float/int/np.ndarray, "
                        f"but {type(value)} was given"
                    )
# Template class for two-state linear classifiers.
class TwoStateLinearClassifier(TwoStateClassifier):
    """Template for creating two-state linear classifiers.

    The elements to be rewritten for each specific classifier are:

    * ``_pdf_func_...``, which specify the PDFs
    * ``_pdf_func_..._proj``, which specify the PDFs for the projected data
    * ``_param_names``, which specify the parameter names of the PDFs
    * ``_param_names_proj``, which specify the parameter names of the PDFs
      for the projected data
    * ``params_proj``, which computes the projected parameters from ``params``
    * ``statistics``, which computes the relevant statistics
    * ``fit``, which performs the fit

    NB: if the classifier does not use max-likelihood classification,
    then ``predict`` needs to be overwritten.
    """

    _pdf_func_0 = None
    _pdf_func_1 = None
    # parameter name ordering must match the ordering in the pdf functions
    _param_names = {
        0: [],
        1: [],
    }

    _pdf_func_0_proj = None
    _pdf_func_1_proj = None
    # parameter name ordering must match the ordering in the pdf functions
    _param_names_proj = {
        0: [],
        1: [],
    }

    _num_states = 2

    def __init__(self, params: Dict[int, Dict[str, Union[float, np.ndarray]]]):
        """
        Loads params to this ``TwoStateLinearClassifier``.

        Parameters
        ----------
        params
            The structure of the dictionary must be

            .. code-block:: python

                {
                    0: {"param1": float, ...},
                    1: {"param1": float, ...}
                }
        """
        # validates `params` and stores them in `self._param_values`
        super().__init__(params)

        # compute parameters for the projected pdfs from `params`
        # this step needs to be done after loading the standard `params`
        # (`params_proj` is evaluated only once, not once per parameter name)
        params_proj = self.params_proj
        self._param_values_proj = {
            state: [params_proj[state][name] for name in self._param_names_proj[state]]
            for state in range(self._num_states)
        }

    def to_yaml(self, filename: Union[str, pathlib.Path]):
        """Stores parameters in a YAML file.

        The file contains ``self.params`` under the key ``"params"``,
        ``self.params_proj`` under ``"params_proj"`` and ``self.statistics``
        under ``"extra"``.

        NB: the numpy representers are registered with ``yaml.add_representer``,
        so they remain registered in PyYAML after this call.

        Parameters
        ----------
        filename
            Path of the file to write.
        """
        data = {
            "params": self.params,
            "params_proj": self.params_proj,
            "extra": self.statistics,
        }

        # convert data to lists or floats to avoid having numpy objects
        # inside the YAML file, which do not render correctly
        def scalar_representer(dumper: yaml.Dumper, scalar) -> yaml.Node:
            # int-vs-float is decided from the dumped value, not from a loop
            # variable captured by the closure: the representers only run
            # inside `yaml.dump`, i.e. after the registration loop finished
            if np.issubdtype(np.asarray(scalar).dtype, np.integer):
                return dumper.represent_int(int(scalar))
            return dumper.represent_float(float(scalar))

        def ndarray_representer(dumper: yaml.Dumper, array: np.ndarray) -> yaml.Node:
            if array.shape == ():  # corresponds to a scalar
                return scalar_representer(dumper, array)
            return dumper.represent_list(array.tolist())

        yaml.add_representer(np.ndarray, ndarray_representer)

        # the values in "self.params" can be np.core.multiarray.scalars,
        # not "np.ndarray".
        for np_type in (np.int64, np.int32, np.float64, np.float32):
            yaml.add_representer(np_type, scalar_representer)

        with open(filename, "w") as file:
            yaml.dump(data, file, default_flow_style=False)

    @property
    def params_proj(self) -> Dict[int, Dict[str, Union[float, np.ndarray]]]:
        """Returns the parameters for the projected PDFs, computed
        from ``params``.

        The structure of the output dictionary is:

        .. code-block:: python

            {
                0: {"param1": float, ...},
                1: {"param1": float, ...},
            }

        NB: in this template the dictionary is empty.
        """
        # compute `params_proj` from `params` ...
        params_proj = {}
        return params_proj

    def project(self, z: np.ndarray) -> np.ndarray:
        """Returns the projection of the given IQ data to
        the :math:`\\mu_0 - \\mu_1` axis.

        The axis is obtained from ``mu_0`` and ``mu_1`` in ``self.statistics``.

        Parameters
        ----------
        z : np.array(..., 2)
            IQ points.

        Returns
        -------
        z_proj : np.array(...)
            Projection of IQ points to :math:`\\mu_0 - \\mu_1` axis.
        """
        check_2d_input(z)
        stats = self.statistics
        mu_0, mu_1 = stats["mu_0"], stats["mu_1"]
        rot_angle = get_angle(mu_1 - mu_0)
        # rotate by the opposite angle and keep the first component
        return rotate_data(z, -rot_angle)[..., 0]

    def pdf_0_projected(self, z_proj: np.ndarray) -> np.ndarray:
        """Returns :math:`p_{proj}(z_{proj}|0)`.

        NB: :math:`p_{proj}(z_{proj}|0) \\neq p(z|0)`.

        Parameters
        ----------
        z_proj : np.array(...)
            Projection of IQ points to :math:`\\mu_0 - \\mu_1` axis.
            See ``self.project``.

        Returns
        -------
        prob : np.array(...)
            Probability of the input projected points given state 0.
        """
        # the pdf functions are class variables (as opposed to instance variables)
        # thus they are available in the class of `self`, not the instance of `self`
        return self.__class__._pdf_func_0_proj(z_proj, *self._param_values_proj[0])

    def pdf_1_projected(self, z_proj: np.ndarray) -> np.ndarray:
        """Returns :math:`p_{proj}(z_{proj}|1)`.

        NB: :math:`p_{proj}(z_{proj}|1) \\neq p(z|1)`.

        Parameters
        ----------
        z_proj : np.array(...)
            Projection of IQ points to :math:`\\mu_0 - \\mu_1` axis.
            See ``self.project``.

        Returns
        -------
        prob : np.array(...)
            Probability of the input projected points given state 1.
        """
        # the pdf functions are class variables (as opposed to instance variables)
        # thus they are available in the class of `self`, not the instance of `self`
        return self.__class__._pdf_func_1_proj(z_proj, *self._param_values_proj[1])
# Template class for three-state classifiers.
class ThreeStateClassifier:
    """Template for creating three-state classifiers.

    The elements to be rewritten for each specific classifier are:

    * ``_pdf_func_...``, which specify the PDFs
    * ``_param_names``, which specify the parameter names of the PDFs
    * ``statistics``, which computes the relevant statistics
    * ``fit``, which performs the fit

    NB: if the classifier does not use max-likelihood classification,
    then ``predict`` needs to be overwritten.
    """

    _pdf_func_0 = None
    _pdf_func_1 = None
    _pdf_func_2 = None
    # parameter name ordering must match the ordering in the pdf functions
    _param_names = {
        0: [],
        1: [],
        2: [],
    }
    _num_states = 3

    def __init__(self, params: Dict[int, Dict[str, Union[float, np.ndarray]]]):
        """Loads params to this ``ThreeStateClassifier``.

        Parameters
        ----------
        params
            The structure of the dictionary must be

            .. code-block:: python

                {
                    0: {"param1": float, ...},
                    1: {"param1": float, ...},
                    2: {"param1": float, ...},
                }

        Raises
        ------
        TypeError, ValueError
            If ``params`` does not pass ``_check_params``.
        """
        self._check_params(params)

        # param values are stored in a vector to run `curve_fit` easily
        # because it uses `args` and not `kargs` to fit the functions
        self._param_values = {
            state: [params[state][name] for name in self._param_names[state]]
            for state in range(self._num_states)
        }

    @staticmethod
    def _scalar_to_builtin(scalar) -> Union[int, float]:
        """Converts a numpy scalar (or 0-dimensional array) to ``int``/``float``.

        The decision is based on the dtype of the value itself: integer
        dtypes give an ``int``, everything else goes through ``float``.
        """
        if np.issubdtype(np.asarray(scalar).dtype, np.integer):
            return int(scalar)
        return float(scalar)

    def to_yaml(self, filename: Union[str, pathlib.Path]):
        """Stores parameters in a YAML file.

        The file contains ``self.params`` under the key ``"params"`` and
        ``self.statistics`` under the key ``"extra"``.

        NB: the numpy representers are registered with ``yaml.add_representer``,
        so they remain registered in PyYAML after this call.

        Parameters
        ----------
        filename
            Path of the file to write.
        """
        data = {"params": self.params, "extra": self.statistics}

        # convert data to lists or floats to avoid having numpy objects
        # inside the YAML file, which do not render correctly
        to_builtin = self._scalar_to_builtin

        def scalar_representer(dumper: yaml.Dumper, scalar) -> yaml.Node:
            # int-vs-float is decided from the dumped value, not from a loop
            # variable captured by the closure: the representers only run
            # inside `yaml.dump`, i.e. after the registration loop finished
            value = to_builtin(scalar)
            if isinstance(value, int):
                return dumper.represent_int(value)
            return dumper.represent_float(value)

        def ndarray_representer(dumper: yaml.Dumper, array: np.ndarray) -> yaml.Node:
            if array.shape == ():  # corresponds to a scalar
                return scalar_representer(dumper, array)
            return dumper.represent_list(array.tolist())

        yaml.add_representer(np.ndarray, ndarray_representer)

        # the values in "self.params" are np.core.multiarray.scalars,
        # not "np.ndarray".
        for np_type in (np.int64, np.int32, np.float64, np.float32):
            yaml.add_representer(np_type, scalar_representer)

        with open(filename, "w") as file:
            yaml.dump(data, file, default_flow_style=False)

    @classmethod
    def from_yaml(cls: Type[T3], filename: Union[str, pathlib.Path]) -> T3:
        """
        Load `ThreeStateClassifier` from YAML file.

        NB: this function does not load any extra data stored in the YAML file
        apart from ``params``.

        Parameters
        ----------
        filename
            Path of the YAML file to read.

        Returns
        -------
        Classifier built from the ``params`` item of the file.
        """
        with open(filename, "r") as file:
            data = yaml.safe_load(file)

        # transform all parameters to np.arrays
        params = {
            state: {name: np.array(value) for name, value in state_params.items()}
            for state, state_params in data["params"].items()
        }
        return cls(params)

    @property
    def params(self) -> Dict[int, Dict[str, Union[float, np.ndarray]]]:
        """Returns the parameters required to set up the classifier.

        The structure of the output dictionary is:

        .. code-block:: python

            {
                0: {"param1": float, ...},
                1: {"param1": float, ...},
                2: {"param1": float, ...},
            }
        """
        return {
            state: dict(zip(self._param_names[state], self._param_values[state]))
            for state in range(self._num_states)
        }

    @property
    def statistics(self) -> Dict[str, np.ndarray]:
        """Returns dictionary with general statistical data:

        * ``mu_0``: ``np.array([float, float])``
        * ``mu_1``: ``np.array([float, float])``
        * ``mu_2``: ``np.array([float, float])``
        * ``cov_0``: ``np.array([[float, float], [float, float]])``
        * ``cov_1``: ``np.array([[float, float], [float, float]])``
        * ``cov_2``: ``np.array([[float, float], [float, float]])``

        It can also include other information.

        NB: this property is used for plotting and for storing useful
        information in the YAML file. In this template it is empty.
        """
        return {}

    @classmethod
    def fit(
        cls: Type[T3],
        shots_0: np.ndarray,
        shots_1: np.ndarray,
        shots_2: np.ndarray,
        **kargs,
    ) -> T3:
        """
        Runs fit to the given data.

        NB: this template does not perform any fit, it passes an empty
        parameter dictionary to the constructor. Specific classifiers
        must overwrite this method.

        Parameters
        ----------
        shots_0 : np.array(N, 2)
            IQ data when preparing state 0.
        shots_1 : np.array(M, 2)
            IQ data when preparing state 1.
        shots_2 : np.array(P, 2)
            IQ data when preparing state 2.

        Returns
        -------
        Loaded `ThreeStateClassifier`.
        """
        check_2d_input(shots_0, axis=1)
        check_2d_input(shots_1, axis=1)
        check_2d_input(shots_2, axis=1)

        # perform fit ...
        params = {}

        return cls(params)

    def predict(
        self, z: np.ndarray, p_0: float = 1 / 3, p_1: float = 1 / 3
    ) -> np.ndarray:
        """
        Classifies the given data to 0, 1 or 2 using maximum-likelihood
        classification, which is defined by

        * 0 if :math:`p(0|z) > p(1|z), p(2|z)`
        * 1 if :math:`p(1|z) > p(0|z), p(2|z)`
        * 2 otherwise

        Parameters
        ----------
        z : np.array(..., 2)
            Points to classify.
        p_0
            Probability to measure outcome 0.
        p_1
            Probability to measure outcome 1.
            By default :math:`p_0=p_1=1/3`, thus using :math:`p(i|z)` is
            equivalent to using :math:`p(z|i)`.

        Returns
        -------
        prediction : np.array(...)
            Classification of the given data. It only contains 0s, 1s, and 2s.

        Raises
        ------
        ValueError
            If ``p_0`` or ``p_1`` is negative, or if ``p_0 + p_1 > 1``.
        """
        if (p_0 + p_1 > 1) or (p_0 < 0) or (p_1 < 0):
            raise ValueError(
                "The speficied 'p_0' and 'p_1' must be physical probabilities, "
                f"but p_0={p_0} and p1={p_1} (and p2={1-p_0-p_1}) were given"
            )

        # does not compute p(z) for p(i|z) = p(z|i) * p(i) / p(z)
        # because it is the same for all states and we are
        # only selecting the one with highest probability
        probs = [
            self.pdf_0(z) * p_0,
            self.pdf_1(z) * p_1,
            self.pdf_2(z) * (1 - p_0 - p_1),
        ]
        return np.argmax(probs, axis=0)

    def pdf_0(self, z: np.ndarray) -> np.ndarray:
        """
        Returns :math:`p(z|0)`.

        Parameters
        ----------
        z : np.array(..., 2)
            IQ points.

        Returns
        -------
        prob : np.array(...)
            Probability of the input IQ points given that the state is 0.
        """
        check_2d_input(z)
        # the pdf functions are class variables (as opposed to instance variables)
        # thus they are available in the class of `self`, not the instance of `self`
        return self.__class__._pdf_func_0(z, *self._param_values[0])

    def pdf_1(self, z: np.ndarray) -> np.ndarray:
        """
        Returns :math:`p(z|1)`.

        Parameters
        ----------
        z : np.array(..., 2)
            IQ points.

        Returns
        -------
        prob : np.array(...)
            Probability of the input IQ points given that the state is 1.
        """
        check_2d_input(z)
        # the pdf functions are class variables (as opposed to instance variables)
        # thus they are available in the class of `self`, not the instance of `self`
        return self.__class__._pdf_func_1(z, *self._param_values[1])

    def pdf_2(self, z: np.ndarray) -> np.ndarray:
        """
        Returns :math:`p(z|2)`.

        Parameters
        ----------
        z : np.array(..., 2)
            IQ points.

        Returns
        -------
        prob : np.array(...)
            Probability of the input IQ points given that the state is 2.
        """
        check_2d_input(z)
        # the pdf functions are class variables (as opposed to instance variables)
        # thus they are available in the class of `self`, not the instance of `self`
        return self.__class__._pdf_func_2(z, *self._param_values[2])

    def _check_params(self, params: Dict[int, Dict[str, Union[float, np.ndarray]]]):
        """Check if params are valid to initialize this classifier.

        Parameter values can be ``float``, ``int``, ``np.ndarray`` or numpy
        integer/floating scalars (e.g. ``np.float32``, ``np.int64``).

        Raises
        ------
        TypeError
            If ``params`` or any of its items is not a dictionary, or if a
            parameter value has an unsupported type.
        ValueError
            If the states are not ``0``, ``1`` and ``2``, or if the parameter
            names of a state do not match ``self._param_names``.
        """
        if not isinstance(params, dict):
            raise TypeError(f"'params' must be a dict, but {type(params)} was given")
        if set(params) != {0, 1, 2}:
            raise ValueError(
                f"'params' must have keys [0,1,2], but {list(params)} were given"
            )

        # numpy scalars are accepted because they are what numpy reductions
        # return and what `to_yaml` knows how to serialize
        valid_types = (float, int, np.ndarray, np.integer, np.floating)

        for state, state_params in params.items():
            if not isinstance(state_params, dict):
                raise TypeError(
                    f"'params[{state}]' must be a dict, but {type(state_params)} was given"
                )
            if set(state_params) != set(self._param_names[state]):
                raise ValueError(
                    f"'params[{state}]' must have keys {self._param_names[state]}, "
                    f" but {list(state_params)} were given"
                )

            for key, value in state_params.items():
                if not isinstance(value, valid_types):
                    raise TypeError(
                        f"'params[{state}][{key}]' must be a float/int/np.ndarray, "
                        f"but {type(value)} was given"
                    )