flavio.measurements module
Functions for parsing the measurement data files
"""Functions for parsing the measurement data files""" import yaml import pkgutil from flavio.classes import Measurement, Observable from flavio._parse_errors import constraints_from_string, errors_from_string from flavio.statistics import probability import numpy as np from math import sqrt import warnings def _load(obj): """Read measurements from a YAML stream or file.""" measurements = yaml.safe_load(obj) for m_name, m_data in measurements.items(): m = Measurement(m_name) for arg in ['inspire', 'hepdata', 'experiment', 'url', 'description']: if arg in m_data: setattr(m, arg, m_data[arg]) if 'observables' in m_data: # for multivariate constraints pd = probability.dict2dist(m_data['values']) pd = probability.convolve_distributions(pd) # for observables without arguments (i.e. strings), this is trivial; obs_list = [obs if isinstance(obs, str) # for obs. with arguments, need to convert dict of the form # {'name': myname, 'arg1': v1, ...} to a tuple of the form # (myname, v1, ...) else tuple( [obs['name']] + [obs[arg] for arg in Observable[obs['name']].arguments]) for obs in m_data['observables']] m.add_constraint(obs_list, pd) elif 'correlation' not in m_data: # for univariate constraints if isinstance(m_data['values'], list): for value_dict in m_data['values']: args = Observable[value_dict['name']].arguments # numerical values of arguments, e.g. [1, 6] args_num = [value_dict[a] for a in args] # insert string name in front of argument values and turn it # into a tuple, e.g. ('FL(B0->K*mumu)', 1, 6) args_num.insert(0, value_dict['name']) obs_tuple = tuple(args_num) if isinstance(value_dict['value'], dict): m.set_constraint(obs_tuple, constraint_dict=value_dict['value']) else: m.set_constraint(obs_tuple, value_dict['value']) else: # otherwise, 'values' is a dict just containing name: constraint_string for obs, value in m_data['values'].items(): if isinstance(value, dict) or isinstance(value, list): m.set_constraint(obs, constraint_dict=value) else: m.set_constraint(obs, value) else: # for multivariate normal constraints observables = [] central_values = [] errors = [] if isinstance(m_data['values'], list): for value_dict in m_data['values']: # if "value" is a list, it contains the values of observable # arguments (like q^2) args = Observable[value_dict['name']].arguments args_num = [value_dict[a] for a in args] error_dict = errors_from_string(value_dict['value']) args_num.insert(0, value_dict['name']) obs_tuple = tuple(args_num) observables.append(obs_tuple) central_values.append(error_dict['central_value']) squared_error = 0. for sym_err in error_dict['symmetric_errors']: squared_error += sym_err**2 for asym_err in error_dict['asymmetric_errors']: squared_error += asym_err[0]*asym_err[1] errors.append(sqrt(squared_error)) else: # otherwise, 'values' is a dict just containing name: constraint_string for obs, value in m_data['values'].items(): observables.append(obs) error_dict = errors_from_string(value) central_values.append(error_dict['central_value']) squared_error = 0. for sym_err in error_dict['symmetric_errors']: squared_error += sym_err**2 for asym_err in error_dict['asymmetric_errors']: squared_error += asym_err[0]*asym_err[1] errors.append(sqrt(squared_error)) correlation = _fix_correlation_matrix(m_data['correlation'], len(observables)) covariance = np.outer(np.asarray(errors), np.asarray(errors))*correlation m.add_constraint(observables, probability.MultivariateNormalDistribution(central_values, covariance)) return list(measurements.keys()) def _load_new(obj): """Read measurements from a YAML stream or file that are compatible with the format generated by the `get_yaml` method of `flavio.classes.Measurement`.""" measurements = yaml.safe_load(obj) if isinstance(measurements, dict): measurements = [Measurement.from_yaml_dict(measurements, pname='observables')] else: measurements = [Measurement.from_yaml_dict(m, pname='observables') for m in measurements] return [m.name for m in measurements] def _fix_correlation_matrix(corr, n_dim): """In the input file, the correlation matrix can be specified as a list of lists containing only the upper right of the symmetric correlation matrix, e.g. [[1, 0.1, 0.2], [1, 0.3], [1, 0.05]]. This function builds the full matrix. Alternatively, if only a number x is given, the correlation matrix is reconstructed as [[1, x, x, ...], ..., [..., x, x, 1]]""" try: float(corr) except TypeError: # if it's not a number, go on below pass else: # if it's a number, return delta_ij + (1-delta_ij)*x return np.eye(n_dim) + (np.ones((n_dim, n_dim))-np.eye(n_dim))*float(corr) if not isinstance(corr, list): raise TypeError("Correlation matrix must be of type list") if len(corr) != n_dim: raise ValueError("The correlation matrix has inappropriate number of dimensions") corr_out = np.zeros((n_dim, n_dim)) for i, line in enumerate(corr): if len(line) == n_dim: if line[i] != 1: raise ValueError("The correlation matrix must have 1.0 on the diagonal") corr_out[i] = line elif len(line) == n_dim - i: if line[0] != 1: raise ValueError("The correlation matrix must have 1.0 on the diagonal") corr_out[i,i:] = line else: raise ValueError("Correlation matrix not understood") if not np.allclose(corr_out, corr_out.T): # if the covariance is not symmetric, it is assumed that only the values above the diagonal are present. # then: M -> M + M^T - diag(M) corr_out = corr_out + corr_out.T - np.diag(np.diag(corr_out)) return corr_out def write_file(filename, measurements): """Write measurements to a YAML file. measurements can be a list of string names or a list of measurement instances.""" measurement_instances = [m if isinstance(m, Measurement) else Measurement[m] for m in measurements] with open(filename, 'w') as f: yaml.dump([m.get_yaml_dict(pname='observables') for m in measurement_instances], f) def read_file(filename): """Read measurements from a YAML file.""" with open(filename, 'r') as f: try: return _load_new(f) except: f.seek(0) # rewind return _load(f) def read_url(url): """Read measurements from a URL.""" try: import requests except: raise ImportError("You need to install the python requests module to load measurements from a URL.") res = requests.get(url) return _load(res.text) def load(obj): """Alias for `read_file` for backwards compatibility. Don't use.""" warnings.warn("The function `flavio.measurements.load` was replaced " "by `flavio.measurements.read_file` in v0.13 " "and might be removed in the future. " "Please update your code.", FutureWarning) return read_file(obj) def read_default(): """Read all measurements from `data/measurements.yml`. This function is invoked once when the package is loaded.""" return _load(pkgutil.get_data('flavio', 'data/measurements.yml')) read_default()
Functions
def load(
obj)
Alias for read_file
for backwards compatibility. Don't use.
def load(obj): """Alias for `read_file` for backwards compatibility. Don't use.""" warnings.warn("The function `flavio.measurements.load` was replaced " "by `flavio.measurements.read_file` in v0.13 " "and might be removed in the future. " "Please update your code.", FutureWarning) return read_file(obj)
def read_default(
)
Read all measurements from data/measurements.yml
.
This function is invoked once when the package is loaded.
def read_default(): """Read all measurements from `data/measurements.yml`. This function is invoked once when the package is loaded.""" return _load(pkgutil.get_data('flavio', 'data/measurements.yml'))
def read_file(
filename)
Read measurements from a YAML file.
def read_file(filename): """Read measurements from a YAML file.""" with open(filename, 'r') as f: try: return _load_new(f) except: f.seek(0) # rewind return _load(f)
def read_url(
url)
Read measurements from a URL.
def read_url(url): """Read measurements from a URL.""" try: import requests except: raise ImportError("You need to install the python requests module to load measurements from a URL.") res = requests.get(url) return _load(res.text)
def write_file(
filename, measurements)
Write measurements to a YAML file.
measurements can be a list of string names or a list of measurement instances.
def write_file(filename, measurements): """Write measurements to a YAML file. measurements can be a list of string names or a list of measurement instances.""" measurement_instances = [m if isinstance(m, Measurement) else Measurement[m] for m in measurements] with open(filename, 'w') as f: yaml.dump([m.get_yaml_dict(pname='observables') for m in measurement_instances], f)