Skip to content

Commit

Permalink
Feature/corr matrix and inverse cov matrix as input in least squares …
Browse files Browse the repository at this point in the history
…function for correlated fits (fjosw#223)

* feat: corr_matrix kwargs as input for least squares fit

* feat/tests: inverse covariance matrix and correlation matrix kwargs as input for least squares function

* feat/tests/example: reduced new kwargs to 'inv_chol_cov_matrix' and outsourced the inversion & cholesky decomposition of the covariance matrix (function 'invert_corr_cov_cholesky(corr, covdiag)')

* tests: added tests for inv_chol_cov_matrix kwarg for the case of combined fits

* fix: renamed covdiag to inverrdiag needed for the cholesky decomposition and corrected its documentation

* examples: added an example of a correlated combined fit to the least_squares documentation

* feat/tests/fix(of typos): added function 'sort_corr()' (and a test of it) to sort correlation matrix according to a list of alphabetically sorted keys

* docs: added more elaborate documentation/example of sort_corr(), fixed typos in documentation of invert_corr_cov_cholesky()
  • Loading branch information
PiaLJP authored Sep 13, 2024
1 parent 3830e3f commit 1d6f7f6
Show file tree
Hide file tree
Showing 5 changed files with 595 additions and 45 deletions.
331 changes: 297 additions & 34 deletions examples/04_fit_example.ipynb

Large diffs are not rendered by default.

84 changes: 73 additions & 11 deletions pyerrors/fits.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from autograd import elementwise_grad as egrad
from numdifftools import Jacobian as num_jacobian
from numdifftools import Hessian as num_hessian
from .obs import Obs, derived_observable, covariance, cov_Obs
from .obs import Obs, derived_observable, covariance, cov_Obs, invert_corr_cov_cholesky


class Fit_result(Sequence):
Expand Down Expand Up @@ -151,6 +151,14 @@ def func_b(a, x):
For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
inv_chol_cov_matrix [array,list], optional
array: shape = (no of y values) X (no of y values)
list: for an uncombined fit: [""]
for a combined fit: list of keys belonging to the corr_matrix saved in the array, must be the same as the keys of the y dict in alphabetical order
If correlated_fit=True is set as well, can provide an inverse covariance matrix (y errors, dy_f included!) of your own choosing for a correlated fit.
The matrix must be a lower triangular matrix constructed from a Cholesky decomposition: The function invert_corr_cov_cholesky(corr, inverrdiag) can be
used to construct it from a correlation matrix (corr) and the errors dy_f of the data points (inverrdiag = np.diag(1 / np.asarray(dy_f))). For the correct
ordering the correlation matrix (corr) can be sorted via the function sort_corr(corr, kl, yd) where kl is the list of keys and yd the y dict.
expected_chisquare : bool
If True estimates the expected chisquare which is
corrected by effects caused by correlated input data (default False).
Expand All @@ -165,6 +173,57 @@ def func_b(a, x):
-------
output : Fit_result
Parameters and information on the fitted result.
Examples
------
>>> # Example of a correlated (correlated_fit = True, inv_chol_cov_matrix handed over) combined fit, based on a randomly generated data set
>>> import numpy as np
>>> from scipy.stats import norm
>>> from scipy.linalg import cholesky
>>> import pyerrors as pe
>>> # generating the random data set
>>> num_samples = 400
>>> N = 3
>>> x = np.arange(N)
>>> x1 = norm.rvs(size=(N, num_samples)) # generate random numbers
>>> x2 = norm.rvs(size=(N, num_samples)) # generate random numbers
>>> r = r1 = r2 = np.zeros((N, N))
>>> y = {}
>>> for i in range(N):
>>> for j in range(N):
>>> r[i, j] = np.exp(-0.8 * np.fabs(i - j)) # element in correlation matrix
>>> errl = np.sqrt([3.4, 2.5, 3.6]) # set y errors
>>> for i in range(N):
>>> for j in range(N):
>>> r[i, j] *= errl[i] * errl[j] # element in covariance matrix
>>> c = cholesky(r, lower=True)
>>> y = {'a': np.dot(c, x1), 'b': np.dot(c, x2)} # generate y data with the covariance matrix defined
>>> # random data set has been generated, now the dictionaries and the inverse covariance matrix to be handed over are built
>>> x_dict = {}
>>> y_dict = {}
>>> chol_inv_dict = {}
>>> data = []
>>> for key in y.keys():
>>> x_dict[key] = x
>>> for i in range(N):
>>> data.append(pe.Obs([[i + 1 + o for o in y[key][i]]], ['ens'])) # generate y Obs from the y data
>>> [o.gamma_method() for o in data]
>>> corr = pe.covariance(data, correlation=True)
>>> inverrdiag = np.diag(1 / np.asarray([o.dvalue for o in data]))
>>> chol_inv = pe.obs.invert_corr_cov_cholesky(corr, inverrdiag) # gives form of the inverse covariance matrix needed for the combined correlated fit below
>>> y_dict = {'a': data[:3], 'b': data[3:]}
>>> # common fit parameter p[0] in combined fit
>>> def fit1(p, x):
>>> return p[0] + p[1] * x
>>> def fit2(p, x):
>>> return p[0] + p[2] * x
>>> fitf_dict = {'a': fit1, 'b':fit2}
>>> fitp_inv_cov_combined_fit = pe.least_squares(x_dict,y_dict, fitf_dict, correlated_fit = True, inv_chol_cov_matrix = [chol_inv,['a','b']])
Fit with 3 parameters
Method: Levenberg-Marquardt
`ftol` termination condition is satisfied.
chisquare/d.o.f.: 0.5388013574561786 # random
fit parameters [1.11897846 0.96361162 0.92325319] # random
'''
output = Fit_result()

Expand Down Expand Up @@ -297,15 +356,19 @@ def chisqfunc_uncorr(p):
return anp.sum(general_chisqfunc_uncorr(p, y_f, p_f) ** 2)

if kwargs.get('correlated_fit') is True:
corr = covariance(y_all, correlation=True, **kwargs)
covdiag = np.diag(1 / np.asarray(dy_f))
condn = np.linalg.cond(corr)
if condn > 0.1 / np.finfo(float).eps:
raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
if condn > 1e13:
warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
chol = np.linalg.cholesky(corr)
chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
if 'inv_chol_cov_matrix' in kwargs:
chol_inv = kwargs.get('inv_chol_cov_matrix')
if (chol_inv[0].shape[0] != len(dy_f)):
raise TypeError('The number of columns of the inverse covariance matrix handed over needs to be equal to the number of y errors.')
if (chol_inv[0].shape[0] != chol_inv[0].shape[1]):
raise TypeError('The inverse covariance matrix handed over needs to have the same number of rows as columns.')
if (chol_inv[1] != key_ls):
raise ValueError('The keys of inverse covariance matrix are not the same or do not appear in the same order as the x and y values.')
chol_inv = chol_inv[0]
else:
corr = covariance(y_all, correlation=True, **kwargs)
inverrdiag = np.diag(1 / np.asarray(dy_f))
chol_inv = invert_corr_cov_cholesky(corr, inverrdiag)

def general_chisqfunc(p, ivars, pr):
model = anp.concatenate([anp.array(funcd[key](p, xd[key])).reshape(-1) for key in key_ls])
Expand Down Expand Up @@ -350,7 +413,6 @@ def chisqfunc_residuals_uncorr(p):

fit_result = scipy.optimize.least_squares(chisqfunc_residuals_uncorr, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
if kwargs.get('correlated_fit') is True:

def chisqfunc_residuals(p):
return general_chisqfunc(p, y_f, p_f)

Expand Down
86 changes: 86 additions & 0 deletions pyerrors/obs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1544,6 +1544,92 @@ def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
return cov


def invert_corr_cov_cholesky(corr, inverrdiag):
"""Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.
Parameters
----------
corr : np.ndarray
correlation matrix
inverrdiag : np.ndarray
diagonal matrix, the entries are the inverse errors of the data points considered
"""

condn = np.linalg.cond(corr)
if condn > 0.1 / np.finfo(float).eps:
raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
if condn > 1e13:
warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
chol = np.linalg.cholesky(corr)
chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)

return chol_inv


def sort_corr(corr, kl, yd):
""" Reorders a correlation matrix to match the alphabetical order of its underlying y data.
The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
that the correlation matrix is based on.
This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
to the y data `yd` when arranged in an alphabetical order by its keys.
Parameters
----------
corr : np.ndarray
A square correlation matrix constructed using the order of the y data specified by `kl`.
The dimensions of `corr` should match the total number of y data points in `yd` combined.
kl : list of str
A list of keys that denotes the order in which the y data from `yd` was used to build the
input correlation matrix `corr`.
yd : dict of list
A dictionary where each key corresponds to a unique identifier, and its value is a list of
y data points. The total number of y data points across all keys must match the dimensions
of `corr`. The lists in the dictionary can be lists of Obs.
Returns
-------
np.ndarray
A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.
Example
-------
>>> import numpy as np
>>> import pyerrors as pe
>>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
>>> kl = ['b', 'a']
>>> yd = {'a': [1, 2], 'b': [3]}
>>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
>>> print(sorted_corr)
array([[1. , 0.3, 0.4],
[0.3, 1. , 0.2],
[0.4, 0.2, 1. ]])
"""
kl_sorted = sorted(kl)

posd = {}
ofs = 0
for ki, k in enumerate(kl):
posd[k] = [i + ofs for i in range(len(yd[k]))]
ofs += len(posd[k])

mapping = []
for k in kl_sorted:
for i in range(len(yd[k])):
mapping.append(posd[k][i])

corr_sorted = np.zeros_like(corr)
for i in range(corr.shape[0]):
for j in range(corr.shape[0]):
corr_sorted[i][j] = corr[mapping[i]][mapping[j]]

return corr_sorted


def _smooth_eigenvalues(corr, E):
"""Eigenvalue smoothing as described in hep-lat/9412087
Expand Down
118 changes: 118 additions & 0 deletions tests/fits_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,124 @@ def func(a, x):
chisquare_values = np.array(chisquare_values)
assert np.all(np.isclose(chisquare_values, chisquare_values[0]))

def test_inv_cov_matrix_input_least_squares():


num_samples = 400
N = 10

x = norm.rvs(size=(N, num_samples)) # generate random numbers

r = np.zeros((N, N))
for i in range(N):
for j in range(N):
r[i, j] = np.exp(-0.8 * np.fabs(i - j)) # element in correlation matrix

errl = np.sqrt([3.4, 2.5, 3.6, 2.8, 4.2, 4.7, 4.9, 5.1, 3.2, 4.2]) # set y errors
for i in range(N):
for j in range(N):
r[i, j] *= errl[i] * errl[j] # element in covariance matrix

c = cholesky(r, lower=True)
y = np.dot(c, x)
x = np.arange(N)
x_dict = {}
y_dict = {}
for i,item in enumerate(x):
x_dict[str(item)] = [x[i]]

for linear in [True, False]:
data = []
for i in range(N):
if linear:
data.append(pe.Obs([[i + 1 + o for o in y[i]]], ['ens']))
else:
data.append(pe.Obs([[np.exp(-(i + 1)) + np.exp(-(i + 1)) * o for o in y[i]]], ['ens']))

[o.gamma_method() for o in data]

data_dict = {}
for i,item in enumerate(x):
data_dict[str(item)] = [data[i]]

corr = pe.covariance(data, correlation=True)
chol = np.linalg.cholesky(corr)
covdiag = np.diag(1 / np.asarray([o.dvalue for o in data]))
chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
chol_inv_keys = [""]
chol_inv_keys_combined_fit = [str(item) for i,item in enumerate(x)]

if linear:
def fitf(p, x):
return p[1] + p[0] * x
fitf_dict = {}
for i,item in enumerate(x):
fitf_dict[str(item)] = fitf
else:
def fitf(p, x):
return p[1] * anp.exp(-p[0] * x)
fitf_dict = {}
for i,item in enumerate(x):
fitf_dict[str(item)] = fitf

fitpc = pe.least_squares(x, data, fitf, correlated_fit=True)
fitp_inv_cov = pe.least_squares(x, data, fitf, correlated_fit = True, inv_chol_cov_matrix = [chol_inv,chol_inv_keys])
fitp_inv_cov_combined_fit = pe.least_squares(x_dict, data_dict, fitf_dict, correlated_fit = True, inv_chol_cov_matrix = [chol_inv,chol_inv_keys_combined_fit])
for i in range(2):
diff_inv_cov = fitp_inv_cov[i] - fitpc[i]
diff_inv_cov.gamma_method()
assert(diff_inv_cov.is_zero(atol=0.0))
diff_inv_cov_combined_fit = fitp_inv_cov_combined_fit[i] - fitpc[i]
diff_inv_cov_combined_fit.gamma_method()
assert(diff_inv_cov_combined_fit.is_zero(atol=1e-12))

def test_least_squares_invalid_inv_cov_matrix_input():
xvals = []
yvals = []
err = 0.1
def func_valid(a,x):
return a[0] + a[1] * x
for x in range(1, 8, 2):
xvals.append(x)
yvals.append(pe.pseudo_Obs(x + np.random.normal(0.0, err), err, 'test1') + pe.pseudo_Obs(0, err / 100, 'test2', samples=87))

[o.gamma_method() for o in yvals]

#dictionaries for a combined fit
xvals_dict = { }
yvals_dict = { }
for i,item in enumerate(np.arange(1, 8, 2)):
xvals_dict[str(item)] = [xvals[i]]
yvals_dict[str(item)] = [yvals[i]]
chol_inv_keys_combined_fit = ['1', '3', '5', '7']
chol_inv_keys_combined_fit_invalid = ['2', '7', '100', '8']
func_dict_valid = {"1": func_valid,"3": func_valid,"5": func_valid,"7": func_valid}

corr_valid = pe.covariance(yvals, correlation = True)
chol = np.linalg.cholesky(corr_valid)
covdiag = np.diag(1 / np.asarray([o.dvalue for o in yvals]))
chol_inv_valid = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
chol_inv_keys = [""]
pe.least_squares(xvals, yvals,func_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys])
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit])
chol_inv_invalid_shape1 = np.zeros((len(yvals),len(yvals)-1))
chol_inv_invalid_shape2 = np.zeros((len(yvals)+2,len(yvals)))

# for an uncombined fit
with pytest.raises(TypeError):
pe.least_squares(xvals, yvals, func_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape1,chol_inv_keys])
with pytest.raises(TypeError):
pe.least_squares(xvals, yvals, func_valid,correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape2,chol_inv_keys])
with pytest.raises(ValueError):
pe.least_squares(xvals, yvals, func_valid,correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit_invalid])

#repeat for a combined fit
with pytest.raises(TypeError):
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape1,chol_inv_keys_combined_fit])
with pytest.raises(TypeError):
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape2,chol_inv_keys_combined_fit])
with pytest.raises(ValueError):
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit_invalid])

def test_correlated_fit():
num_samples = 400
Expand Down
21 changes: 21 additions & 0 deletions tests/obs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,27 @@ def test_covariance_reorder_non_overlapping_data():
assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)


def test_sort_corr():
xd = {
'b': [1, 2, 3],
'a': [2.2, 4.4],
'c': [3.7, 5.1]
}

yd = {k : pe.cov_Obs(xd[k], [.2 * o for o in xd[k]], k) for k in xd}
key_orig = list(yd.keys())
y_all = np.concatenate([np.array(yd[key]) for key in key_orig])
[o.gm() for o in y_all]
cov = pe.covariance(y_all)

key_ls = key_sorted = sorted(key_orig)
y_sorted = np.concatenate([np.array(yd[key]) for key in key_sorted])
[o.gm() for o in y_sorted]
cov_sorted = pe.covariance(y_sorted)
retcov = pe.obs.sort_corr(cov, key_orig, yd)
assert np.sum(retcov - cov_sorted) == 0


def test_empty_obs():
o = pe.Obs([np.random.rand(100)], ['test'])
q = o + pe.Obs([], [], means=[])
Expand Down

0 comments on commit 1d6f7f6

Please sign in to comment.