Source code for rsatoolbox.model.fitter

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Parameter fitting methods for models
"""

import numpy as np
import scipy.optimize as opt
import scipy.sparse.linalg
from rsatoolbox.rdm import compare
from rsatoolbox.util.matrix import get_v
from rsatoolbox.util.pooling import pool_rdm
from rsatoolbox.util.rdm_utils import _parse_nan_vectors


class Fitter:
    """Object to specify a fitting function and parameters

    Effectively this gives the user a convenient way to specify fitting
    functions with settings different from the defaults. Create this object
    with the fitting function to use and additional keyword arguments for
    the settings you wish to change. The resulting object then behaves as
    the fitting function itself with the keyword arguments set to the
    values provided at object creation.

    Example: generate a Fitter object for ridge regression::

        fit = rsatoolbox.model.Fitter(rsatoolbox.model.fit_regress,
                                      ridge_weight=1)

    The resulting object ``fit`` now does the same as
    rsatoolbox.model.fit_regress when run with the additional argument
    ``ridge_weight=1``, i.e. the following two lines yield equal results::

        fit(model, data)
        rsatoolbox.model.fit_regress(model, data, ridge_weight=1)

    For a general introduction to flexible models see demo_flex.
    """

    def __init__(self, fit_fun, **kwargs):
        self.fit_fun = fit_fun
        self.kwargs = kwargs

    def __call__(self, model, data, *args, **more_args):
        return self.fit_fun(model, data, *args, **more_args, **self.kwargs)

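# Editor's sketch (not part of the original module): minimal usage of
# Fitter, mirroring the docstring example above. `model` and `data` are
# placeholders for an rsatoolbox model (e.g. ModelWeighted) and an
# rsatoolbox.rdm.RDMs object.
def _example_fitter_usage(model, data):
    fit = Fitter(fit_regress, ridge_weight=1)
    theta_a = fit(model, data)
    theta_b = fit_regress(model, data, ridge_weight=1)
    # the two calls are equivalent by construction
    return theta_a, theta_b
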
def fit_mock(model, data, method='cosine', pattern_idx=None,
             pattern_descriptor=None, sigma_k=None):
    """ formally acceptable fitting method which always returns a vector of
    zeros

    Args:
        model(rsatoolbox.model.Model): model to be fit
        data(rsatoolbox.rdm.RDMs): Data to fit to
        method(String): Evaluation method
        pattern_idx(numpy.ndarray): Which patterns are sampled
        pattern_descriptor(String): Which descriptor is used
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms

    Returns:
        theta(numpy.ndarray): parameter vector

    """
    return np.zeros(model.n_param)

def fit_select(model, data, method='cosine', pattern_idx=None,
               pattern_descriptor=None, sigma_k=None):
    """ fits selection models by evaluating each rdm and selecting the one
    with the best performance

    Works only for ModelSelect

    Args:
        model(rsatoolbox.model.Model): model to be fit
        data(rsatoolbox.rdm.RDMs): Data to fit to
        method(String): Evaluation method
        pattern_idx(numpy.ndarray): Which patterns are sampled
        pattern_descriptor(String): Which descriptor is used
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms

    Returns:
        theta(int): index of the selected rdm

    """
    evaluations = np.zeros(model.n_rdm)
    for i_rdm in range(model.n_rdm):
        pred = model.predict_rdm(i_rdm)
        if not (pattern_idx is None or pattern_descriptor is None):
            pred = pred.subsample_pattern(pattern_descriptor, pattern_idx)
        evaluations[i_rdm] = np.mean(
            compare(pred, data, method=method, sigma_k=sigma_k))
    theta = np.argmax(evaluations)
    return theta

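# Editor's sketch (not part of the original module): fitting a selection
# model with two candidate RDMs for 3 conditions. It assumes `data` is an
# rsatoolbox.rdm.RDMs object with 3 conditions and that ModelSelect is
# constructed as ModelSelect(name, rdms), as in rsatoolbox.model.
def _example_fit_select(data):
    from rsatoolbox.rdm import RDMs
    from rsatoolbox.model import ModelSelect
    candidates = RDMs(np.array([[1., 2., 3.], [3., 2., 1.]]))
    model = ModelSelect('select', candidates)
    # returns the index of the best-performing candidate RDM
    return fit_select(model, data, method='cosine')
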
def fit_optimize(model, data, method='cosine', pattern_idx=None,
                 pattern_descriptor=None, sigma_k=None,
                 ridge_weight=0, normalize=True):
    """ fitting theta using numerical optimization

    currently allowed for ModelWeighted only

    Args:
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric
            The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional): sampled patterns
            The default is None.
        pattern_descriptor(String, optional): descriptor used for fitting.
            The default is None.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        ridge_weight(float, optional): weight for a ridge regularisation
            The default is 0.
        normalize(bool): whether to normalize the theta vector
            default = True
            If True, theta is normalized to norm 1. This is sensible for
            many models where the norm of theta does not change the loss.

    Returns:
        numpy.ndarray: theta, parameter vector for the model

    """
    def _loss_opt(theta):
        return _loss(theta, model, data, method=method,
                     pattern_idx=pattern_idx,
                     pattern_descriptor=pattern_descriptor,
                     sigma_k=sigma_k, ridge_weight=ridge_weight)
    # multi-start optimization: run BFGS from several random starting
    # points and keep the solution with the lowest loss
    thetas = []
    losses = []
    for _ in range(2 * model.n_param):
        theta0 = np.random.rand(model.n_param)
        theta = opt.minimize(
            _loss_opt, theta0, method='BFGS', tol=0.000001)
        thetas.append(theta.x)
        losses.append(theta.fun)
    i_best = np.argmin(losses)
    theta = thetas[i_best]
    if not normalize:
        return theta.flatten()
    norm = np.sum(theta ** 2)
    if norm == 0:
        return theta.flatten()
    return theta.flatten() / np.sqrt(norm)

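# Editor's sketch (not part of the original module): fitting a weighted
# model with two component RDMs by optimization. It assumes `data` is an
# rsatoolbox.rdm.RDMs object with 3 conditions and that ModelWeighted is
# constructed as ModelWeighted(name, rdms), as in rsatoolbox.model.
def _example_fit_optimize(data):
    from rsatoolbox.rdm import RDMs
    from rsatoolbox.model import ModelWeighted
    components = RDMs(np.array([[1., 2., 3.], [3., 2., 1.]]))
    model = ModelWeighted('weighted', components)
    # the returned theta is normalized to norm 1 by default
    return fit_optimize(model, data, method='cosine')
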
def fit_optimize_positive(model, data, method='cosine', pattern_idx=None,
                          pattern_descriptor=None, sigma_k=None,
                          ridge_weight=0, normalize=True):
    """ fitting theta using numerical optimization
    while enforcing positive weights

    currently allowed for ModelWeighted only

    Args:
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric
            The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional): sampled patterns
            The default is None.
        pattern_descriptor(String, optional): descriptor used for fitting.
            The default is None.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        ridge_weight(float, optional): weight for a ridge regularisation
            The default is 0.
        normalize(bool): whether to normalize the theta vector
            default = True
            If True, theta is normalized to norm 1. This is sensible for
            many models where the norm of theta does not change the loss.

    Returns:
        numpy.ndarray: theta, parameter vector for the model

    """
    def _loss_opt(theta):
        # optimizing over theta ** 2 turns the positivity constraint into
        # an unconstrained problem
        return _loss(theta ** 2, model, data, method=method,
                     pattern_idx=pattern_idx,
                     pattern_descriptor=pattern_descriptor,
                     sigma_k=sigma_k, ridge_weight=ridge_weight)
    # starting points: the zero vector, one random vector, and one vector
    # per parameter that is dominated by that parameter
    theta0 = np.zeros(model.n_param)
    thetas = [theta0]
    losses = [_loss_opt(theta0)]
    theta0 = np.random.rand(model.n_param)
    theta = opt.minimize(
        fun=_loss_opt, x0=theta0, method='BFGS', tol=0.000001)
    thetas.append(theta.x)
    losses.append(theta.fun)
    for i in range(model.n_param):
        theta0 = np.ones(model.n_param) * 0.001
        theta0[i] = 1
        theta = opt.minimize(
            fun=_loss_opt, x0=theta0, method='BFGS', tol=0.000001)
        thetas.append(theta.x)
        losses.append(theta.fun)
    i_best = np.argmin(losses)
    theta = thetas[i_best] ** 2
    if not normalize:
        return theta.flatten()
    norm = np.sum(theta ** 2)
    if norm == 0:
        return theta.flatten()
    return theta.flatten() / np.sqrt(norm)

def fit_interpolate(model, data, method='cosine', pattern_idx=None,
                    pattern_descriptor=None, sigma_k=None):
    """ fitting theta using bisection optimization

    allowed for ModelInterpolate only

    Args:
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric
            The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional): sampled patterns
            The default is None.
        pattern_descriptor(String, optional): descriptor used for fitting.
            The default is None.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms

    Returns:
        numpy.ndarray: theta, parameter vector for the model

    """
    results = []
    # optimize the interpolation weight separately for each pair of
    # neighboring rdms and keep the best-performing pair
    for i_pair in range(model.n_rdm - 1):
        def loss_opt(w, i_pair=i_pair):
            theta = np.zeros(model.n_param)
            theta[i_pair] = w
            theta[i_pair + 1] = 1 - w
            return _loss(theta, model, data, method=method,
                         pattern_idx=pattern_idx,
                         pattern_descriptor=pattern_descriptor,
                         sigma_k=sigma_k)
        results.append(
            opt.minimize_scalar(loss_opt, method='bounded', bounds=(0, 1)))
    losses = [r.fun for r in results]
    i_pair = np.argmin(losses)
    result = results[i_pair]
    theta = np.zeros(model.n_rdm)
    theta[i_pair] = result.x
    theta[i_pair + 1] = 1 - result.x
    return theta

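# Editor's sketch (not part of the original module): interpolating between
# two candidate RDMs. It assumes `data` is an rsatoolbox.rdm.RDMs object
# with 3 conditions and that ModelInterpolate is constructed as
# ModelInterpolate(name, rdms), as in rsatoolbox.model.
def _example_fit_interpolate(data):
    from rsatoolbox.rdm import RDMs
    from rsatoolbox.model import ModelInterpolate
    candidates = RDMs(np.array([[1., 2., 3.], [3., 2., 1.]]))
    model = ModelInterpolate('interpolate', candidates)
    # theta holds one weight per rdm; the two weights of the selected
    # neighboring pair sum to 1, all others are 0
    return fit_interpolate(model, data, method='cosine')
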
def fit_regress(model, data, method='cosine', pattern_idx=None,
                pattern_descriptor=None, ridge_weight=0, sigma_k=None,
                normalize=True):
    """ fitting theta using linear algebra solutions to the OLS problem

    allowed for ModelWeighted only

    This method first normalizes the data and model RDMs appropriately
    for the measure to be optimized. For 'cosine' similarity this is a
    normalization of the data-RDMs to vector length 1. For correlation
    the mean is additionally removed from both model and data rdms. Then
    the parameters are estimated using ordinary least squares.

    Args:
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric
            The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional): sampled patterns
            The default is None.
        pattern_descriptor(String, optional): descriptor used for fitting.
            The default is None.
        ridge_weight(float, default=0): weight for the ridge-regularisation
            of the regression. The weight is relative to the final
            regression problem on the appropriately normalized regressors.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        normalize(bool): whether to normalize the theta vector
            default = True
            If True, theta is normalized to norm 1. This is sensible for
            many models where the norm of theta does not change the loss.

    Returns:
        numpy.ndarray: theta, parameter vector for the model

    """
    if not (pattern_idx is None or pattern_descriptor is None):
        pred = model.rdm_obj.subsample_pattern(pattern_descriptor,
                                               pattern_idx)
    else:
        pred = model.rdm_obj
    vectors = pred.get_vectors()
    data_mean = pool_rdm(data, method=method)
    y = data_mean.get_vectors()
    vectors, y, non_nan_mask = _parse_nan_vectors(vectors, y)
    # normalizations, dependent on the evaluation method
    if method == 'cosine':
        v = None
    elif method == 'corr':
        vectors = vectors - np.mean(vectors, 1, keepdims=True)
        v = None
    elif method == 'cosine_cov':
        v = get_v(pred.n_cond, sigma_k)
        v = v[non_nan_mask[0]][:, non_nan_mask[0]]
    elif method == 'corr_cov':
        vectors = vectors - np.mean(vectors, 1, keepdims=True)
        y = y - np.mean(y)
        v = get_v(pred.n_cond, sigma_k)
        v = v[non_nan_mask[0]][:, non_nan_mask[0]]
    else:
        raise ValueError('method argument invalid')
    if v is None:
        X = vectors @ vectors.T + ridge_weight * np.eye(vectors.shape[0])
        y = vectors @ y.T
    else:
        # solve v @ x = vectors[i] by conjugate gradients for each
        # regressor instead of inverting v explicitly
        v_inv_x = np.array([
            scipy.sparse.linalg.cg(v, vectors[i], atol=10 ** -9)[0]
            for i in range(vectors.shape[0])])
        y = v_inv_x @ y.T
        X = vectors @ v_inv_x.T + ridge_weight * np.eye(vectors.shape[0])
    theta = np.linalg.solve(X, y)
    if not normalize:
        return theta.flatten()
    norm = np.sum(theta ** 2)
    if norm == 0:
        return theta.flatten()
    return theta.flatten() / np.sqrt(norm)

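# Editor's sketch (not part of the original module): for a ModelWeighted
# the closed-form fit should agree with the iterative fit up to optimizer
# tolerance; `model` and `data` are placeholders, and the tolerance is an
# assumption, not a guarantee from the library.
def _example_fit_regress(model, data):
    theta_closed = fit_regress(model, data, method='cosine')
    theta_iter = fit_optimize(model, data, method='cosine')
    return np.allclose(theta_closed, theta_iter, atol=1e-3)
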
def fit_regress_nn(model, data, method='cosine', pattern_idx=None,
                   pattern_descriptor=None, ridge_weight=0, sigma_k=None,
                   normalize=True):
    """ fitting theta using a non-negative least squares solution

    allowed for ModelWeighted only

    This method first normalizes the data and model RDMs appropriately
    for the measure to be optimized. For 'cosine' similarity this is a
    normalization of the data-RDMs to vector length 1. For correlation
    the mean is additionally removed from both model and data rdms. Then
    the parameters are estimated using non-negative least squares.

    Args:
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric
            The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional): sampled patterns
            The default is None.
        pattern_descriptor(String, optional): descriptor used for fitting.
            The default is None.
        ridge_weight(float, default=0): weight for the ridge-regularisation
            of the regression. The weight is relative to the final
            regression problem on the appropriately normalized regressors.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        normalize(bool): whether to normalize the theta vector
            default = True
            If True, theta is normalized to norm 1. This is sensible for
            many models where the norm of theta does not change the loss.

    Returns:
        numpy.ndarray: theta, parameter vector for the model

    """
    if not (pattern_idx is None or pattern_descriptor is None):
        pred = model.rdm_obj.subsample_pattern(pattern_descriptor,
                                               pattern_idx)
    else:
        pred = model.rdm_obj
    vectors = pred.get_vectors()
    data_mean = pool_rdm(data, method=method)
    y = data_mean.get_vectors()
    vectors, y, non_nan_mask = _parse_nan_vectors(vectors, y)
    # normalizations, dependent on the evaluation method
    if method == 'cosine':
        v = None
    elif method == 'corr':
        vectors = vectors - np.mean(vectors, 1, keepdims=True)
        v = None
    elif method == 'cosine_cov':
        v = get_v(pred.n_cond, sigma_k)
        v = v[non_nan_mask[0]][:, non_nan_mask[0]]
    elif method == 'corr_cov':
        vectors = vectors - np.mean(vectors, 1, keepdims=True)
        y = y - np.mean(y)
        v = get_v(pred.n_cond, sigma_k)
        v = v[non_nan_mask[0]][:, non_nan_mask[0]]
    else:
        raise ValueError('method argument invalid')
    theta, _ = _nn_least_squares(vectors.T, y[0],
                                 ridge_weight=ridge_weight, V=v)
    if not normalize:
        return theta.flatten()
    norm = np.sum(theta ** 2)
    if norm == 0:
        return theta.flatten()
    return theta.flatten() / np.sqrt(norm)

def _loss(theta, model, data, method='cosine', sigma_k=None,
          pattern_descriptor=None, pattern_idx=None, ridge_weight=0):
    """ calculates the loss for a model and parameter combination

    Args:
        theta(numpy.ndarray): evaluated parameter values
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric
            The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional): sampled patterns
            The default is None.
        pattern_descriptor(String, optional): descriptor used for fitting.
            The default is None.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        ridge_weight(float): weight for a ridge regularisation

    Returns:
        numpy.ndarray: loss

    """
    pred = model.predict_rdm(theta)
    if not (pattern_idx is None or pattern_descriptor is None):
        pred = pred.subsample_pattern(pattern_descriptor, pattern_idx)
    return -np.mean(compare(pred, data, method=method, sigma_k=sigma_k)) \
        + np.sum(theta * theta) * ridge_weight


def _nn_least_squares(A, y, ridge_weight=0, V=None):
    """ non-negative least squares

    essentially scipy.optimize.nnls extended to accept a ridge-regression
    regularisation and/or a covariance matrix V.

    The algorithm is described in detail here:
    Bro, R., & Jong, S. D. (1997). A fast non-negativity-constrained
    least squares algorithm. Journal of Chemometrics, 11, 9.

    This is an active set algorithm which is somewhat optimized by
    precomputing A^T V^-1 A and A^T V^-1 y such that during the
    optimization only matrices of rank r need to be inverted.

    This is tested against the scipy solution for ridge_weight=0 and
    V=None. For other V the validation comes from fitting the same models
    using general optimization.
    """
    assert A.shape[0] == y.shape[0]
    assert y.ndim == 1
    x = np.zeros(A.shape[1])
    p = np.zeros(A.shape[1], bool)
    if V is None:
        w = A.T @ y
        ATA = A.T @ A + ridge_weight * np.eye(A.shape[1])
    else:
        # V_A approximates V^-1 A, computed column-wise by conjugate
        # gradients to avoid inverting V explicitly
        V_A = np.array([
            scipy.sparse.linalg.cg(V, A[:, i], atol=10 ** -9)[0]
            for i in range(A.shape[1])])
        y_V_A = V_A @ y
        w = y_V_A
        ATA = A.T @ V_A.T + ridge_weight * np.eye(A.shape[1])
    while np.max(w) > 100 * np.finfo(float).eps:
        # move the variable with the largest gradient into the active set
        p[np.argmax(w)] = True
        if V is None:
            s_p = np.linalg.solve(ATA[p][:, p], A[:, p].T @ y)
        else:
            s_p = np.linalg.solve(ATA[p][:, p], y_V_A[p])
        while np.any(s_p < 0):
            # step back towards the feasible region and remove the
            # variable that hits zero first from the active set
            alphas = x[p] / (x[p] - s_p)
            alphas[s_p > 0] = 1
            i_alpha = np.argmin(alphas)
            alpha = alphas[i_alpha]
            x[p] = x[p] + alpha * (s_p - x[p])
            i_alpha = np.where(p)[0][i_alpha]
            x[i_alpha] = 0
            p[i_alpha] = False
            if V is None:
                s_p = np.linalg.solve(ATA[p][:, p], A[:, p].T @ y)
            else:
                s_p = np.linalg.solve(ATA[p][:, p], y_V_A[p])
        x[p] = s_p
        if V is None:
            w = A.T @ y - ATA @ x
        else:
            w = y_V_A - ATA @ x
    if V is None:
        loss = np.sum((y - A @ x) ** 2)
    else:
        loss = (y - A @ x).T @ V @ (y - A @ x)
    return x, loss
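
# Editor's sketch (not part of the original module): reproduce the
# validation mentioned in the _nn_least_squares docstring — for
# ridge_weight=0 and V=None the result should match scipy.optimize.nnls.
def _example_nn_least_squares():
    rng = np.random.default_rng(0)
    A = rng.standard_normal((20, 5))
    y = rng.standard_normal(20)
    x_ours, _ = _nn_least_squares(A, y)
    x_scipy, _ = opt.nnls(A, y)
    assert np.allclose(x_ours, x_scipy, atol=1e-6)
    return x_ours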