#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Parameter fitting methods for models
"""
import numpy as np
import scipy.optimize as opt
import scipy.sparse
from rsatoolbox.rdm import compare
from rsatoolbox.util.matrix import get_v
from rsatoolbox.util.pooling import pool_rdm
from rsatoolbox.util.rdm_utils import _parse_nan_vectors
class Fitter:
    """Callable wrapper binding keyword arguments to a fitting function.

    Gives the user a convenient way to pre-configure a fitting function
    with settings different from its defaults. Create the object with
    the fitting function and any keyword arguments you wish to change;
    the resulting object then behaves like the fitting function itself,
    with those keyword arguments applied on every call.

    Example:
        generate Fitter-object for ridge regression::

            fit = pyrsa.model.Fitter(pyrsa.model.fit_regress, ridge_weight=1)

        the resulting object 'fit' now does the same as
        pyrsa.model.fit_regress when run with the additional argument
        ``ridge_weight=1``, i.e. the following two lines yield equal results::

            fit(model, data)
            pyrsa.model.fit_regress(model, data, ridge_weight=1)

    For a general introduction to flexible models see demo_flex.
    """

    def __init__(self, fit_fun, **kwargs):
        # the wrapped fitting function and the pre-bound settings
        self.fit_fun = fit_fun
        self.kwargs = kwargs

    def __call__(self, model, data, *args, **more_args):
        # forward call-time arguments together with the stored settings
        return self.fit_fun(model, data, *args, **more_args, **self.kwargs)
def fit_mock(model, data, method='cosine', pattern_idx=None,
             pattern_descriptor=None, sigma_k=None):
    """ formally acceptable fitting method which ignores the data and
    always returns an all-zero parameter vector

    Args:
        model(rsatoolbox.model.Model): model to be fit
        data(rsatoolbox.rdm.RDMs): Data to fit to
        method(String): Evaluation method
        pattern_idx(numpy.ndarray): Which patterns are sampled
        pattern_descriptor(String): Which descriptor is used
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms

    Returns:
        theta(numpy.ndarray): parameter vector
    """
    theta = np.zeros(model.n_param)
    return theta
def fit_select(model, data, method='cosine', pattern_idx=None,
               pattern_descriptor=None, sigma_k=None):
    """ fits selection models by evaluating each rdm and selecting the one
    with best performance. Works only for ModelSelect

    Args:
        model(rsatoolbox.model.Model): model to be fit
        data(rsatoolbox.rdm.RDMs): Data to fit to
        method(String): Evaluation method
        pattern_idx(numpy.ndarray): Which patterns are sampled
        pattern_descriptor(String): Which descriptor is used
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms

    Returns:
        theta(int): parameter vector
    """
    performances = []
    for candidate in range(model.n_rdm):
        pred = model.predict_rdm(candidate)
        if pattern_idx is not None and pattern_descriptor is not None:
            pred = pred.subsample_pattern(pattern_descriptor, pattern_idx)
        # mean similarity of this candidate rdm to the data rdms
        performances.append(
            np.mean(compare(pred, data, method=method, sigma_k=sigma_k)))
    # the chosen "theta" is simply the index of the best candidate
    return np.argmax(performances)
def fit_optimize(model, data, method='cosine', pattern_idx=None,
                 pattern_descriptor=None, sigma_k=None, ridge_weight=0,
                 normalize=True):
    """
    fitting theta using optimization
    currently allowed for ModelWeighted only

    Args:
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional):
            sampled patterns The default is None.
        pattern_descriptor (String, optional):
            descriptor used for fitting. The default is None.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        ridge_weight(float): weight for a ridge regularisation of the loss
        normalize(bool): whether to normalize the theta vector
            default = True
            If true, theta is normalized to norm 1.
            This is sensible for many models where the norm
            of theta does not vary the loss.

    Returns:
        numpy.ndarray: theta, parameter vector for the model
    """
    def _loss_opt(theta):
        return _loss(theta, model, data, method=method,
                     pattern_idx=pattern_idx,
                     pattern_descriptor=pattern_descriptor,
                     sigma_k=sigma_k, ridge_weight=ridge_weight)
    # multi-start optimization: BFGS from 2 * n_param random starting points
    thetas = []
    losses = []
    for _ in range(2 * model.n_param):
        theta0 = np.random.rand(model.n_param)
        result = opt.minimize(
            _loss_opt,
            theta0,
            method='BFGS',
            tol=0.000001
        )
        thetas.append(result.x)
        losses.append(result.fun)
    # keep the start that reached the lowest loss
    # (renamed from `id`, which shadowed the builtin)
    best = int(np.argmin(losses))
    theta = thetas[best]
    if not normalize:
        return theta.flatten()
    norm = np.sum(theta ** 2)
    if norm == 0:
        return theta.flatten()
    return theta.flatten() / np.sqrt(norm)
def fit_optimize_positive(
        model, data, method='cosine', pattern_idx=None,
        pattern_descriptor=None, sigma_k=None, ridge_weight=0,
        normalize=True):
    """
    fitting theta using optimization enforcing positive weights
    currently allowed for ModelWeighted only

    Args:
        model(Model): the model to be fit
        data(pyrsa.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional):
            sampled patterns The default is None.
        pattern_descriptor (String, optional):
            descriptor used for fitting. The default is None.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        ridge_weight(float): weight for a ridge regularisation of the loss
        normalize(bool): whether to normalize the theta vector
            default = True
            If true, theta is normalized to norm 1.
            This is sensible for many models where the norm
            of theta does not vary the loss.

    Returns:
        numpy.ndarray: theta, parameter vector for the model
    """
    def _loss_opt(theta):
        # squaring the parameters enforces non-negative effective weights
        return _loss(theta ** 2, model, data, method=method,
                     pattern_idx=pattern_idx,
                     pattern_descriptor=pattern_descriptor,
                     sigma_k=sigma_k, ridge_weight=ridge_weight)
    # candidate 1: the all-zero vector (evaluated directly, not optimized)
    theta0 = np.zeros(model.n_param)
    thetas = [theta0]
    losses = [_loss_opt(theta0)]
    # candidate 2: BFGS from a random starting point
    theta0 = np.random.rand(model.n_param)
    result = opt.minimize(
        fun=_loss_opt,
        x0=theta0,
        method='BFGS',
        tol=0.000001
    )
    thetas.append(result.x)
    losses.append(result.fun)
    # candidates 3..: BFGS from one near-axis start per parameter
    for i in range(model.n_param):
        theta0 = np.ones(model.n_param) * 0.001
        theta0[i] = 1
        result = opt.minimize(
            fun=_loss_opt,
            x0=theta0,
            method='BFGS',
            tol=0.000001
        )
        thetas.append(result.x)
        losses.append(result.fun)
    # keep the candidate with the lowest loss; square to map back to weights
    # (renamed from `id`, which shadowed the builtin)
    best = int(np.argmin(losses))
    theta = thetas[best] ** 2
    if not normalize:
        return theta.flatten()
    norm = np.sum(theta ** 2)
    if norm == 0:
        return theta.flatten()
    return theta.flatten() / np.sqrt(norm)
def fit_interpolate(model, data, method='cosine', pattern_idx=None,
                    pattern_descriptor=None, sigma_k=None):
    """
    fitting theta using bisection optimization
    allowed for ModelInterpolate only

    Args:
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional):
            sampled patterns The default is None.
        pattern_descriptor (String, optional):
            descriptor used for fitting. The default is None.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms

    Returns:
        numpy.ndarray: theta, parameter vector for the model
    """
    def _pair_loss(w, pair):
        # interpolate between neighbouring rdms: weight w on rdm `pair`,
        # (1 - w) on rdm `pair + 1`, zero elsewhere
        theta = np.zeros(model.n_param)
        theta[pair] = w
        theta[pair + 1] = 1 - w
        return _loss(theta, model, data, method=method,
                     pattern_idx=pattern_idx,
                     pattern_descriptor=pattern_descriptor,
                     sigma_k=sigma_k)
    # bounded scalar optimization over each neighbouring pair of rdms
    results = [
        opt.minimize_scalar(_pair_loss, np.array([.5]), args=(pair,),
                            method='bounded', bounds=(0, 1))
        for pair in range(model.n_rdm - 1)
    ]
    best = np.argmin([r.fun for r in results])
    theta = np.zeros(model.n_rdm)
    theta[best] = results[best].x
    theta[best + 1] = 1 - results[best].x
    return theta
def fit_regress(model, data, method='cosine', pattern_idx=None,
                pattern_descriptor=None, ridge_weight=0, sigma_k=None,
                normalize=True):
    """
    fitting theta using linear algebra solutions to the OLS problem
    allowed for ModelWeighted only

    This method first normalizes the data and model RDMs appropriately
    for the measure to be optimized. For 'cosine' similarity this is a
    normalization of the data-RDMs to vector length 1. For correlation
    the mean is removed from both model and data rdms additionally.
    Then the parameters are estimated using ordinary least squares.

    Args:
        model(Model): the model to be fit
        data(pyrsa.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional):
            sampled patterns The default is None.
        pattern_descriptor (String, optional):
            descriptor used for fitting. The default is None.
        ridge_weight (float, default=0):
            weight for the ridge-regularisation of the regression
            weight is in comparison to the final regression problem on
            the appropriately normalized regressors
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        normalize(bool): whether to normalize the theta vector
            default = True
            If true, theta is normalized to norm 1.
            This is sensible for many models where the norm
            of theta does not vary the loss.

    Returns:
        numpy.ndarray: theta, parameter vector for the model
    """
    if not (pattern_idx is None or pattern_descriptor is None):
        pred = model.rdm_obj.subsample_pattern(pattern_descriptor, pattern_idx)
    else:
        pred = model.rdm_obj
    vectors = pred.get_vectors()
    data_mean = pool_rdm(data, method=method)
    y = data_mean.get_vectors()
    # renamed from `nan_idx`: this is the mask of retained (non-NaN)
    # entries, consistent with fit_regress_nn
    vectors, y, non_nan_mask = _parse_nan_vectors(vectors, y)
    # Normalizations depending on the comparison measure
    if method == 'cosine':
        v = None
    elif method == 'corr':
        vectors = vectors - np.mean(vectors, 1, keepdims=True)
        v = None
    elif method == 'cosine_cov':
        v = get_v(pred.n_cond, sigma_k)
        v = v[non_nan_mask[0]][:, non_nan_mask[0]]
    elif method == 'corr_cov':
        vectors = vectors - np.mean(vectors, 1, keepdims=True)
        y = y - np.mean(y)
        v = get_v(pred.n_cond, sigma_k)
        v = v[non_nan_mask[0]][:, non_nan_mask[0]]
    else:
        raise ValueError('method argument invalid')
    if v is None:
        X = vectors @ vectors.T + ridge_weight * np.eye(vectors.shape[0])
        y = vectors @ y.T
    else:
        # whitened case: apply v^-1 via conjugate gradients per model vector
        v_inv_x = np.array([scipy.sparse.linalg.cg(v, vectors[i],
                                                   atol=10 ** -9)[0]
                            for i in range(vectors.shape[0])])
        y = v_inv_x @ y.T
        X = vectors @ v_inv_x.T + ridge_weight * np.eye(vectors.shape[0])
    theta = np.linalg.solve(X, y)
    if not normalize:
        return theta.flatten()
    norm = np.sum(theta ** 2)
    if norm == 0:
        return theta.flatten()
    # reuse the norm computed above instead of recomputing the sum
    return theta.flatten() / np.sqrt(norm)
def fit_regress_nn(model, data, method='cosine', pattern_idx=None,
                   pattern_descriptor=None, ridge_weight=0, sigma_k=None,
                   normalize=True):
    """
    fitting theta constrained to non-negative values using linear algebra
    allowed for ModelWeighted only

    This method first normalizes the data and model RDMs appropriately
    for the measure to be optimized. For 'cosine' similarity this is a
    normalization of the data-RDMs to vector length 1. For correlation
    the mean is removed from both model and data rdms additionally.
    Then the parameters are estimated using non-negative least squares
    (the docstring previously said ordinary least squares, but this
    function delegates to _nn_least_squares).

    Args:
        model(Model): the model to be fit
        data(pyrsa.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional):
            sampled patterns The default is None.
        pattern_descriptor (String, optional):
            descriptor used for fitting. The default is None.
        ridge_weight (float, default=0):
            weight for the ridge-regularisation of the regression
            weight is in comparison to the final regression problem on
            the appropriately normalized regressors
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        normalize(bool): whether to normalize the theta vector
            default = True
            If true, theta is normalized to norm 1.
            This is sensible for many models where the norm
            of theta does not vary the loss.

    Returns:
        numpy.ndarray: theta, parameter vector for the model
    """
    if not (pattern_idx is None or pattern_descriptor is None):
        pred = model.rdm_obj.subsample_pattern(pattern_descriptor, pattern_idx)
    else:
        pred = model.rdm_obj
    vectors = pred.get_vectors()
    data_mean = pool_rdm(data, method=method)
    y = data_mean.get_vectors()
    vectors, y, non_nan_mask = _parse_nan_vectors(vectors, y)
    # Normalizations depending on the comparison measure
    if method == 'cosine':
        v = None
    elif method == 'corr':
        vectors = vectors - np.mean(vectors, 1, keepdims=True)
        v = None
    elif method == 'cosine_cov':
        v = get_v(pred.n_cond, sigma_k)
        v = v[non_nan_mask[0]][:, non_nan_mask[0]]
    elif method == 'corr_cov':
        vectors = vectors - np.mean(vectors, 1, keepdims=True)
        y = y - np.mean(y)
        v = get_v(pred.n_cond, sigma_k)
        v = v[non_nan_mask[0]][:, non_nan_mask[0]]
    else:
        raise ValueError('method argument invalid')
    theta, _ = _nn_least_squares(vectors.T, y[0], ridge_weight=ridge_weight, V=v)
    if not normalize:
        return theta.flatten()
    norm = np.sum(theta ** 2)
    if norm == 0:
        return theta.flatten()
    # reuse the norm computed above instead of recomputing the sum
    return theta.flatten() / np.sqrt(norm)
def _loss(theta, model, data, method='cosine', sigma_k=None,
          pattern_descriptor=None, pattern_idx=None,
          ridge_weight=0):
    """Method for calculating a loss for a model and parameter combination

    The loss is the negative mean similarity between the model prediction
    and the data rdms, plus an optional ridge penalty on theta.

    Args:
        theta(numpy.ndarray): evaluated parameter value
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional):
            sampled patterns The default is None.
        pattern_descriptor (String, optional):
            descriptor used for fitting. The default is None.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        ridge_weight(float): weight for a ridge regularisation

    Returns:
        numpy.ndarray: loss
    """
    pred = model.predict_rdm(theta)
    if pattern_idx is not None and pattern_descriptor is not None:
        pred = pred.subsample_pattern(pattern_descriptor, pattern_idx)
    similarity = np.mean(compare(pred, data, method=method, sigma_k=sigma_k))
    penalty = np.sum(theta * theta) * ridge_weight
    return penalty - similarity
def _nn_least_squares(A, y, ridge_weight=0, V=None):
""" non-negative least squares
essentially scipy.optimize.nnls extended to accept a ridge_regression
regularisation and/or a covariance matrix V.
The algorithm is discribed in detail here:
Bro, R., & Jong, S. D. (1997). A fast non-negativity-constrained
least squares algorithm. Journal of Chemometrics, 11, 9.
This is an active set algorithm which is somewhat optimized by
precomputing A^T V^-1 A and A^T V y such that during the optimization
only matricies of rank r need to be inverted.
This is tested against the scipy solution for ridge_weight=0 and V=None.
For other V the validation comes from fitting the same models using
general optimization.
"""
assert A.shape[0] == y.shape[0]
assert y.ndim == 1
x = np.zeros(A.shape[1])
p = np.zeros(A.shape[1], bool)
if V is None:
w = A.T @ y
ATA = A.T @ A + ridge_weight * np.eye(A.shape[1])
else:
V_A = np.array([scipy.sparse.linalg.cg(V, A[:, i],
atol=10 ** -9)[0]
for i in range(A.shape[1])])
y_V_A = V_A @ y
w = y_V_A
ATA = A.T @ V_A.T + ridge_weight * np.eye(A.shape[1])
while np.max(w) > 100 * np.finfo(float).eps:
p[np.argmax(w)] = True
if V is None:
s_p = np.linalg.solve(ATA[p][:, p], A[:, p].T @ y)
else:
s_p = np.linalg.solve(ATA[p][:, p], y_V_A[p])
while np.any(s_p < 0):
alphas = x[p] / (x[p] - s_p)
alphas[s_p > 0] = 1
i_alpha = np.argmin(alphas)
alpha = alphas[i_alpha]
x[p] = x[p] + alpha * (s_p - x[p])
i_alpha = np.where(p)[0][i_alpha]
x[i_alpha] = 0
p[i_alpha] = False
if V is None:
s_p = np.linalg.solve(ATA[p][:, p], A[:, p].T @ y)
else:
s_p = np.linalg.solve(ATA[p][:, p], y_V_A[p])
x[p] = s_p
if V is None:
w = A.T @ y - ATA @ x
else:
w = y_V_A - ATA @ x
if V is None:
loss = np.sum((y - A @ x) ** 2)
else:
loss = (y - A @ x).T @ V @ (y - A @ x)
return x, loss