Source code for rsatoolbox.util.pooling
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 20 23:58:28 2020
@author: heiko
"""
import numpy as np
import scipy.sparse
from scipy.stats import rankdata
from rsatoolbox.rdm import RDMs
from rsatoolbox.util.matrix import get_v
[docs]def pool_rdm(rdms, method='cosine', sigma_k=None):
"""pools multiple RDMs into the one with maximal performance under a given
evaluation metric
rdm_descriptors of the generated rdms are empty
Args:
rdms (pyrsa.rdm.RDMs):
RDMs to be pooled
method : String, optional
Which comparison method to optimize for. The default is 'cosine'.
Returns:
pyrsa.rdm.RDMs: the pooled RDM, i.e. a RDM with maximal performance
under the chosen method
"""
rdm_vec = rdms.get_vectors()
if method == 'euclid':
rdm_vec = _nan_mean(rdm_vec)
elif method == 'cosine':
rdm_vec = rdm_vec / np.sqrt(np.nanmean(rdm_vec ** 2, axis=1,
keepdims=True))
rdm_vec = _nan_mean(rdm_vec)
elif method == 'corr':
rdm_vec = rdm_vec - np.nanmean(rdm_vec, axis=1, keepdims=True)
rdm_vec = rdm_vec / np.nanstd(rdm_vec, axis=1, keepdims=True)
rdm_vec = _nan_mean(rdm_vec)
rdm_vec = rdm_vec - np.nanmin(rdm_vec) + 0.01
elif method == 'cosine_cov':
v = get_v(rdms.n_cond, sigma_k=sigma_k)
ok_idx = np.all(np.isfinite(rdm_vec), axis=0)
v = v[ok_idx][:, ok_idx]
rdm_vec_nonan = rdm_vec[:, ok_idx]
v_inv_x = np.array([scipy.sparse.linalg.cg(v, rdm_vec_nonan[i],
atol=10 ** -9)[0]
for i in range(rdms.n_rdm)])
rdm_norms = np.einsum('ij, ij->i', rdm_vec_nonan, v_inv_x).reshape(
[rdms.n_rdm, 1])
rdm_vec = rdm_vec / np.sqrt(rdm_norms)
rdm_vec = _nan_mean(rdm_vec)
elif method == 'corr_cov':
rdm_vec = rdm_vec - np.nanmean(rdm_vec, axis=1, keepdims=True)
v = get_v(rdms.n_cond, sigma_k=sigma_k)
ok_idx = np.all(np.isfinite(rdm_vec), axis=0)
v = v[ok_idx][:, ok_idx]
rdm_vec_nonan = rdm_vec[:, ok_idx]
v_inv_x = np.array([scipy.sparse.linalg.cg(v, rdm_vec_nonan[i],
atol=10 ** -9)[0]
for i in range(rdms.n_rdm)])
rdm_norms = np.einsum('ij, ij->i', rdm_vec_nonan, v_inv_x).reshape(
[rdms.n_rdm, 1])
rdm_vec = rdm_vec / np.sqrt(rdm_norms)
rdm_vec = _nan_mean(rdm_vec)
rdm_vec = rdm_vec - np.nanmin(rdm_vec) + 0.01
elif method in ('spearman', 'rho-a'):
rdm_vec = np.array([_nan_rank_data(v) for v in rdm_vec])
rdm_vec = _nan_mean(rdm_vec)
elif method == 'rho-a':
rdm_vec = np.array([_nan_rank_data(v) for v in rdm_vec])
rdm_vec = _nan_mean(rdm_vec)
elif method in ('kendall', 'tau-b', 'tau-a'):
Warning('Noise ceiling for tau based on averaged ranks!')
rdm_vec = np.array([_nan_rank_data(v) for v in rdm_vec])
rdm_vec = _nan_mean(rdm_vec)
else:
raise ValueError('Unknown RDM comparison method requested!')
return RDMs(rdm_vec,
dissimilarity_measure=rdms.dissimilarity_measure,
descriptors=rdms.descriptors,
rdm_descriptors=None,
pattern_descriptors=rdms.pattern_descriptors)
def _nan_mean(rdm_vector):
""" takes the average over a rdm_vector with nans for masked entries
without a warning
Args:
rdm_vector(numpy.ndarray): set of rdm_vectors to be averaged
Returns:
rdm_mean(numpy.ndarray): the mean rdm
"""
nan_idx = ~np.isnan(rdm_vector[0])
mean_values = np.mean(rdm_vector[:, nan_idx], axis=0)
rdm_mean = np.empty((1, rdm_vector.shape[1])) * np.nan
rdm_mean[:, nan_idx] = mean_values
return rdm_mean
def _nan_rank_data(rdm_vector):
""" rank_data for vectors with nan entries
Args:
rdm_vector(numpy.ndarray): the vector to be rank_transformed
Returns:
ranks(numpy.ndarray): the ranks with nans where the original vector
had nans
"""
ranks_no_nan = rankdata(rdm_vector[~np.isnan(rdm_vector)])
ranks = np.ones_like(rdm_vector) * np.nan
ranks[~np.isnan(rdm_vector)] = ranks_no_nan
return ranks