# Source code for rsatoolbox.inference.boot_testset

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
bootstrap-testset evaluation methods:
variants of taking a bootstrap sample and using the unsampled rdms and/or
patterns as a test set
"""

import numpy as np
from rsatoolbox.util.inference_util import input_check_model
from .bootstrap import bootstrap_sample
from .bootstrap import bootstrap_sample_rdm
from .bootstrap import bootstrap_sample_pattern
from .evaluate import crossval


def bootstrap_testset(models, data, method='cosine', fitter=None, N=1000,
                      pattern_descriptor=None, rdm_descriptor=None):
    """Evaluate models on the rdms and patterns left out of bootstrap samples.

    For each of the N repetitions a bootstrap sample over rdms and patterns
    is drawn and used as the training set. The descriptor values that do not
    appear in the sample form the test set. The size of every test set is
    returned as well, so results can be weighted or selected afterwards.

    A repetition whose test set has fewer than 3 patterns or no rdm is not
    evaluated; its entry in the evaluations is set to NaN.

    Note:
        If ``pattern_descriptor`` or ``rdm_descriptor`` is None, an
        ``'index'`` entry is written into the corresponding descriptor
        container of ``data`` (replacing any entry stored under that key).

    Args:
        models(rsatoolbox.model.Model): Models to be evaluated
        data(rsatoolbox.rdm.RDMs): RDM data to use
        method(string): comparison method to use
        fitter(function): fitting function
        N(int): number of bootstrap samples to draw
        pattern_descriptor(string): descriptor to group patterns
        rdm_descriptor(string): descriptor to group rdms

    Returns:
        numpy.ndarray: evaluations, one entry per bootstrap sample
            (allocated by input_check_model)
        numpy.ndarray: n_rdm for each test_set
        numpy.ndarray: n_pattern for each test_set

    """
    models, evaluations, _, fitter = input_check_model(
        models, None, fitter, N)
    n_rdm = np.zeros(N, dtype=int)
    n_pattern = np.zeros(N, dtype=int)

    # Fall back to a plain running index when no grouping is requested.
    if pattern_descriptor is None:
        data.pattern_descriptors['index'] = np.arange(data.n_cond)
        pattern_descriptor = 'index'
    if rdm_descriptor is None:
        data.rdm_descriptors['index'] = np.arange(data.n_rdm)
        rdm_descriptor = 'index'

    for i in range(N):
        boot_rdms, sampled_rdms, sampled_patterns = bootstrap_sample(
            data,
            rdm_descriptor=rdm_descriptor,
            pattern_descriptor=pattern_descriptor)

        # Test set: descriptor values that were never drawn in this sample.
        held_out_rdms = np.setdiff1d(
            data.rdm_descriptors[rdm_descriptor], sampled_rdms)
        held_out_patterns = np.setdiff1d(
            data.pattern_descriptors[pattern_descriptor], sampled_patterns)
        n_rdm[i] = len(held_out_rdms)
        n_pattern[i] = len(held_out_patterns)

        # Too little held-out data to evaluate on: mark the sample as NaN.
        if len(held_out_patterns) < 3 or len(held_out_rdms) < 1:
            evaluations[i] = np.nan
            continue

        test_rdms = data.subsample_pattern(
            pattern_descriptor, held_out_patterns)
        test_rdms = test_rdms.subsample(rdm_descriptor, held_out_rdms)
        result = crossval(
            models, data,
            [[boot_rdms, sampled_patterns]],
            [[test_rdms, held_out_patterns]],
            method=method, fitter=fitter,
            pattern_descriptor=pattern_descriptor)
        evaluations[i] = result.evaluations[:, 0]

    return evaluations, n_rdm, n_pattern


def bootstrap_testset_pattern(models, data, method='cosine', fitter=None,
                              N=1000, pattern_descriptor=None):
    """Evaluate models on the patterns left out of bootstrap samples.

    For each of the N repetitions a bootstrap sample over patterns is drawn
    and used as the training set. The pattern descriptor values that do not
    appear in the sample form the test set. The size of every test set is
    returned as well, so results can be weighted or selected afterwards.

    A repetition whose test set has fewer than 3 patterns is not evaluated;
    its entry in the evaluations is set to NaN.

    Note:
        If ``pattern_descriptor`` is None, an ``'index'`` entry is written
        into ``data.pattern_descriptors`` (replacing any entry stored under
        that key).

    Args:
        models(rsatoolbox.model.Model): Model to be evaluated
        data(rsatoolbox.rdm.RDMs): RDM data to use
        method(string): comparison method to use
        fitter(function): fitting function for the model
        N(int): number of bootstrap samples to draw
        pattern_descriptor(string): descriptor to group patterns

    Returns:
        numpy.ndarray: evaluations, one entry per bootstrap sample
            (allocated by input_check_model)
        numpy.ndarray: n_pattern for each test_set

    """
    models, evaluations, _, fitter = input_check_model(
        models, None, fitter, N)
    n_pattern = np.zeros(N, dtype=int)

    # Fall back to a plain running index when no grouping is requested.
    if pattern_descriptor is None:
        data.pattern_descriptors['index'] = np.arange(data.n_cond)
        pattern_descriptor = 'index'

    for i in range(N):
        boot_rdms, sampled_patterns = bootstrap_sample_pattern(
            data, pattern_descriptor=pattern_descriptor)

        # Test set: pattern descriptor values never drawn in this sample.
        held_out_patterns = np.setdiff1d(
            data.pattern_descriptors[pattern_descriptor], sampled_patterns)
        n_pattern[i] = len(held_out_patterns)

        # Too few held-out patterns to evaluate on: mark the sample as NaN.
        if len(held_out_patterns) < 3:
            evaluations[i] = np.nan
            continue

        test_rdms = data.subsample_pattern(
            pattern_descriptor, held_out_patterns)
        result = crossval(
            models, data,
            [[boot_rdms, sampled_patterns]],
            [[test_rdms, held_out_patterns]],
            method=method, fitter=fitter,
            pattern_descriptor=pattern_descriptor)
        evaluations[i] = result.evaluations[:, 0]

    return evaluations, n_pattern


def bootstrap_testset_rdm(models, data, method='cosine', fitter=None, N=1000,
                          rdm_descriptor=None):
    """Evaluate models on the rdms left out of bootstrap samples.

    For each of the N repetitions a bootstrap sample over rdms is drawn and
    used as the training set. The rdm descriptor values that do not appear in
    the sample form the test set; all patterns are used for both sets. The
    size of every test set is returned as well, so results can be weighted
    or selected afterwards.

    A repetition whose test set contains no rdm is not evaluated; its entry
    in the evaluations is set to NaN.

    Note:
        ``data.pattern_descriptors['index']`` is always written by this
        function, and ``data.rdm_descriptors['index']`` is written when
        ``rdm_descriptor`` is None (replacing any entry stored under that
        key).

    Args:
        models(rsatoolbox.model.Model): Model to be evaluated
        data(rsatoolbox.rdm.RDMs): RDM data to use
        method(string): comparison method to use
        fitter(function): fitting function for the model
        N(int): number of bootstrap samples to draw
        rdm_descriptor(string): descriptor to group rdms

    Returns:
        numpy.ndarray: evaluations, one entry per bootstrap sample
            (allocated by input_check_model)
        numpy.ndarray: n_rdm for each test_set

    """
    models, evaluations, _, fitter = input_check_model(
        models, None, fitter, N)
    n_rdm = np.zeros(N, dtype=int)

    # Fall back to a plain running index when no grouping is requested.
    if rdm_descriptor is None:
        data.rdm_descriptors['index'] = np.arange(data.n_rdm)
        rdm_descriptor = 'index'
    # Patterns are never resampled here; they are addressed by running index.
    data.pattern_descriptors['index'] = np.arange(data.n_cond)
    pattern_descriptor = 'index'

    for i in range(N):
        boot_rdms, sampled_rdms = bootstrap_sample_rdm(
            data, rdm_descriptor=rdm_descriptor)
        all_patterns = np.arange(data.n_cond)

        # Test set: rdm descriptor values never drawn in this sample.
        held_out_rdms = np.setdiff1d(
            data.rdm_descriptors[rdm_descriptor], sampled_rdms)
        n_rdm[i] = len(held_out_rdms)

        # No held-out rdm to evaluate on: mark the sample as NaN.
        if len(held_out_rdms) < 1:
            evaluations[i] = np.nan
            continue

        test_rdms = data.subsample(rdm_descriptor, held_out_rdms)
        result = crossval(
            models, data,
            [[boot_rdms, all_patterns]],
            [[test_rdms, all_patterns]],
            method=method, fitter=fitter,
            pattern_descriptor=pattern_descriptor)
        evaluations[i] = result.evaluations[:, 0]

    return evaluations, n_rdm