Source code for rsatoolbox.simulation.sim

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Functions for data simulation a specific RSA-model

    make_design: creates design and condition vectors for fMRI design

    make_dataset: creates a data set based on an RDM model

@author: jdiedrichsen
"""
import numpy as np
import scipy.stats as ss
import scipy.linalg as sl
from scipy.spatial.distance import squareform
import rsatoolbox


[docs]def make_design(n_cond, n_part): """ Makes simple fMRI design with n_cond, each measures n_part times Args: n_cond (int): Number of conditions n_part (int): Number of partitions Returns: Tuple (cond_vec, part_vec) cond_vec (np.ndarray): n_obs vector with condition part_vec (np.ndarray): n_obs vector with partition """ p = np.array(range(0, n_part)) c = np.array(range(0, n_cond)) cond_vec = np.kron(np.ones((n_part,)), c) # Condition Vector part_vec = np.kron(p, np.ones((n_cond,))) # Partition vector return (cond_vec, part_vec)
[docs]def make_dataset(model, theta, cond_vec, n_channel=30, n_sim=1, signal=1, noise=1, signal_cov_channel=None, noise_cov_channel=None, noise_cov_trial=None, use_exact_signal=False, use_same_signal=False): """ Simulates a fMRI-style data set Args: model (rsatoolbox.Model): the model from which to generate data theta (numpy.ndarray): vector of parameters (one dimensional) cond_vec (numpy.ndarray): RSA-style model: vector of experimental conditions Encoding-style: design matrix (n_obs x n_cond) n_channel (int): Number of channels (default = 30) n_sim (int): Number of simulation with the same signal (default = 1) signal (float): Signal variance (multiplied by predicted G) signal_cov_channel(numpy.ndarray): Covariance matrix of signal across channels noise (float): Noise variance noise_cov_channel(numpy.ndarray): Covariance matrix of noise (default = identity) noise_cov_trial(numpy.ndarray): Covariance matrix of noise across trials use_exact_signal (bool): Makes the signal so that G is exactly as specified (default: False) use_same_signal (bool): Uses the same signal for all simulation (default: False) Returns: data (list): List of rsatoolbox.Dataset with obs_descriptors """ # Get the model prediction and build second moment matrix # Note that this step assumes that RDM uses squared Euclidean distances RDM = model.predict(theta) D = squareform(RDM) H = rsatoolbox.util.matrix.centering(D.shape[0]) G = -0.5 * (H @ D @ H) # Make design matrix if cond_vec.ndim == 1: Zcond = rsatoolbox.util.matrix.indicator(cond_vec) elif cond_vec.ndim == 2: Zcond = cond_vec else: raise ValueError("cond_vec needs to be either vector or design matrix") n_obs, _ = Zcond.shape # If signal_cov_channel is given, precalculate the cholesky decomp if signal_cov_channel is None: signal_chol_channel = None else: if signal_cov_channel.shape != (n_channel, n_channel): raise ValueError("Signal covariance for channels needs to be \ n_channel x n_channel array") signal_chol_channel = np.linalg.cholesky(signal_cov_channel) # If noise_cov_channel is given, precalculate the cholinsky decomp if noise_cov_channel is None: noise_chol_channel = None else: if noise_cov_channel.shape != (n_channel, n_channel): raise ValueError("noise covariance for channels needs to be \ n_channel x n_channel array") noise_chol_channel = np.linalg.cholesky(noise_cov_channel) # If noise_cov_trial is given, precalculate the cholinsky decomp if noise_cov_trial is None: noise_chol_trial = None else: if noise_cov_trial.shape != (n_channel, n_channel): raise ValueError("noise covariance for trials needs to be \ n_obs x n_obs array") noise_chol_trial = np.linalg.cholesky(noise_cov_trial) # Generate the signal - here same for all simulations if use_same_signal: true_U = make_signal(G, n_channel, use_exact_signal, signal_chol_channel) # Generate noise as a matrix normal, independent across partitions # If noise covariance structure is given, it is assumed that it's the same # across different partitions obs_des = {"cond_vec": cond_vec} des = {"signal": signal, "noise": noise, "model": model.name, "theta": theta} dataset_list = [] for _ in range(0, n_sim): # If necessary - make a new signal if not use_same_signal: true_U = make_signal(G, n_channel, use_exact_signal, signal_chol_channel) # Make noise with normal distribution # - allows later plugin of other dists epsilon = np.random.uniform(0, 1, size=(n_obs, n_channel)) epsilon = ss.norm.ppf(epsilon) * np.sqrt(noise) # Now add spatial and temporal covariance structure as required if noise_chol_channel is not None: epsilon = epsilon @ noise_chol_channel if noise_chol_trial is not None: epsilon = noise_chol_trial @ epsilon # Assemble the data set data = Zcond @ true_U * np.sqrt(signal) + epsilon dataset = rsatoolbox.data.Dataset(data, obs_descriptors=obs_des, descriptors=des) dataset_list.append(dataset) return dataset_list
[docs]def make_signal(G, n_channel, make_exact=False, chol_channel=None): """ Generates signal exactly with a specified second-moment matrix (G) To avoid errors: If the number of channels is smaller than the number of patterns we generate a representation with the minimal number of dimnensions and then delete dimensions to yield the desired number of dimensions. Args: G(np.array) : desired second moment matrix (ncond x ncond) n_channel (int) : Number of channels make_exact (bool) : enforce exact signal distances (default: False) chol_channel: Cholensky decomposition of the signal covariance matrix (default: None - makes signal i.i.d.) Returns: np.array (n_cond x n_channel): random signal """ # Generate the true patterns with exactly correct second moment matrix n_cond = G.shape[0] if n_cond > n_channel: n_channel_final = n_channel n_channel = n_cond else: n_channel_final = None # We use two-step procedure allow for different distributions later on true_U = np.random.uniform(0, 1, size=(n_cond, n_channel)) true_U = ss.norm.ppf(true_U) true_U = true_U - np.mean(true_U, axis=1, keepdims=True) # Make orthonormal row vectors if make_exact: E = true_U @ true_U.transpose() L_E, D_E, _ = sl.ldl(E) D_E[D_E < 1e-15] = 1e-15 # we need an invertible solution! D_E = np.sqrt(D_E) E_chol = L_E @ D_E true_U = np.linalg.solve(E_chol, true_U) * np.sqrt(n_channel) # Impose spatial covariance matrix if chol_channel is not None: true_U = true_U @ chol_channel # Now produce data with the known second-moment matrix # Use positive eigenvectors only # (cholesky does not work with rank-deficient matrices) L, D, _ = sl.ldl(G) D[D < 1e-15] = 0 D = np.sqrt(D) chol_G = L @ D true_U = (chol_G @ true_U) if n_channel_final: true_U = true_U[:, :n_channel_final] return true_U