helpers.py (forked from ntardiff/Analysis_Tardiff_Kang_Correlated)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 14 10:46:36 2023
@author: nathan
"""
import numpy as np

#base preprocessing for all data
def base_preproc(data, RTminmax=None):
    #there shouldn't be any missing data in any column
    assert not data.isnull().values.any(), "Missing data detected!"
    #standardize column names and drop the redundant index column
    data.rename(columns={'Unnamed: 0': 'trial', 'r': 'rho', 'MeanMain': 'mu', 'participant': 'subject'},
                inplace=True)
    data.drop('index', axis=1, inplace=True)
    #optionally exclude trials with RTs outside [RTmin, RTmax]
    if RTminmax:
        data = data.loc[(data.rt >= RTminmax[0]) & (data.rt <= RTminmax[1])]
    return data
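
# Example usage (illustrative sketch): the file name, RT bounds, and the presence
# of the raw columns ('Unnamed: 0', 'index', 'r', 'MeanMain', 'participant', 'rt')
# are assumptions inferred from the rename/drop calls above, not part of this module:
#   import pandas as pd
#   raw = pd.read_csv('raw_trials.csv')               # hypothetical file
#   clean = base_preproc(raw, RTminmax=(0.25, 2.5))   # hypothetical RT window (s)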

#preprocessing for models that fit a single correlation condition
def onecorr_preproc(data, rho, RTminmax=None):
    data = base_preproc(data, RTminmax)
    assert rho in data.rho.values, "Requested correlation not found in data!"
    data = data.loc[data.rho == rho]
    return data
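
# Example usage (illustrative sketch): rho = 0.6 is a hypothetical correlation
# level and must actually appear in data.rho for the assert above to pass:
#   one_rho = onecorr_preproc(raw, rho=0.6, RTminmax=(0.25, 2.5))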

def split_postproc(data, split, N=2):
    #if requested, split data into N chunks and return chunk number `split` (1-indexed)
    #defaults to halves. As currently structured, any remainder rows go to the
    #last chunk for N=2. Shouldn't make a big difference in our use cases.
    split_idx = len(data)/N
    splits = np.int64(np.arange(0, N+1)*split_idx)
    data = data.iloc[splits[split-1]:splits[split]]
    return data

# def split_postproc(data,split):
#     #if requested, split data in half
#     split_idx = len(data)//2
#     if split==1:
#         data = data.iloc[0:split_idx]
#     elif split==2:
#         data = data.iloc[split_idx:]
#     else:
#         raise ValueError('Split can only be 1 (first half) or 2 (second half)!')
#     return data
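
# Minimal runnable sketch (not part of the original module): exercises the three
# helpers on a small synthetic DataFrame. The raw column names come from the
# rename/drop calls in base_preproc; the values, RT bounds, and rho levels below
# are illustrative assumptions, not the study's actual design.
if __name__ == '__main__':
    import pandas as pd

    rng = np.random.default_rng(0)
    n_trials = 20
    raw = pd.DataFrame({
        'Unnamed: 0': np.arange(n_trials),            # per-trial index as read from CSV
        'index': np.arange(n_trials),                 # redundant index column (dropped)
        'r': np.repeat([0.0, 0.6], n_trials // 2),    # correlation condition
        'MeanMain': rng.normal(size=n_trials),        # mean evidence strength
        'participant': 1,                             # subject ID
        'rt': rng.uniform(0.2, 3.0, size=n_trials),   # response times (s)
    })

    clean = base_preproc(raw.copy(), RTminmax=(0.25, 2.5))
    one_rho = onecorr_preproc(raw.copy(), rho=0.6)
    first_half = split_postproc(clean, split=1, N=2)
    second_half = split_postproc(clean, split=2, N=2)
    print(len(clean), len(one_rho), len(first_half), len(second_half))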