#!/usr/bin/python
'''
MPT model used in Yim, H., Dennis, S. J., & Sloutsky, V. M. (2013).
The Development of Episodic Memory: Items, Contexts, and Relations. Psychological Science.
@author: Hyungwook Yim (yim.31@osu.edu) 2013/3/16
See http://cogdev.osu.edu/mpt/code for more descriptions.
############################################################################
#############################  FUNCTIONS  #################################
############################################################################
'''
import numpy
import math
import random
import scipy.optimize
from scipy import stats
#>>> 1> Computes response patterns when parameters are given
# The order of the response pattern follows the description on the web-page
# http://cogdev.osu.edu/mpt/code
############################################################################
def compt_para(e, i, l, b, data):
    totalABCD = sum(data[0:4])
    totalABAC = sum(data[4:9])
    totalABABr = sum(data[9:13])
    g2 = 1.0/2
    g5 = 1.0/5
    g6 = 1.0/6
    # (1) Response patterns from ABCD
    B1 = (e*i*l*b)+(e*i*l*(1-b))+(e*i*(1-l)*b)+(e*i*(1-l)*(1-b))+(e*(1-i)*l*b)+(e*(1-i)*l*(1-b)*g6)+(e*(1-i)*(1-l)*b)+(e*(1-i)*(1-l)*(1-b)*g2*g6)
    DFHJL1 = (e*(1-i)*l*(1-b)*(1-g6))+(e*(1-i)*(1-l)*(1-b)*g2*(1-g6))
    NPRTVX1 = e*(1-i)*(1-l)*(1-b)*(1-g2)
    miss1 = (1-e)
    # (2) Response patterns from ABAC
    B2 = (e*i*l*b)+(e*i*l*(1-b))+(e*i*(1-l)*b)+(e*i*(1-l)*(1-b)*g2)+(e*(1-i)*l*b)+(e*(1-i)*l*(1-b)*g6)+(e*(1-i)*(1-l)*b)+(e*(1-i)*(1-l)*(1-b)*g2*g6)
    DFHJL2 = (e*(1-i)*l*(1-b)*(1-g6))+(e*(1-i)*(1-l)*(1-b)*g2*(1-g6))
    N2 = (e*i*(1-l)*(1-b)*(1-g2))+(e*(1-i)*(1-l)*(1-b)*(1-g2)*g6)
    PRTVX2 = e*(1-i)*(1-l)*(1-b)*(1-g2)*(1-g6)
    miss2 = (1-e)
    # (3) Response patterns from ABABr
    B3 = (e*i*l*b)+(e*i*l*(1-b)*g2)+(e*i*(1-l)*b)+(e*i*(1-l)*(1-b)*g2)+(e*(1-i)*l*b)+(e*(1-i)*l*(1-b)*g6)+(e*(1-i)*(1-l)*b)+(e*(1-i)*(1-l)*(1-b)*g6)
    D3 = (e*i*l*(1-b)*(1-g2))+(e*i*(1-l)*(1-b)*(1-g2))+(e*(1-i)*l*(1-b)*(1-g6)*g5)+(e*(1-i)*(1-l)*(1-b)*(1-g6)*g5)
    FHJL3 = (e*(1-i)*l*(1-b)*(1-g6)*(1-g5))+(e*(1-i)*(1-l)*(1-b)*(1-g6)*(1-g5))
    miss3 = (1-e)
    results = [B1*totalABCD, DFHJL1*totalABCD, NPRTVX1*totalABCD, miss1*totalABCD,
               B2*totalABAC, DFHJL2*totalABAC, N2*totalABAC, PRTVX2*totalABAC, miss2*totalABAC,
               B3*totalABABr, D3*totalABABr, FHJL3*totalABABr, miss3*totalABABr]
    print "(5) Compare results"
    print ">> the order of the response patterns is described at http://cogdev.osu.edu/mpt/code"
    print "Model: ",
    for idx in xrange(len(results)):  # 'idx' avoids shadowing the parameter 'i'
        print ("%0.1f " % results[idx]),
    print ""
    print "Data : ",
    for idx in xrange(len(data)):
        print ("%0.1f " % data[idx]),
    print ""
    return(results)
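# Hypothetical usage (the parameter values and the 13-cell count vector
# below are made up for illustration; they are not data from the paper):
#   demo = [30., 5., 3., 2., 28., 6., 3., 1., 2., 25., 8., 4., 3.]
#   compt_para(0.9, 0.6, 0.5, 0.4, demo)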
#####################################
#>>> 2> Computes minus log-likelihood
def mpt_mle(x, data):
    # Clamp the parameters away from 0 and 1 so that math.log never
    # receives a zero value and the optimizing algorithm doesn't crash
    vsn = 1e-7  # a very small number
    for idx in range(4):
        if x[idx] <= 0.0:
            x[idx] = vsn
        if x[idx] >= 1.0:
            x[idx] = 1.0-vsn
    g2 = 1.0/2
    g5 = 1.0/5
    g6 = 1.0/6
    e = x[0]
    i = x[1]
    l = x[2]
    b = x[3]
    # calculate the proportion of responses for each condition;
    # needed because the 3 conditions are optimized together
    nSum = numpy.sum(data) * 1.0
    nABCD = numpy.sum(data[0:4]) / nSum
    nABAC = numpy.sum(data[4:9]) / nSum
    nABABr = numpy.sum(data[9:13]) / nSum
    # log-likelihood calculation for each condition
    ABCD = (data[0]*math.log((e*i*l*b)+(e*i*l*(1-b))+(e*i*(1-l)*b)+(e*i*(1-l)*(1-b))+(e*(1-i)*l*b)+(e*(1-i)*l*(1-b)*g6)+(e*(1-i)*(1-l)*b)+(e*(1-i)*(1-l)*(1-b)*g2*g6))
            + data[1]*math.log((e*(1-i)*l*(1-b)*(1-g6))+(e*(1-i)*(1-l)*(1-b)*g2*(1-g6)))
            + data[2]*math.log(e*(1-i)*(1-l)*(1-b)*(1-g2))
            + data[3]*math.log((1-e))
            )
    ABAC = (data[4]*math.log((e*i*l*b)+(e*i*l*(1-b))+(e*i*(1-l)*b)+(e*i*(1-l)*(1-b)*g2)+(e*(1-i)*l*b)+(e*(1-i)*l*(1-b)*g6)+(e*(1-i)*(1-l)*b)+(e*(1-i)*(1-l)*(1-b)*g2*g6))
            + data[5]*math.log((e*(1-i)*l*(1-b)*(1-g6))+(e*(1-i)*(1-l)*(1-b)*g2*(1-g6)))
            + data[6]*math.log((e*i*(1-l)*(1-b)*(1-g2))+(e*(1-i)*(1-l)*(1-b)*(1-g2)*g6))
            + data[7]*math.log(e*(1-i)*(1-l)*(1-b)*(1-g2)*(1-g6))
            + data[8]*math.log((1-e))
            )
    ABABr = (data[9]*math.log((e*i*l*b)+(e*i*l*(1-b)*g2)+(e*i*(1-l)*b)+(e*i*(1-l)*(1-b)*g2)+(e*(1-i)*l*b)+(e*(1-i)*l*(1-b)*g6)+(e*(1-i)*(1-l)*b)+(e*(1-i)*(1-l)*(1-b)*g6))
             + data[10]*math.log((e*i*l*(1-b)*(1-g2))+(e*i*(1-l)*(1-b)*(1-g2))+(e*(1-i)*l*(1-b)*(1-g6)*g5)+(e*(1-i)*(1-l)*(1-b)*(1-g6)*g5))
             + data[11]*math.log((e*(1-i)*l*(1-b)*(1-g6)*(1-g5))+(e*(1-i)*(1-l)*(1-b)*(1-g6)*(1-g5)))
             + data[12]*math.log((1-e))
             )
    # constant1 and constant2 do not depend on the parameters, so they do
    # not change where the optimum lies; they are kept so that the returned
    # value is the complete multinomial log-likelihood.
    # Counts are cast to int because they may arrive as integer-valued
    # floats after bootstrap resampling, and math.factorial expects integers
    constant1 = math.log(math.factorial(int(nSum))
                         / (math.factorial(int(data[0]))
                            * math.factorial(int(data[1]))
                            * math.factorial(int(data[2]))
                            * math.factorial(int(data[3]))
                            * math.factorial(int(data[4]))
                            * math.factorial(int(data[5]))
                            * math.factorial(int(data[6]))
                            * math.factorial(int(data[7]))
                            * math.factorial(int(data[8]))
                            * math.factorial(int(data[9]))
                            * math.factorial(int(data[10]))
                            * math.factorial(int(data[11]))
                            * math.factorial(int(data[12]))
                            )  # denominator end
                         )  # log end
    constant2 = ((data[0] + data[1] + data[2] + data[3])*math.log(nABCD)
                 + (data[4] + data[5] + data[6] + data[7] + data[8])*math.log(nABAC)
                 + (data[9] + data[10] + data[11] + data[12])*math.log(nABABr)
                 )
    # THE log-likelihood
    MLE = ABCD + ABAC + ABABr + constant1 + constant2
    # Return it with a minus since we will use an optimizing algorithm that minimizes the function
    return(-MLE)
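# Hypothetical usage: evaluate the minus log-likelihood at one point in
# parameter space, x = [e, i, l, b] (the counts below are made up):
#   demo = [30, 5, 3, 2, 28, 6, 3, 1, 2, 25, 8, 4, 3]
#   print mpt_mle([0.9, 0.6, 0.5, 0.4], demo)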
#####################################################################
#>>> 3> BIC, AIC - get the minus log-lik and turn it into a BIC, AIC
def BICs(MLE, k, n):
    # MLE here is the minus log-likelihood from mpt_mle, so 2*MLE = -2*loglik
    BIC = 2*MLE + k*math.log(n)
    return(BIC)
def AICs(MLE, k, n):
    # n is unused but kept so the signature parallels BICs
    AIC = 2*MLE + 2*k
    return(AIC)
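# Hypothetical usage, continuing the example above (k = 4 free parameters):
#   minusLL = mpt_mle([0.9, 0.6, 0.5, 0.4], demo)
#   print BICs(minusLL, 4, sum(demo)), AICs(minusLL, 4, sum(demo))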
#######################################
#>>> 4> Functions for fitting the model
# uses scipy.optimize.fmin_l_bfgs_b and prints out MLE, BIC, AIC, & the parameters
# takes the 13-cell data vector as its argument
# after estimating the parameters, compares the model's predictions with the data
def fitMPT(data):
    x0 = numpy.random.random(4)
    xopt = scipy.optimize.fmin_l_bfgs_b(mpt_mle, x0, fprime=None, args=(data,), approx_grad=1, bounds=[(0.0,1.0),(0.0,1.0),(0.0,1.0),(0.0,1.0)])
    print "====================== Optimization =========================="
    print "(1) loglike: %f" % -xopt[1]
    print "(2) BIC    : %f" % BICs(xopt[1], 4, numpy.sum(data))
    print "(3) AIC    : %f" % AICs(xopt[1], 4, numpy.sum(data))
    print "(4) parameter estimation (e, i, l, b)"
    print xopt[0]
    compt_para(xopt[0][0], xopt[0][1], xopt[0][2], xopt[0][3], data)
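# Hypothetical usage with made-up counts (4 ABCD, 5 ABAC, 4 ABABr cells):
#   fitMPT([30, 5, 3, 2, 28, 6, 3, 1, 2, 25, 8, 4, 3])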
################################################
#>>> 5> Functions for bootstrapping
# Resample the data with replacement then sum it
def reSample(matrix):
    n = matrix.shape[0]
    tempSum = numpy.zeros(matrix.shape[1])
    numPool = range(n)
    for i in range(n):
        # reshuffling the pool and taking its first element draws one row
        # at random each time, i.e. sampling with replacement
        random.shuffle(numPool)
        tempSum += matrix[numPool[0]]
    return(tempSum)
# Merges the resampled data from each condition (i.e. ABCD, ABAC, ABABr)
def reSample_merged(data1, data2, data3):
    rsampled1 = reSample(data1)
    rsampled2 = reSample(data2)
    rsampled3 = reSample(data3)
    return(list(rsampled1)+list(rsampled2)+list(rsampled3))
# Execute bootstrapping with NumSample iterations
def bootstrap(NumSample, dataABCD, dataABAC, dataABABr, savefile):
    e = []
    i = []
    l = []
    b = []
    # start bootstrapping!
    for j in xrange(NumSample):
        x0 = numpy.random.random(4)
        data = reSample_merged(dataABCD, dataABAC, dataABABr)
        xopt = scipy.optimize.fmin_l_bfgs_b(mpt_mle, x0, fprime=None, args=(data,), approx_grad=1, bounds=[(0.0,1.0),(0.0,1.0),(0.0,1.0),(0.0,1.0)])
        e.append(xopt[0][0])
        i.append(xopt[0][1])
        l.append(xopt[0][2])
        b.append(xopt[0][3])
    # (1) the estimates can be saved to 'savefile' (pass 0 to skip saving)
    if savefile != 0:
        f = open(savefile, 'w')
        f.write("e\ti\tl\tb\n")
        for j in xrange(NumSample):
            f.write("%f\t%f\t%f\t%f\n" % (e[j], i[j], l[j], b[j]))
        f.close()
    # (2) print percentiles of the bootstrap distribution
    print "====================== Bootstrapping ========================="
    print "                        Percentile                            "
    print "parameter |    1st     25th     50th     75th     99th        "
    for name, est in (("e", e), ("i", i), ("l", l), ("b", b)):
        pcts = tuple(stats.scoreatpercentile(est, p) for p in (1, 25, 50, 75, 99))
        print "    %s     | %2.5f %2.5f %2.5f %2.5f %2.5f" % ((name,) + pcts)
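# Hypothetical usage: each matrix holds one row of response counts per
# participant (4 ABCD / 5 ABAC / 4 ABABr cells; the numbers are made up).
# Pass 0 as 'savefile' to skip writing the estimates to disk:
#   abcd  = numpy.array([[8, 1, 1, 0], [7, 2, 0, 1], [9, 0, 1, 0]])
#   abac  = numpy.array([[7, 1, 1, 0, 1], [6, 2, 1, 0, 1], [8, 1, 0, 0, 1]])
#   ababr = numpy.array([[6, 2, 1, 1], [7, 1, 1, 1], [8, 1, 0, 1]])
#   bootstrap(1000, abcd, abac, ababr, 0)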
##############################################
#>>> 6> Functions for the randomization test
# Mix up the two groups
def mix_vecsum(matA, matB):
    nA = matA.shape[0]
    nB = matB.shape[0]
    matSUM = numpy.concatenate((matA, matB))
    numpy.random.shuffle(matSUM)
    newMatA = sum(matSUM[0:nA])
    newMatB = sum(matSUM[nA:])
    return(newMatA, newMatB)
# Merge the two mixed-up groups
def merge_twoRandset(data1ABCD, data1ABAC, data1ABABr, data2ABCD, data2ABAC, data2ABABr):
    abcdA, abcdB = mix_vecsum(data1ABCD, data2ABCD)
    abacA, abacB = mix_vecsum(data1ABAC, data2ABAC)
    ababrA, ababrB = mix_vecsum(data1ABABr, data2ABABr)
    return(list(abcdA)+list(abacA)+list(ababrA), list(abcdB)+list(abacB)+list(ababrB))
# Execute the randomization test
def rand_test(NumSample, data1ABCD, data1ABAC, data1ABABr, data2ABCD, data2ABAC, data2ABABr):
    # estimate the parameters for each of the two groups
    x0 = numpy.random.random(4)
    data1 = (list(numpy.sum(data1ABCD, axis=0))+list(numpy.sum(data1ABAC, axis=0))+list(numpy.sum(data1ABABr, axis=0)))
    xopt1 = scipy.optimize.fmin_l_bfgs_b(mpt_mle, x0, fprime=None, args=(data1,), approx_grad=1, bounds=[(0.0,1.0),(0.0,1.0),(0.0,1.0),(0.0,1.0)])
    x0 = numpy.random.random(4)
    data2 = (list(numpy.sum(data2ABCD, axis=0))+list(numpy.sum(data2ABAC, axis=0))+list(numpy.sum(data2ABABr, axis=0)))
    xopt2 = scipy.optimize.fmin_l_bfgs_b(mpt_mle, x0, fprime=None, args=(data2,), approx_grad=1, bounds=[(0.0,1.0),(0.0,1.0),(0.0,1.0),(0.0,1.0)])
    # observed parameter differences between the two groups
    para_diff = [xopt1[0][0]-xopt2[0][0], xopt1[0][1]-xopt2[0][1], xopt1[0][2]-xopt2[0][2], xopt1[0][3]-xopt2[0][3]]
    # storage for the permutation samples
    perm_e = []
    perm_i = []
    perm_l = []
    perm_b = []
    for i in xrange(NumSample):
        # shuffle participants across the two groups
        dataA, dataB = merge_twoRandset(data1ABCD, data1ABAC, data1ABABr, data2ABCD, data2ABAC, data2ABABr)
        # optimize for the 1st group
        x0 = numpy.random.random(4)
        xoptA = scipy.optimize.fmin_l_bfgs_b(mpt_mle, x0, fprime=None, args=(dataA,), approx_grad=1, bounds=[(0.0,1.0),(0.0,1.0),(0.0,1.0),(0.0,1.0)])
        l_A = list(xoptA[0])
        # optimize for the 2nd group
        x0 = numpy.random.random(4)
        xoptB = scipy.optimize.fmin_l_bfgs_b(mpt_mle, x0, fprime=None, args=(dataB,), approx_grad=1, bounds=[(0.0,1.0),(0.0,1.0),(0.0,1.0),(0.0,1.0)])
        l_B = list(xoptB[0])
        # save the permuted differences
        perm_e.append(l_A[0] - l_B[0])
        perm_i.append(l_A[1] - l_B[1])
        perm_l.append(l_A[2] - l_B[2])
        perm_b.append(l_A[3] - l_B[3])
    # p-values: locate the observed difference in the permutation
    # distribution, then fold so the reported value is the proportion of
    # permuted differences in the nearer tail
    pvalues = []
    pvalues.append(stats.percentileofscore(perm_e, para_diff[0])/100.0)
    pvalues.append(stats.percentileofscore(perm_i, para_diff[1])/100.0)
    pvalues.append(stats.percentileofscore(perm_l, para_diff[2])/100.0)
    pvalues.append(stats.percentileofscore(perm_b, para_diff[3])/100.0)
    for idx in xrange(4):
        if pvalues[idx] > .5:
            pvalues[idx] = 1 - pvalues[idx]
    print "============= Randomization test results ======================="
    print "        1st_group minus 2nd_group"
    print " parameter   difference   p-value"
    print "     e     | %f    %1.3f" % (para_diff[0], pvalues[0])
    print "     i     | %f    %1.3f" % (para_diff[1], pvalues[1])
    print "     l     | %f    %1.3f" % (para_diff[2], pvalues[2])
    print "     b     | %f    %1.3f" % (para_diff[3], pvalues[3])
    print "================================================================"