import math

import numpy

from PyML.classifiers.composite import CompositeClassifier
from PyML.classifiers.baseClassifiers import Classifier
from PyML.evaluators import assess
from PyML.datagen import sample
6
7 -class Platt (CompositeClassifier) :
8 """
9 Converts a real valued classifier into a conditional probability estimator.
10 This is achieved by fitting a sigmoid with parameters A and B to the
11 values of the decision function:
12 f(x) --> 1/(1+exp(A*f(x)+B))
13
14 The code is based on Platt's pseudocode from:
15
16 John C. Platt. Probabilistic Outputs for Support Vector
17 Machines and Comparisons to Regularized Likelihood Methods. in:
18 Advances in Large Margin Classifiers
19 A. J. Smola, B. Schoelkopf, D. Schuurmans, eds. MIT Press (1999).
20
21 :Keywords:
22 - `mode` - values: 'holdOut' (default), 'cv'.
23 The Platt object fits a sigmoid to the values of the classifier decision
24 function. The values of the decision function are computed in one of two
25 ways: on a hold-out set (the 'holdOut' mode), or by cross-validation
26 (the 'cv' mode).
27 - `fittingFraction` - which fraction of the training data to use for fitting
28 the sigmoid (the rest is used for the classifier training). default: 0.2
29 - `numFolds` - the number of cross-validation folds to use when in 'cv' mode.
30
31 """
32
33 attributes = {'mode' : 'holdOut',
34 'numFolds' : 3,
35 'fittingFraction' : 0.2}
36
def train(self, data, **args) :
    """Train the wrapped classifier and fit the sigmoid parameters A and B.

    The decision values used for the fit come either from cross-validation
    on ``data`` ('cv' mode) or from a held-out fraction of ``data``
    ('holdOut' mode).  The fit is attempted up to five times; in 'holdOut'
    mode each attempt draws a new split and retrains the classifier.

    If no attempt succeeds, ``self.A`` and ``self.B`` are set to None and
    ``self.maxPos`` / ``self.minNeg`` are set to the largest positive and
    the absolute value of the most negative decision value observed on
    ``data`` (each at least 1e-3).

    :Parameters:
      - `data` - the training dataset (must have exactly two classes)
      - `args` - keyword arguments forwarded to the classifiers' ``train``

    Raises ValueError if the data does not have exactly two classes or if
    ``self.mode`` is neither 'cv' nor 'holdOut'.
    """

    Classifier.train(self, data, **args)
    if self.labels.numClasses != 2 :
        raise ValueError('number of classes is not 2')
    # reject a bad mode before any expensive training happens
    if self.mode not in ('cv', 'holdOut') :
        raise ValueError('unknown mode for Platt')

    if self.mode == 'cv' :
        self.classifier.train(data, **args)

    maxNumTries = 5
    success = False
    for attempt in range(maxNumTries) :
        if self.mode == 'cv' :
            fittingData = data
            r = self.classifier.stratifiedCV(data, self.numFolds)
        else :
            fittingData, trainingData = sample.splitDataset(
                data, self.fittingFraction)
            self.classifier.train(trainingData, **args)
            r = self.classifier.test(fittingData)
        self.labels = self.classifier.labels

        prior1 = fittingData.labels.classSize[1]
        prior0 = fittingData.labels.classSize[0]
        out = numpy.array(r.Y, numpy.float64)
        try :
            self.fit_A_B(prior1, prior0, out, r.decisionFunc, r.givenY)
        except Exception :
            # best effort: a failed fit triggers another attempt, but
            # KeyboardInterrupt / SystemExit are no longer swallowed
            continue
        success = True
        break

    if not success :
        print('platt not successful')
        self.A = None
        self.B = None
        results = self.classifier.test(data)
        # scale factors for the fallback output; the 1e-3 floors keep the
        # later division away from zero
        maxPos = 1e-3
        minNeg = -1e-3
        for f in results.decisionFunc :
            if f > maxPos :
                maxPos = f
            elif f < minNeg :
                minNeg = f
        self.maxPos = maxPos
        self.minNeg = abs(minNeg)

    self.log.trainingTime = self.getTrainingTime()
89
def fit_A_B(self, prior1, prior0, out, deci, Y) :
    """Fit the sigmoid p(x) = 1 / (1 + exp(A * f(x) + B)) to decision values.

    Follows the pseudocode in Platt (1999): a Newton iteration on the
    cross-entropy between the sigmoid output and smoothed targets, with a
    stabilizer added to the diagonal of the Hessian.

    :Parameters:
      - `prior1` - number of examples in class 1
      - `prior0` - number of examples in class 0
      - `out` - predicted labels; kept for interface compatibility, not
        used by the fit
      - `deci` - decision function values f(x) of the fitting examples
      - `Y` - given labels of the fitting examples (1 marks class 1)

    Sets ``self.A`` and ``self.B``.  Raises ValueError when the stabilizer
    grows beyond 1e6 without finding a step that improves the fit.
    """

    # The sigmoid is applied to decision values at prediction time, so it
    # has to be fitted on the decision values as well.
    scores = numpy.asarray(deci, numpy.float64)

    A = 0.0
    B = math.log((prior0 + 1.0) / (prior1 + 1.0))
    hiTarget = (prior1 + 1.0) / (prior1 + 2.0)
    loTarget = 1.0 / (prior0 + 2.0)
    t = numpy.where(numpy.asarray(Y) == 1, hiTarget, loTarget)

    stabilizer = 1e-3
    olderr = 1e15
    pp = numpy.ones(len(scores), numpy.float64) * \
         (prior1 + 1.0) / (prior0 + prior1 + 2.0)
    count = 0

    for it in range(1, 101) :
        # Hessian entries (a, b, c) and gradient entries (d, e)
        d1 = pp - t
        d2 = pp * (1 - pp)
        a = numpy.sum(scores * scores * d2)
        b = numpy.sum(d2)
        c = numpy.sum(scores * d2)
        d = numpy.sum(scores * d1)
        e = numpy.sum(d1)
        if abs(d) < 1e-9 and abs(e) < 1e-9 :
            break
        oldA = A
        oldB = B
        err = 0.0
        while True :
            det = (a + stabilizer) * (b + stabilizer) - c * c
            if det == 0 :
                stabilizer *= 10
                continue
            A = oldA + ((b + stabilizer) * d - c * e) / det
            B = oldB + ((a + stabilizer) * e - c * d) / det

            # log(1 + exp(z)) via logaddexp: -log(p) and -log(1 - p)
            # stay finite even when p underflows to 0 or rounds to 1
            z = scores * A + B
            negLogP = numpy.logaddexp(0.0, z)
            negLogNotP = numpy.logaddexp(0.0, -z)
            pp = numpy.exp(-negLogP)
            err = numpy.sum(t * negLogP + (1 - t) * negLogNotP)
            if err < olderr * (1 + 1e-7) :
                stabilizer *= 0.1
                break

            # the step did not improve the fit: damp harder and retry
            stabilizer *= 10
            if stabilizer > 1e6 :
                raise ValueError('lambda too big')
        diff = err - olderr
        scale = 0.5 * (err + olderr + 1.0)
        if diff > -1e-3 * scale and diff < 1e-7 * scale :
            count += 1
        else :
            count = 0
        olderr = err
        # stop after three consecutive iterations without real progress
        if count == 3 :
            break

    self.A = A
    self.B = B
    self.log.trainingTime = self.getTrainingTime()
155
157
158 f = self.classifier.decisionFunc(data, i)
159 if self.A is not None :
160 return 1.0 / (1 + math.exp(self.A * f + self.B))
161 else :
162 if f > 0 :
163 return f / self.maxPos
164 else :
165 return f / self.minNeg
166
168
169 prob = self.decisionFunc(data ,i)
170 if prob > 0.5 :
171 return (1,prob)
172 else:
173 return (0,prob)
174
175 test = assess.test
176
def save(self, fileName) :
    """Write the sigmoid parameters, then the wrapped classifier, to a file.

    The output starts with the lines ``#A=<value>`` and ``#B=<value>``;
    the rest is whatever ``self.classifier.save`` writes to the same
    file handle.

    :Parameters:
      - `fileName` - a file name, or an already open file-like object.
        A file opened here is closed before returning; a handle supplied
        by the caller is left open.
    """

    openedHere = isinstance(fileName, str)
    if openedHere :
        outfile = open(fileName, 'w')
    else :
        outfile = fileName

    try :
        outfile.write('#A=' + str(self.A) + '\n')
        outfile.write('#B=' + str(self.B) + '\n')
        self.classifier.save(outfile)
    finally :
        # only close what this method opened
        if openedHere :
            outfile.close()
188
def load(self, fileName) :
    """Restore the sigmoid parameters and the wrapped classifier from a file.

    Reads the ``#A=`` and ``#B=`` header lines written by ``save`` and
    then loads the classifier from the same file with ``svm.loadSVM``.

    :Parameters:
      - `fileName` - name of a file written by ``save``

    A stored value of ``None`` (written when the sigmoid fit failed) is
    restored as None instead of raising ValueError.
    NOTE(review): ``maxPos`` / ``minNeg`` are not stored in the file, so a
    model loaded with A=None still lacks the fallback scaling factors.
    """

    # NOTE(review): the module never imports `svm`; it is assumed to be
    # PyML.classifiers.svm (which provides loadSVM) -- confirm.  Imported
    # here rather than at module level to avoid a possible import cycle.
    from PyML.classifiers import svm

    def parseValue(text) :
        text = text.strip()
        if text == 'None' :
            return None
        return float(text)

    self.A = None
    self.B = None
    infile = open(fileName)
    try :
        for line in infile :
            if line.startswith('#A=') :
                self.A = parseValue(line[3:])
            elif line.startswith('#B=') :
                self.B = parseValue(line[3:])
                # B is the last header line; the rest belongs to the classifier
                break
    finally :
        infile.close()
    self.classifier = svm.loadSVM(fileName)
    self.labels = self.classifier.labels
203
205 '''
206 Converts a real valued classifier into a conditional probability estimator.
207 This is achieved by fitting a sigmoid with parameters A and B to the
208 values of the decision function:
209 f(x) --> 1/(1+exp(A*f(x)+B))
210
211 The fitting procedure is a Levenberg-Marquardt
212 optimization derived by Tobias Mann using
213 Mathematica, to optimize the objective function
214 in:
215
216 John C. Platt. Probabilistic Outputs for Support Vector
217 Machines and Comparisons to Regularized Likelihood Methods. in:
218 Advances in Large Margin Classifiers
219 A. J. Smola, B. Schoelkopf, D. Schuurmans, eds. MIT Press (1999).
220 '''
221
def fit_A_B(self, prior1, prior0, out, deci, Y) :
    """Fit the sigmoid parameters A and B by a damped Newton iteration.

    Minimizes ``self.log_likelihood(t, deci, A, B)`` over A and B, where
    ``t`` holds Platt's smoothed targets.  Each iteration adds
    ``lm_lambda`` to the diagonal of the Hessian before inverting it; the
    damping is divided by 10 after an accepted step and multiplied by 10
    after a rejected one.

    :Parameters:
      - `prior1` - number of examples in class 1
      - `prior0` - number of examples in class 0
      - `out` - not used by this implementation
      - `deci` - decision function values of the fitting examples
      - `Y` - given labels of the fitting examples (1 marks class 1)

    Sets ``self.A`` and ``self.B``.  If the damped Hessian is singular or
    has a condition number above 1e5, the initial values are restored and
    the iteration stops.
    """

    hiTarget = (prior1 + 1.0) / (prior1 + 2.0)
    loTarget = 1.0 / (prior0 + 2.0)
    t = numpy.zeros(len(Y), numpy.float64)
    for i, label in enumerate(Y) :
        if label == 1 :
            t[i] = hiTarget
        else :
            t[i] = loTarget

    maxiter = 100
    A = 0.0
    B = math.log((prior0 + 1.0) / (prior1 + 1.0))
    A_init = A
    B_init = B
    ll = self.log_likelihood(t, deci, A, B)
    lm_lambda = 1e-4
    for it in range(maxiter) :
        H = self.hessian(t, deci, A, B)
        grad = self.gradient(t, deci, A, B)
        # build the damped matrix as a new object so H is left untouched
        H_for_inversion = [[H[0][0] + lm_lambda, H[0][1]],
                           [H[1][0], H[1][1] + lm_lambda]]
        cond = self.condition_number(H_for_inversion)
        if cond is None or cond > 1e5 :
            # too ill-conditioned to trust: give up and keep the start values
            A = A_init
            B = B_init
            break

        inverse_H = self.two_by_two_inverse(H_for_inversion)
        update_vec = self.get_proposed_update_vec(inverse_H, grad)
        proposed_A = A - update_vec[0]
        proposed_B = B - update_vec[1]
        proposed_ll = self.log_likelihood(t, deci, proposed_A, proposed_B)
        if proposed_ll < ll :
            A = proposed_A
            B = proposed_B
            lm_lambda = lm_lambda / 10
            delta = ll - proposed_ll
            ll = proposed_ll
            if delta < 1e-4 :
                break
        else :
            lm_lambda = lm_lambda * 10

    self.A = A
    self.B = B
271
273 update_vec = [0,0]
274 update_vec[0] = m[0][0]*v[0]+m[0][1]*v[1]
275 update_vec[1] = m[1][0]*v[0]+m[1][1]*v[1]
276 return update_vec
277
279
280 M_inverse = self.two_by_two_inverse(M)
281
282 if M_inverse is None:
283 condition_number = None
284 else:
285 M_norm = math.sqrt(M[0][0]**2+
286 M[0][1]**2+
287 M[1][0]**2+
288 M[1][1]**2)
289 M_inverse_norm = math.sqrt(M_inverse[0][0]**2+
290 M_inverse[0][1]**2+
291 M_inverse[1][0]**2+
292 M_inverse[1][1]**2)
293 condition_number = M_norm*M_inverse_norm
294
295 return condition_number
296
297
298
300
301
302
303
304
305 ll = 0
306 small = 1e-15
307
308 for i in range(len(t)):
309 exp_term = math.exp(A*f[i]+B)
310 p_i = 1/(1+exp_term)
311
312
313 if p_i < small:
314 p_i = small
315
316
317 if abs(p_i-1) < small:
318 p_i = 1-small
319
320 ll = ll + t[i]*math.log(p_i) + \
321 (1-t[i])*math.log(1-p_i)
322
323 return -ll
324
326
327
328
329
330
331
332
333 a = M[0][0]
334 b = M[0][1]
335 c = M[1][0]
336 d = M[1][1]
337 det = a*d-b*c
338 I = [[0,0],[0,0]]
339 if det == 0:
340 I = None
341 else:
342 I[0][0] = d/det
343 I[0][1] = -b/det
344 I[1][0] = -c/det
345 I[1][1] = a/det
346 return I
347
354
356 d2f_dA2 = self.dF_dAA(t,f,A,B)
357 d2f_dB2 = self.dF_dBB(t,f,A,B)
358 d2f_dAB = self.dF_dAB(t,f,A,B)
359
360 hessian = [[0,0],[0,0]]
361 hessian[0][0] = d2f_dA2
362 hessian[0][1] = d2f_dAB
363 hessian[1][0] = d2f_dAB
364 hessian[1][1] = d2f_dB2
365
366 return hessian
367
def dF_dA(self,t,f,A,B):
    """Derivative with respect to A of the negative log-likelihood.

    With p_i = 1 / (1 + exp(A * f[i] + B)) and

        F = -sum(t[i] * log(p_i) + (1 - t[i]) * log(1 - p_i))

    the derivative simplifies to sum(f[i] * (t[i] - p_i)).  This is the
    same quantity as the expanded expression used previously, but it
    contains no ``exp(...)**2`` terms, so it neither raises OverflowError
    for large arguments nor loses precision when p_i rounds to 1.

    :Parameters:
      - `t` - target values, one per example
      - `f` - decision function values, one per example
      - `A`, `B` - current sigmoid parameters
    """

    total = 0.0
    for i in range(len(t)):
        z = A*f[i]+B
        # evaluate the sigmoid with a non-positive exponent only
        if z >= 0:
            ez = math.exp(-z)
            prob = ez/(1.0+ez)
        else:
            prob = 1.0/(1.0+math.exp(z))
        total = total + f[i]*(t[i]-prob)

    return total
386
def dF_dB(self,t,f,A,B):
    """Derivative with respect to B of the negative log-likelihood.

    With p_i = 1 / (1 + exp(A * f[i] + B)) and

        F = -sum(t[i] * log(p_i) + (1 - t[i]) * log(1 - p_i))

    the derivative simplifies to sum(t[i] - p_i).  This is the same
    quantity as the expanded expression used previously, but it contains
    no ``exp(...)**2`` terms, so it neither raises OverflowError for large
    arguments nor loses precision when p_i rounds to 1.

    :Parameters:
      - `t` - target values, one per example
      - `f` - decision function values, one per example
      - `A`, `B` - current sigmoid parameters
    """

    total = 0.0
    for i in range(len(t)):
        z = A*f[i]+B
        # evaluate the sigmoid with a non-positive exponent only
        if z >= 0:
            ez = math.exp(-z)
            prob = ez/(1.0+ez)
        else:
            prob = 1.0/(1.0+math.exp(z))
        total = total + (t[i]-prob)

    return total
405
407
408
409 small = 1e-15
410 partial = 0
411 for i in range(len(t)):
412 invprob = 1+math.exp(B+A*f[i])
413 prob = 1/invprob
414 if abs(prob-1) < small:
415 prob = 1-small
416 partial = partial + \
417 -((math.exp(2*B + 2*A*f[i])*f[i]**2*(1 - t[i]))/(invprob**4*(1 - prob)**2)) - \
418 (2*math.exp(2*B + 2*A*f[i])*f[i]**2*(1 - t[i]))/(invprob**3*(1 - prob)) + \
419 (math.exp(B + A*f[i])*f[i]**2*(1 - t[i]))/(invprob**2*(1 - prob)) + \
420 (math.exp(2*B + 2*A*f[i])*f[i]**2*t[i])/invprob**2 - \
421 math.exp(B + A*f[i])*prob*f[i]**2*t[i]
422
423 return -partial
424
426
427
428 small = 1e-15
429 partial = 0
430 for i in range(len(t)):
431 invprob = 1+math.exp(B+A*f[i])
432 prob = 1/invprob
433 if abs(prob-1) < small:
434 prob = 1-small
435 partial = partial + \
436 -((math.exp(2*B + 2*A*f[i])*(1 - t[i]))/(invprob**4*(1 - prob)**2)) - \
437 (2*math.exp(2*B + 2*A*f[i])*(1 - t[i]))/(invprob**3*(1 - prob)) + \
438 (math.exp(B + A*f[i])*(1 - t[i]))/(invprob**2*(1 - prob)) + \
439 (math.exp(2*B + 2*A*f[i])*t[i])/invprob**2 - math.exp(B + A*f[i])*prob*t[i]
440
441 return -partial
442
444
445
446 small = 1e-15
447 partial = 0
448 for i in range(len(t)):
449 invprob = 1+math.exp(B+A*f[i])
450 prob = 1/invprob
451 if abs(prob-1) < small:
452 prob = 1-small
453 partial = partial + \
454 -((math.exp(2*B + 2*A*f[i])*f[i]*(1 - t[i]))/(invprob**4*(1 - prob)**2)) - \
455 (2*math.exp(2*B + 2*A*f[i])*f[i]*(1 - t[i]))/(invprob**3*(1 - prob)) + \
456 (math.exp(B + A*f[i])*f[i]*(1 - t[i]))/(invprob**2*(1 - prob)) + \
457 (math.exp(2*B + 2*A*f[i])*f[i]*t[i])/invprob**2 - math.exp(B + A*f[i])*prob*f[i]*t[i]
458
459 return -partial
460