
Source Code for Module PyML.evaluators.assess

import numpy
import random
import math
import os
import tempfile
import copy
import time
import warnings   # used by ClassificationResultsContainer.appendPrediction

from PyML.utils import myio, misc

"""functionality for assessing classifier performance"""

__docformat__ = "restructuredtext en"

def test(classifier, data, **args) :
    """test a classifier on a given dataset

    :Parameters:
      - `classifier` - a trained classifier
      - `data` - a dataset

    :Return:
      a Results class instance

    :Keywords:
      - `stats` - whether to compute the statistics of the match between the
        predicted labels and the given labels [False by default]
    """

    if 'verbose' in args :
        verbose = args['verbose']
    else :
        verbose = 1

    if verbose :
        print 'testing', \
              '***********************************************************'

    testStart = time.clock()

    if data.testingFunc is not None :
        data.test(classifier.trainingData, **args)

    classifier.project(data)

    res = classifier.resultsObject(data, classifier, **args)

    for i in range(len(data)) :
        if verbose and i % 100 == 0 and i > 0 :
            print i, 'patterns classified'
        res.appendPrediction(classifier.classify(data, i), data, i)

    try :
        res[0].log = classifier.log
    except :
        pass
    try :
        computeStats = args['stats']
    except :
        computeStats = False
    if computeStats and data.labels.L is not None :
        res.computeStats()

    try :
        res[0].log.testingTime = time.clock() - testStart
    except :
        pass

    return res

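# Usage sketch (illustrative, not from the original module): 'test' is
# usually reached through a trained classifier's own test method, but can
# be called directly.  The SVM import and the data file names below are
# assumptions standing in for your own classifier and datasets:
#
#   from PyML import SparseDataSet
#   from PyML.classifiers.svm import SVM
#   trainData = SparseDataSet('train.data')   # hypothetical file
#   testData = SparseDataSet('test.data')     # hypothetical file
#   s = SVM()
#   s.train(trainData)
#   results = test(s, testData, stats = True)
#   print results.successRate
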
def loo(classifier, data, **args) :
    """perform Leave One Out

    :Returns:
      a results object

    USAGE: loo(classifier, data)
    """

    looResults = classifier.resultsObject()
    args['stats'] = False

    for i in range(len(data)) :
        trainingPatterns = misc.setminus(range(len(data)), [i])
        looResults.extend(
            classifier.trainTest(data, trainingPatterns, [i], **args))

    looResults.computeStats()

    return looResults

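# Sketch: leave-one-out performs len(data) train/test rounds, so it is only
# practical for small datasets.  Reusing the (assumed) names from the sketch
# above:
#
#   looResults = loo(s, trainData)
#   print looResults.balancedSuccessRate
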
def cvFromFolds(classifier, data, trainingPatterns, testingPatterns,
                **args) :
    """perform cross validation

    :Parameters:
      - `classifier` - a classifier template
      - `data` - a dataset
      - `trainingPatterns` - a list providing the training examples for each fold
      - `testingPatterns` - a list providing the testing examples for each fold

    :Keywords:
      - `intermediateFile` - a file name to save intermediate results under;
        if this argument is not given, no intermediate results are saved

    :Returns:
      a Results object.
      The ROC curve is computed using the resulting classification of each
      point in the dataset (in contrast to Provost, Fawcett and Kohavi, who
      compute average ROC curves).
    """

    assert len(trainingPatterns) == len(testingPatterns)

    cvResults = classifier.resultsObject()
    args['stats'] = False

    for fold in range(len(trainingPatterns)) :
        cvResults.extend(trainTest(classifier, data,
                                   trainingPatterns[fold], testingPatterns[fold], **args))
        #if 'intermediateFile' in args :
        #    cvResults.save(args['intermediateFile'])

    cvResults.computeStats()

    return cvResults

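# Sketch: cvFromFolds accepts hand-built folds, e.g. to keep related
# patterns out of each other's test sets.  The index lists below are made
# up for a six-pattern dataset:
#
#   training = [[2, 3, 4, 5], [0, 1, 4, 5], [0, 1, 2, 3]]
#   testing  = [[0, 1], [2, 3], [4, 5]]
#   results = cvFromFolds(s, data, training, testing)
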
def cv(classifier, data, numFolds = 5, **args) :
    """perform k-fold cross validation

    :Parameters:
      - `classifier` - a classifier template
      - `data` - a dataset
      - `numFolds` - number of cross validation folds (default = 5)

    :Returns:
      a Results object.

    :Keywords:
      - `numFolds` - number of cross validation folds (default = 5)
      - `seed` - random number generator seed
      - `foldsToPerform` - number of folds to actually perform (in case you're doing
        n fold CV, and want to save time, and only do some of the folds)
    """

    if 'numFolds' in args :
        numFolds = args['numFolds']
    if 'seed' in args :
        random.seed(args['seed'])
    foldsToPerform = numFolds
    if 'foldsToPerform' in args :
        foldsToPerform = args['foldsToPerform']
    if foldsToPerform > numFolds :
        raise ValueError, 'foldsToPerform > numFolds'

    perm = range(len(data))
    random.shuffle(perm)
    foldSize = len(data) / numFolds
    trainingPatterns = []
    testingPatterns = []

    for fold in range(foldsToPerform) :
        if fold < numFolds - 1 :
            testingPatterns.append(perm[foldSize * fold : foldSize * (fold + 1)])
        else :
            testingPatterns.append(perm[foldSize * fold : len(data)])
        trainingPatterns.append(misc.setminus(range(len(data)),
                                              testingPatterns[-1]))

    return cvFromFolds(classifier, data, trainingPatterns, testingPatterns, **args)

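# Sketch ('s' and 'data' as assumed above); classifiers also expose this as
# a method, which is how nCV below invokes it:
#
#   results = cv(s, data, numFolds = 5, seed = 1)
#   print results.getROC()
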
def stratifiedCV(classifier, data, numFolds = 5, **args) :
    """perform k-fold stratified cross-validation; in each fold the number of
    patterns from each class is proportional to the relative fraction of the
    class in the dataset

    :Parameters:
      - `classifier` - a classifier template
      - `data` - a dataset
      - `numFolds` - number of cross validation folds (default = 5)

    :Returns:
      a Results object.

    :Keywords:
      - `numFolds` - number of cross-validation folds -- overrides the numFolds parameter
      - `seed` - random number generator seed
      - `trainingAllFolds` - a list of patterns that are to be used as training
        examples in all CV folds.
      - `intermediateFile` - a file name to save intermediate results under;
        if this argument is not given, no intermediate results are saved
      - `foldsToPerform` - number of folds to actually perform (in case you're doing
        n fold CV, and want to save time, and only do some of the folds)
    """

    if 'numFolds' in args :
        numFolds = args['numFolds']
    if 'seed' in args :
        random.seed(args['seed'])
    if 'trainingAllFolds' in args :
        trainingAllFolds = args['trainingAllFolds']
    else :
        trainingAllFolds = []
    foldsToPerform = numFolds
    if 'foldsToPerform' in args :
        foldsToPerform = args['foldsToPerform']
    if foldsToPerform > numFolds :
        raise ValueError, 'foldsToPerform > numFolds'

    trainingAllFoldsDict = misc.list2dict(trainingAllFolds)

    labels = data.labels
    p = [[] for i in range(labels.numClasses)]
    classFoldSize = [int(labels.classSize[k] / numFolds) for k in range(labels.numClasses)]

    for i in range(len(data)) :
        if i not in trainingAllFoldsDict :
            p[labels.Y[i]].append(i)
    for k in range(labels.numClasses) :
        random.shuffle(p[k])

    trainingPatterns = [[] for i in range(foldsToPerform)]
    testingPatterns = [[] for i in range(foldsToPerform)]
    for fold in range(foldsToPerform) :
        for k in range(labels.numClasses) :
            classFoldStart = classFoldSize[k] * fold
            if fold < numFolds - 1 :
                classFoldEnd = classFoldSize[k] * (fold + 1)
            else :
                classFoldEnd = labels.classSize[k]
            testingPatterns[fold].extend(p[k][classFoldStart:classFoldEnd])
            if fold > 0 :
                trainingPatterns[fold].extend(p[k][0:classFoldStart] +
                                              p[k][classFoldEnd:labels.classSize[k]])
            else :
                trainingPatterns[fold].extend(p[k][classFoldEnd:labels.classSize[k]])

    if len(trainingPatterns) > 0 :
        for fold in range(len(trainingPatterns)) :
            trainingPatterns[fold].extend(trainingAllFolds)

    return cvFromFolds(classifier, data, trainingPatterns, testingPatterns, **args)

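# Sketch: stratified CV is the usual choice for classification; the
# trainingAllFolds keyword pins the given (illustrative) pattern indices
# into every training fold:
#
#   results = stratifiedCV(s, data, 5, seed = 1, trainingAllFolds = [0, 1, 2])
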
def nCV(classifier, data, **args) :
    """
    runs CV n times, returning a 'ResultsList' object.

    :Parameters:
      - `classifier` - classifier template
      - `data` - dataset

    :Keywords:
      - `cvType` - which CV function to apply (default: stratifiedCV)
      - `seed` - random number generator seed (default: 1)
        This is used as the seed for the first CV run. Subsequent runs
        use seed + 1, seed + 2...
      - `iterations` - number of times to run CV (default: 10)
      - `numFolds` - number of folds to use with CV (default: 5)
      - `intermediateFile` - a file name to save intermediate results under;
        if this argument is not given, no intermediate results are saved

    :Returns:
      `ResultsList` - a list of the results of each CV run as a ResultsList object
    """

    cvList = ResultsList()

    cvType = 'stratifiedCV'
    if 'cvType' in args : cvType = args['cvType']
    seed = 1
    if 'seed' in args : seed = args['seed']
    numFolds = 5
    if 'numFolds' in args : numFolds = args['numFolds']
    iterations = 10
    if 'iterations' in args : iterations = args['iterations']
    intermediateFile = None
    if 'intermediateFile' in args : intermediateFile = args['intermediateFile']

    for i in range(iterations) :
        if cvType == 'stratifiedCV' :
            cvList.append(classifier.stratifiedCV(data, numFolds = numFolds, seed = seed + i))
        elif cvType == 'cv' :
            cvList.append(classifier.cv(data, numFolds = numFolds, seed = seed + i))
        else :
            raise ValueError, 'unrecognized type of CV'
        if intermediateFile is not None :
            cvList.save(intermediateFile)

    cvList.computeStats()

    return cvList

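# Sketch: repeated CV yields a standard deviation for each statistic, which
# ResultsList.__repr__ below prints in parentheses:
#
#   resultsList = nCV(s, data, iterations = 10, numFolds = 5, seed = 1)
#   print resultsList
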
def makeFolds(data, numFolds, datasetName, directory = '.') :
    '''split a dataset into several folds and save the training and testing
    data of each fold as a separate dataset

    data - a dataset instance
    numFolds - number of folds into which to split the data
    datasetName - string to use for the file names
    directory - the directory into which to deposit the files
    '''

    perm = range(len(data))
    random.shuffle(perm)
    foldSize = len(data) / numFolds

    for fold in range(numFolds) :
        if fold < numFolds - 1 :
            testingPatterns = perm[foldSize * fold : foldSize * (fold + 1)]
        else :
            testingPatterns = perm[foldSize * fold : len(data)]
        trainingPatterns = misc.setminus(range(len(data)), testingPatterns)

        trainingData = data.__class__(data, patterns = trainingPatterns)
        testingData = data.__class__(data, patterns = testingPatterns)

        testingDataName = os.path.join(directory, datasetName + 'Testing' + str(fold) + '.data')
        testingData.save(testingDataName)
        trainingDataName = os.path.join(directory, datasetName + 'Training' + str(fold) + '.data')
        trainingData.save(trainingDataName)

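# Sketch: this writes files such as myDataTraining0.data /
# myDataTesting0.data, matching the naming scheme cvFromFile expects
# ('myData' and the directory are illustrative):
#
#   makeFolds(data, 5, 'myData', directory = 'folds')
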
def cvFromFile(classifier, trainingBase, testingBase, datasetClass, **args) :
    """perform CV when the training and test data are in files whose names
    are of the form:
    trainingBase + number + string
    and
    testingBase + number + string
    For example:
    training0.data, training1.data, training2.data
    and
    testing0.data, testing1.data, testing2.data
    for 3 fold CV.
    Training and testing files are matched by the number appearing after
    the strings trainingBase and testingBase.
    Both trainingBase and testingBase can be paths.
    """

    args['stats'] = False
    import re
    directory = os.path.dirname(trainingBase)
    if directory == '' : directory = '.'

    # os.listdir returns base names, so match against the base names of
    # trainingBase / testingBase and join the directory back on for loading
    files = os.listdir(directory)
    trainingFiles = [os.path.join(directory, file) for file in files
                     if file.find(os.path.basename(trainingBase)) == 0]
    testingFiles = [os.path.join(directory, file) for file in files
                    if file.find(os.path.basename(testingBase)) == 0]

    # now we check if the training files match the test files:
    numberRE = re.compile(r'\d+')

    trainingNum = [numberRE.findall(trainingFile)[-1]
                   for trainingFile in trainingFiles]
    testingNum = [numberRE.findall(testingFile)[-1]
                  for testingFile in testingFiles]

    assert len(trainingNum) == len(testingNum)
    for i in range(len(trainingNum)) :
        if trainingNum[i] != testingNum[i] :
            raise ValueError, 'training files do not match testing files'

    trainingData = datasetClass(trainingFiles[0])

    cvResults = classifier.resultsObject(trainingData, classifier)

    for fold in range(len(trainingFiles)) :
        if fold > 0 :
            trainingData = datasetClass(trainingFiles[fold])

        classifier.train(trainingData)
        del trainingData

        testingData = datasetClass(testingFiles[fold])

        r = classifier.test(testingData, **args)
        cvResults.extend(r)

    cvResults.computeStats()

    return cvResults

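# Sketch, continuing the makeFolds example; the dataset class must match
# whatever container the folds were saved as (SparseDataSet is an
# assumption):
#
#   from PyML import SparseDataSet
#   results = cvFromFile(s, 'folds/myDataTraining', 'folds/myDataTesting',
#                        SparseDataSet)
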
def scatter(r1, r2, statistic = 'roc', x1Label = '', x2Label = '',
            fileName = None, **args) :
    """
    a scatter plot for comparing the performance of two classifiers

    :Parameters:
      - `r1, r2` - both are either a list of Result classes, or a list of
        success rates / ROC scores
      - `statistic` - which measure of classifier success to plot
        values : 'roc', 'successRate', 'balancedSuccessRate'
        in order to specify parts of the roc curve you can use something like:
        'roc50' or 'roc0.1'

    :Keywords:
      - `title` - the title of the plot
    """

    if len(r1) != len(r2) :
        print 'unequal lengths for r1 and r2'
        if type(r1) != type({}) :
            raise ValueError, 'Cannot handle unequal length when it is not a dict'
        keys1 = r1.keys()
        keys2 = r2.keys()
        common = misc.intersect(keys1, keys2)
        r1new = {}
        r2new = {}
        for key in common :
            r1new[key] = r1[key]
            r2new[key] = r2[key]
        r1 = r1new
        r2 = r2new

    if type(r1) == type({}) and type(r2) == type({}) :
        I = r1.keys()
    else :
        I = range(len(r1))

    if (r1[I[0]].__class__.__name__ == 'Results' or
        r1[I[0]].__class__.__name__ == 'Container') :
        p1 = misc.extractAttribute(r1, statistic)
        p2 = misc.extractAttribute(r2, statistic)
    else :
        p1 = r1
        p2 = r2

    if type(p1) == type({}) :
        p1 = p1.values()
        p2 = p2.values()

    from matplotlib import pylab

    x = numpy.arange(0, 1, 0.01)
    pylab.plot(p1, p2, 'bo', x, x, '-k')
    pylab.xlabel(x1Label, fontsize = 18)
    pylab.ylabel(x2Label, fontsize = 18)
    if 'title' in args :
        pylab.title(args['title'], fontsize = 18)
    pylab.show()

    if fileName is not None :
        pylab.savefig(fileName)
    pylab.close()

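# Sketch: compare two classifiers run on the same collection of datasets;
# points above the diagonal favor the classifier on the y axis.  r1 and r2
# could be dictionaries of Results keyed by dataset name, or plain lists of
# ROC scores:
#
#   scatter(r1, r2, 'roc', 'linear kernel', 'gaussian kernel',
#           title = 'ROC comparison')
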
def plotROC2(decisionFunc, givenY, fileName = None, **args) :

    res = misc.Container({'decisionFunc' : decisionFunc,
                          'givenY' : givenY,
                          'Y' : None})
    plotROC(res, fileName, **args)

def plotROC(res, fileName = None, **args) :
    """plot the ROC curve from a given Results (or Results-like) object

    :Parameters:
      - `res` - a Results object (or a Container object that was made by
        saving a Results object).  Note that if you have a Results object
        you can use this function as a method, so there is no need to
        supply this argument.
      - `fileName` - optional argument - if given, the roc curve is saved
        in the given file name. The format is determined by the extension.
        Supported extensions: .eps, .png, .svg

    :Keywords:
      - `rocN` - what type of ROC curve to plot (roc50, roc10 etc.); the
        default is the full ROC curve
      - `normalize` - whether to normalize the ROC curve (default: True)
      - `plotStr` - which string to pass to matplotlib's plot function
        default: 'ob'
      - `axis` - redefine the figure axes; takes a list of the form
        [xmin,xmax,ymin,ymax]
      - `show` - whether to show the ROC curve (default: True)
        useful when you just want to save the curve to a file.
        Some file formats automatically set this to False (e.g. svg files);
        this relates to quirks of matplotlib.
    """

    if 'rocN' in args :
        rocN = args['rocN']
    else :
        rocN = None
    if 'show' in args :
        show = args['show']
    else :
        show = True
    if 'plotStr' in args :
        plotStr = args['plotStr']
    else :
        plotStr = 'ob'
    rocNormalize = True
    if 'normalize' in args :
        rocNormalize = args['normalize']

    numPoints = 200
    if 'numPoints' in args :
        numPoints = args['numPoints']

    targetClass = 1

    if type(res) == type([]) :
        feature = res[0]
        givenY = res[1]
        rocTP, rocFP, rocArea = roc(
            None, givenY, feature, rocN, targetClass, rocNormalize)
    else :
        rocTP, rocFP, rocArea = roc(
            res.Y, res.givenY, res.decisionFunc, rocN, targetClass,
            rocNormalize)

    stride = int(max(1, float(len(rocTP)) / float(numPoints)))

    if stride > 1 :
        rocTP = [rocTP[i] for i in range(0, len(rocTP), stride)]
        rocFP = [rocFP[i] for i in range(0, len(rocFP), stride)]

    import matplotlib
    if fileName is not None and fileName.find('.svg') > 0 :
        matplotlib.use('SVG')
        show = False
    if fileName is not None and fileName.find('.eps') > 0 :
        matplotlib.use('PS')
        show = False

    from matplotlib import pylab
    lines = pylab.plot(rocFP, rocTP, plotStr,
                       markersize = 8, linewidth = 3)
    if rocNormalize :
        pylab.xlabel('False positive rate', fontsize = 18)
        pylab.ylabel('True positive rate', fontsize = 18)
    else :
        pylab.xlabel('False positives', fontsize = 18)
        pylab.ylabel('True positives', fontsize = 18)
    if rocNormalize :
        pylab.axis([0, 1, 0, 1])
    if 'axis' in args :
        pylab.axis(args['axis'])
    if fileName is not None :
        pylab.savefig(fileName)
    if show :
        pylab.show()

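# Sketch: plot and save the ROC curve of a single fold ('roc.png' is an
# illustrative file name):
#
#   results = stratifiedCV(s, data, 5)
#   results.plotROC('roc.png')                 # method form, plots fold 0
#   plotROC2(results.getDecisionFunction(0),   # equivalent, from raw values
#            results.getGivenClass(0))
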
def plotROCs(resList, descriptions = None, fileName = None, **args) :
    """
    plot multiple ROC curves.

    :Parameters:
      - `resList` - a list or dictionary of Result or Result-like objects
      - `descriptions` - text for the legend (a list the size of resList).
        A legend is not shown if this parameter is not given.
        In the case of a dictionary input the description for the legend is
        taken from the dictionary keys.
      - `fileName` - if given, a file to save the figure in

    :Keywords:
      - `legendLoc` - the position of the legend -- an integer between 0 and 9;
        see the matplotlib documentation for details
      - `plotStrings` - a list of matlab style plotting strings to send to the
        plotROC function (instead of the plotStr keyword of plotROC)
      - `other keywords` - keywords of the plotROC function
    """

    # a list of file names may be given instead of a list of results objects;
    # load them with a name that does not shadow the fileName parameter
    if type(resList) == type([]) and type(resList[0]) == type('') :
        fileNames = resList
        resList = []
        for resultsFileName in fileNames :
            resList.append(myio.load(resultsFileName))
        if descriptions is None :
            descriptions = []
            for resultsFileName in fileNames :
                descriptions.append(os.path.splitext(resultsFileName)[0])

    import matplotlib
    show = True
    if fileName is not None and fileName.find('.svg') > 0 :
        matplotlib.use('SVG')
        show = False
    if fileName is not None and fileName.find('.eps') > 0 :
        matplotlib.use('PS')
        show = False

    from matplotlib import pylab
    args['show'] = False

    #plotStrings = ['bo', 'k^', 'rv', 'g<', 'm>', 'k<']
    #plotStrings = ['b:', 'k-.', 'b-', 'g-', 'm-', 'k-', 'b-', 'r-', 'g-']
    plotStrings = ['b-', 'k--', 'r-', 'g-.', 'm-', 'k:', 'b-', 'r-', 'g-']
    if 'plotStrings' in args :
        plotStrings = args['plotStrings']
    if type(resList) == type([]) :
        for i in range(len(resList)) :
            args['plotStr'] = plotStrings[i]
            plotROC(resList[i], **args)
    else :
        if descriptions is None :
            descriptions = [key for key in resList]
        i = 0
        for key in resList :
            args['plotStr'] = plotStrings[i]
            plotROC(resList[key], **args)
            i += 1

    if descriptions is not None :
        legendLoc = 'best'
        if 'legendLoc' in args :
            legendLoc = args['legendLoc']
        pylab.legend(descriptions, loc = legendLoc)
        leg = pylab.gca().get_legend()
        ltext = leg.get_texts()   # all the text.Text instances in the legend
        llines = leg.get_lines()  # all the lines.Line2D instances in the legend
        frame = leg.get_frame()   # the Rectangle instance surrounding the legend

        #frame.set_facecolor(0.80)  # set the frame face color to light gray
        for obj in ltext :
            obj.set_size(14)
        #leg.draw_frame(False)      # don't draw the legend frame

    if fileName is not None :
        pylab.savefig(fileName)
    if show :
        pylab.show()

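# Sketch: overlay the ROC curves of two results objects (e.g. two outputs
# of 'test' above) with a legend; the descriptions are illustrative:
#
#   plotROCs([results1, results2], ['SVM', 'ridge'], 'rocs.png')
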
def significance(r1, r2, statistic = 'roc') :
    """
    report the statistical significance of the difference in error rates
    of a series of classification results of two classifiers
    using the Wilcoxon signed rank test.

    Returns: pvalue, (median1, median2)
    where:
    pvalue - the pvalue of the two sided Wilcoxon signed rank test; to get
    the pvalue of a one sided test divide the pvalue by two.
    (median1, median2) - the medians of the statistics of the inputs r1 and r2.

    :Parameters:
      - `r1, r2` - both are either a list of Result classes, or a list of success
        rates
      - `statistic` - which measure of classifier success to compare
        values : 'roc', 'successRate', 'balancedSuccessRate'
        in order to specify parts of the roc curve you can use something like:
        'roc50' or 'roc0.1'
    """

    if type(r1) != type(r2) :
        raise ValueError, 'r1 and r2 do not have the same type'

    # if the two objects are dictionaries, then we can handle the case that
    # the lengths are not equal:
    if len(r1) != len(r2) :
        print 'unequal lengths for r1 and r2'
        if type(r1) != type({}) :
            raise ValueError, 'Cannot handle unequal length when it is not a dict'
        keys1 = r1.keys()
        keys2 = r2.keys()
        common = misc.intersect(keys1, keys2)
        r1new = {}
        r2new = {}
        for key in common :
            r1new[key] = r1[key]
            r2new[key] = r2[key]
        r1 = r1new
        r2 = r2new

    if type(r1) == type({}) :
        if r1.keys() != r2.keys() :
            raise ValueError, 'r1 and r2 do not have the same keys'
        I = r1.keys()
    else :
        I = range(len(r1))
    if r1[I[0]].__class__.__name__ == 'Results' or r1[I[0]].__class__.__name__ == 'Container' :
        p1 = misc.extractAttribute(r1, statistic)
        p2 = misc.extractAttribute(r2, statistic)
    else :
        p1 = r1
        p2 = r2

    if type(p1) == type({}) :
        p1 = p1.values()
        p2 = p2.values()

    import salstat_stats
    test = salstat_stats.TwoSampleTests(p1, p2)
    test.SignedRanks(p1, p2)

    p = test.prob
    median1 = numpy.median(numpy.array(p1))
    median2 = numpy.median(numpy.array(p2))

    return p, (median1, median2)

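# Sketch: compare per-dataset ROC scores of two classifiers; plain lists of
# scores (made up here) are accepted as well as Results objects:
#
#   pvalue, (median1, median2) = significance([0.91, 0.85, 0.88],
#                                             [0.87, 0.84, 0.80], 'roc')
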
def trainTest(classifierTemplate, data, trainingPatterns, testingPatterns, **args) :
    """Train a classifier on the list of training patterns, and test it
    on the test patterns
    """

    if 'verbose' in args :
        verbose = args['verbose']
    else :
        verbose = True

    trainingData = data.__class__(data, deepcopy = classifierTemplate.deepcopy,
                                  patterns = trainingPatterns)

    classifier = classifierTemplate.__class__(classifierTemplate)

    if verbose :
        print 'training', \
              '***********************************************************'

    classifier.train(trainingData, **args)

    testingData = data.__class__(data, deepcopy = True,
                                 patterns = testingPatterns)

    return classifier.test(testingData, **args)

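# Sketch: a single explicit train/test split on a 100-pattern dataset
# (the index ranges are illustrative):
#
#   results = trainTest(s, data, range(80), range(80, 100))
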
def confmat(L1, L2) :
    """computes the confusion matrix between two labelings
    """

    if len(L1) != len(L2) :
        raise ValueError, "labels not the same length"

    n = len(L1)

    classes1 = misc.unique(L1)
    classes2 = misc.unique(L2)
    classes1.sort()
    classes2.sort()
    numClasses1 = len(classes1)
    numClasses2 = len(classes2)

    I1 = {}  # a mapping from classes1 to 0..numClasses1-1
    I2 = {}
    for i in range(numClasses1) :
        I1[classes1[i]] = i
    for i in range(numClasses2) :
        I2[classes2[i]] = i

    confmat = numpy.zeros((numClasses1, numClasses2))

    for i in range(n) :
        confmat[I1[L1[i]]][I2[L2[i]]] += 1

    return confmat

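# Toy example: rows index the sorted labels of L1, columns those of L2:
#
#   >>> confmat(['a', 'a', 'b'], ['a', 'b', 'b'])
#   array([[ 1.,  1.],
#          [ 0.,  1.]])
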
def superConfmat(Y1, Y2, numClasses = 0) :
    """computes the confusion matrix between two labelings, where
    the matrix is assumed to be square, according to the labels of Y1.
    Y1 and Y2 are assumed to have integer components in the range
    0,..., numClasses - 1
    """

    if len(Y1) != len(Y2) :
        raise ValueError, "labels not the same length"

    n = len(Y1)

    # the matrix needs max(Y) + 1 rows/columns to accommodate the largest label
    m = max(max(Y1) + 1, max(Y2) + 1, numClasses)

    # prefer using a list object rather than a Numeric object so that there
    # wouldn't be a problem in pickling the object
    confmat = misc.matrix((m, m), 0)

    for i in range(n) :
        confmat[Y1[i]][Y2[i]] += 1

    return confmat

def roc(Y, givenY, decisionFunc, n = None, targetClass = 1, normalize = True) :
    """Compute the ROC curve and area under the curve for a two class problem

    :Parameters:
      - `Y` - the predicted labels (can put None instead)
      - `givenY` - the true labels
      - `decisionFunc` - the values of the decision function
      - `n` - the number of false positives to take into account (roc_n)
      - `targetClass` - the "positive" class
      - `normalize` - whether to normalize the roc curve (default: True);
        when this is set to False, TP/FP counts are output rather than TP/FP rates
    """

    # the number of false positives to take into account
    # note that n can be either an integer or a fraction
    if n is not None and n < 1 :
        n = int(n * numpy.sum(numpy.not_equal(givenY, targetClass)))

    # shuffle the patterns so that ties in the decision function are broken
    # at random
    I = range(len(decisionFunc))
    random.shuffle(I)
    decisionFunc = [decisionFunc[i] for i in I]
    givenY = [givenY[i] for i in I]
    f = numpy.array(decisionFunc)

    tp = [0.0]
    fp = [0.0]
    I = numpy.argsort(-f)

    for patternIdx in I :
        if givenY[patternIdx] == targetClass :
            tp[-1] += 1
        else :
            tp.append(tp[-1])
            fp.append(fp[-1] + 1.0)
        if n is not None and fp[-1] >= n :
            break

    numTP = numpy.sum(numpy.equal(givenY, targetClass))

    if normalize :
        for i in range(len(tp)) :
            if tp[-1] > 0 : tp[i] /= float(numTP)
        for i in range(len(fp)) :
            if fp[-1] > 0 : fp[i] /= float(fp[-1])

        area = numpy.sum(tp) / len(tp)

    else :
        area = numpy.sum(tp) / (len(tp) * numTP)

    return tp, fp, area

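# Toy example (deterministic here since the decision values are distinct);
# note that the area is averaged over the curve points, so it is a coarse
# estimate of the exact pairwise AUC (0.75 for this data):
#
#   >>> tp, fp, area = roc(None, [1, 1, 0, 0], [0.9, 0.3, 0.4, 0.1])
#   >>> fp, tp
#   ([0.0, 0.5, 1.0], [0.5, 1.0, 1.0])
#   >>> round(area, 2)
#   0.83
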
class ResultsContainer (object) :

    def __len__(self) :

        return len(self.Y)

    def appendPrediction(self, arg, data, pattern) :

        raise NotImplementedError

class Results (list) :

    def __init__(self, arg = None, classifier = None, **args) :

        list.__init__(self)

    def __len__(self) :

        return sum([len(res) for res in self])

    def appendPrediction(self, arg, data, pattern) :

        self[-1].appendPrediction(arg, data, pattern)

    def computeStats(self) :

        for results in self :
            results.computeStats()

    def getNumFolds(self) :
        return list.__len__(self)

    def setNumFolds(self) :
        raise AttributeError

    numFolds = property(getNumFolds, setNumFolds, None, 'number of folds')

    def __getattr__(self, attr) :

        if hasattr(self[0], attr) :
            if self.numFolds == 1 :
                return getattr(self[0], attr)
            else :
                if attr not in self.attributeAction :
                    return [getattr(results, attr) for results in self]
                elif self.attributeAction[attr] == 'average' :
                    return numpy.average([getattr(results, attr)
                                          for results in self])
                elif self.attributeAction[attr] == 'returnFirst' :
                    return getattr(self[0], attr)
                elif self.attributeAction[attr] == 'addMatrix' :
                    out = numpy.array(getattr(self[0], attr))
                    for results in self[1:] :
                        out += getattr(results, attr)
                    return out
        else :
            raise AttributeError, 'unknown attribute ' + attr

    def get(self, attribute, fold = None) :

        if fold is None :
            if self.numFolds == 1 :
                return getattr(self[0], attribute)
            else :
                return getattr(self, attribute)
        else :
            return getattr(self[fold], attribute)

    def getDecisionFunction(self, fold = None) :
        return self.get('decisionFunc', fold)

    def getPatternID(self, fold = None) :
        return self.get('patternID', fold)

    def getLog(self, fold = None) :

        if fold is None :
            return [self.get('log', fold_) for fold_ in range(self.numFolds)]
        else :
            return self.get('log', fold)

    def getInfo(self, fold = None) :

        if fold is None :
            return [self.get('info', fold_) for fold_ in range(self.numFolds)]
        else :
            return self.get('info', fold)

    def convert(self, *options) :

        return [results.convert(*options) for results in self]

    def save(self, fileName, *options) :
        """
        save Results to a file;
        only attributes given in the attribute list are saved

        OPTIONS::

          'long' - use the long attribute list
          'short' - use the short attribute list;
          using the short attribute list won't allow you to reconstruct
          the Results object afterwards, only the statistics that characterize
          the results.
        """

        resultsList = self.convert(*options)

        myio.save(resultsList, fileName)

class ClassificationFunctions (object) :

    def __repr__(self) :

        if self.numClasses == 1 : return ''

        if not hasattr(self, 'confusionMatrix') :
            try :
                self.computeStats()
            except :
                return ''

        rep = []

        rep.extend(self.formatConfusionMatrix())

        rep.append('success rate: %f' % self.successRate)
        rep.append('balanced success rate: %f' % self.balancedSuccessRate)
        #if self.numClasses == 2 :
        #    rep.append('ppv: %f ' % self.ppv + 'sensitivity: %f' % self.sensitivity)

        if self.numClasses == 2 :
            rep.append('area under ROC curve: %f' % self.roc)
            if int(self.rocN) == self.rocN :
                rep.append('area under ROC %d curve: %f ' % \
                           (self.rocN, getattr(self, 'roc' + str(self.rocN))))
            else :
                rep.append('area under ROC %f curve: %f ' % \
                           (self.rocN, getattr(self, 'roc' + str(self.rocN))))

        return '\n'.join(rep)

    def formatConfusionMatrix(self) :

        rep = []
        columnWidth = 4
        for label in self.classLabels :
            if len(label) > columnWidth : columnWidth = len(label)
        columnWidth += 1
        # math.ceil returns a float, and string repetition requires an int
        columnWidth = int(max(columnWidth,
                              math.ceil(math.log10(numpy.max(self.confusionMatrix)) + 1)))

        rep.append('Confusion Matrix:')
        rep.append(' ' * columnWidth + ' Given labels:')
        rep.append(' ' * columnWidth +
                   ''.join([label.center(columnWidth) for label in self.classLabels]))

        (numClassesTest, numClassesTrain) = numpy.shape(self.confusionMatrix)
        for i in range(numClassesTest) :
            label = self.classLabels[i]
            rep.append(label.rjust(columnWidth) + ''.join(
                [str(self.confusionMatrix[i][j]).center(columnWidth)
                 for j in range(numClassesTrain)]))

        return rep

    def successRates(self) :

        targetClass = 1
        classSuccess = numpy.zeros(self.numClasses, numpy.float_)
        classSize = numpy.zeros(self.numClasses, numpy.int_)
        for i in range(len(self)) :
            classSize[self.givenY[i]] += 1
            if self.givenY[i] == self.Y[i] :
                classSuccess[self.Y[i]] += 1
        balancedSuccess = 0.0
        for i in range(self.numClasses) :
            if classSize[i] > 0 :
                balancedSuccess += classSuccess[i] / float(classSize[i])
        balancedSuccess /= self.numClasses
        sensitivity = 0
        ppv = 0
        if self.numClasses == 2 :
            if classSuccess[targetClass] > 0 :
                sensitivity = float(classSuccess[targetClass]) /\
                              float(classSize[targetClass])
            numTarget = numpy.sum(numpy.equal(self.Y, targetClass))
            if numTarget == 0 :
                ppv = 0
            else :
                ppv = float(classSuccess[targetClass]) / numTarget

        return 1 - len(self.misclassified) / float(len(self.Y)), balancedSuccess,\
               ppv, sensitivity

class ClassificationResultsContainer (ResultsContainer, ClassificationFunctions) :
    """A class for holding the results of testing a classifier
    """

    plotROC = plotROC

    shortAttrList = ['info', 'log',
                     'successRate', 'balancedSuccessRate',
                     'roc', 'roc50',
                     'classLabels', 'confusionMatrix',
                     'ppv', 'sensitivity']
    longAttrList = ['info', 'log',
                    'Y', 'L', 'decisionFunc', 'givenY', 'givenL',
                    'classLabels',
                    'patternID', 'numClasses']

    def __init__(self, arg, classifier = None, **args) :

        self.rocN = 50
        if 'rocN' in args :
            self.rocN = args['rocN']

        # deal with the roc options and args :
        if 'rocTargetClass' in args :
            self.targetClass = args['rocTargetClass']
            if type(self.targetClass) == type('') :
                # the following is not optimal:
                self.targetClass = arg.labels.classDict[self.targetClass]
        else :
            self.targetClass = 1

        if 'normalization' in args :
            self.rocNormalization = args['normalization']
        else :
            self.rocNormalization = True

        if (arg.__class__.__name__ == 'Container' or
            arg.__class__ == self.__class__) :
            self.copyConstruct(arg, **args)
            return

        data = arg
        self.Y = []
        self.L = []
        self.decisionFunc = []
        self.patternID = []
        self.givenY = []
        self.givenL = []

        self.successRate = 0.0
        self.info = 'dataset:\n' + data.__repr__() + \
                    'classifier:\n' + classifier.__repr__()

        if hasattr(classifier, 'labels') :
            self.classLabels = classifier.labels.classLabels
        elif data.labels.L is not None :
            self.classLabels = data.labels.classLabels
        if hasattr(self, 'classLabels') :
            self.numClasses = len(self.classLabels)

    def copyConstruct(self, other, **args) :

        if not hasattr(other, 'decisionFunc') :
            raise AttributeError, 'not a valid results object'

        if 'patterns' in args :
            p = args['patterns']
            idDict = misc.list2dict(other.patternID, range(len(other.patternID)))
            patterns = [idDict[pattern] for pattern in p
                        if pattern in idDict]
        else :
            patterns = range(len(other.Y))

        self.patternID = [other.patternID[p] for p in patterns]
        self.L = [other.L[p] for p in patterns]
        self.Y = [other.Y[p] for p in patterns]
        self.decisionFunc = [other.decisionFunc[p] for p in patterns]
        self.givenY = [other.givenY[p] for p in patterns]
        self.givenL = [other.givenL[p] for p in patterns]
        self.rocN = 50
        self.classLabels = copy.deepcopy(other.classLabels)
        self.numClasses = len(self.classLabels)
        self.info = other.info
        try :
            self.log = other.log
        except :
            pass
        self.computeStats()

    def __getattr__(self, attr) :

        if attr in ['balancedSuccessRate', 'successRate', 'confusionMatrix'] :
            self.computeStats()
            return getattr(self, attr)

        if not attr.find('roc') == 0 :
            raise AttributeError, 'unknown attribute ' + attr

        if attr == 'roc' :
            rocN = None
        elif attr[-1] == '%' :            # e.g. roc1%
            rocN = float(attr[3:-1]) / 100.0
        elif float(attr[3:]) >= 1 :       # e.g. roc50
            rocN = int(float(attr[3:]))
        else :
            rocN = float(attr[3:])        # e.g. roc0.01 (equivalent to roc1%)

        rocValue = self.getROC(rocN)
        # set the value of the roc so that it does not have to be computed
        # next time it is accessed
        # xxx be careful -- this is o.k. as long as rocN hasn't changed;
        # whenever rocN is changed, roc needs to be reset.
        setattr(self, attr, rocValue)

        return rocValue

    def getROC(self, rocN = None) :

        rocTP, rocFP, rocValue = roc(self.Y, self.givenY, self.decisionFunc,
                                     rocN, self.targetClass, self.rocNormalization)
        return rocValue

    def appendPrediction(self, arg, data, pattern) :
        '''
        add the classification results and labels of a data point
        '''

        (y, f) = arg
        if f != f :   # NaN is the only value that is not equal to itself
            warnings.warn("decision function value is a nan, prediction ignored",
                          RuntimeWarning)
            return
        self.Y.append(y)
        self.decisionFunc.append(f)
        self.L.append(self.classLabels[y])
        if hasattr(data.labels, 'patternID') and data.labels.patternID is not None :
            self.patternID.append(data.labels.patternID[pattern])
        if hasattr(data.labels, 'Y') and data.labels.Y is not None :
            self.givenY.append(data.labels.Y[pattern])
            self.givenL.append(data.labels.L[pattern])

    def computeStats(self, **args) :

        if len(self.givenY) == 0 : return
        Y = self.givenY
        self.confusionMatrix = superConfmat(self.Y, self.givenY, self.numClasses)

        self.misclassified = [self.patternID[i] for i in range(len(self.patternID))
                              if self.Y[i] != Y[i]]

        self.successRate, self.balancedSuccessRate, self.ppv, \
            self.sensitivity = self.successRates()

    def convert(self, *options) :

        if 'short' in options :
            attributes = self.shortAttrList
        else :
            attributes = self.longAttrList

        return convert(self, attributes)

class ClassificationResults (Results, ClassificationFunctions) :

    # how to construct an attribute from the results of each fold;
    # the default action is to make a list
    attributeAction = {'classLabels' : 'returnFirst',
                       'numClasses' : 'returnFirst',
                       'successRate' : 'average',
                       'balancedSuccessRate' : 'average',
                       'ppv' : 'average',
                       'sensitivity' : 'average',
                       'confusionMatrix' : 'addMatrix'}

    def __init__(self, arg = None, classifier = None, **args) :

        Results.__init__(self, arg, classifier, **args)
        if arg.__class__ == self.__class__ or type(arg) == type([]) :
            for r in arg :
                self.append(ClassificationResultsContainer(r, **args))
            self.computeStats()
        elif arg is None :
            pass
        else :
            # construct a blank object:
            self.append(ClassificationResultsContainer(arg, classifier, **args))

    def __repr__(self) :

        return ClassificationFunctions.__repr__(self)

    def __getattr__(self, attr) :

        if attr.find('roc') == 0 :
            return numpy.average([getattr(results, attr)
                                  for results in self])
        else :
            return Results.__getattr__(self, attr)

    def plotROC(self, fileName = None, foldNum = None, **args) :

        if foldNum is None : foldNum = 0
        if foldNum >= self.numFolds :
            raise ValueError, 'foldNum too large'

        self[foldNum].plotROC(fileName, **args)

    def toFile(self, fileName, delim = '\t') :
        """
        save results to a (tab) delimited file

        the format is:
        patternID, decision function, predicted class, given class, fold

        :Parameters:
          - `fileName` - file name to which to save the results
          - `delim` - delimiter (default: tab)
        """

        outfile = open(fileName, 'w')
        for fold in range(self.numFolds) :
            results = self[fold]
            for i in range(len(results)) :
                outfile.write(
                    delim.join([results.patternID[i],
                                str(results.decisionFunc[i]),
                                results.L[i],
                                results.givenL[i],
                                str(fold + 1)]) + '\n')
        outfile.close()

    def getPredictedLabels(self, fold = None) :
        return self.get('L', fold)

    def getPredictedClass(self, fold = None) :
        return self.get('Y', fold)

    def getGivenClass(self, fold = None) :
        return self.get('givenY', fold)

    def getGivenLabels(self, fold = None) :
        return self.get('givenL', fold)

    def getROC(self, fold = None) :

        return self.get('roc', fold)

    def getROCn(self, rocN = None, fold = None) :

        if rocN is None : rocN = self.rocN
        return self.get('roc' + str(rocN), fold)

    def getSuccessRate(self, fold = None) :

        return self.get('successRate', fold)

    def getBalancedSuccessRate(self, fold = None) :

        return self.get('balancedSuccessRate', fold)

    def getConfusionMatrix(self, fold = None) :

        return self.get('confusionMatrix', fold)

    def getPPV(self, fold = None) :

        return self.get('ppv', fold)

    def getSensitivity(self, fold = None) :

        return self.get('sensitivity', fold)

    def getClassLabels(self) :

        return self.classLabels

def convert(object, attributes) :

    obj = misc.Container()
    obj.addAttributes(object, attributes)

    return obj

def saveResultObjects(objects, fileName, *options) :
    """
    save a list or dictionary of Results objects;
    it is o.k. if the list or dictionary is itself a list or dictionary of
    Results objects.
    OPTIONS:
    long - save the long attribute list
    """

    if type(objects) == type([]) :
        if type(objects[0]) == type([]) :
            obj = [ [o.convert(*options) for o in resultsList]
                    for resultsList in objects]
        elif type(objects[0]) == type({}) :
            obj = []
            for resultsDict in objects :
                converted = {}
                for key in resultsDict :
                    converted[key] = resultsDict[key].convert(*options)
                obj.append(converted)
        else :
            obj = [o.convert(*options) for o in objects]

    elif type(objects) == type({}) :
        obj = {}
        for rkey in objects :
            if type(objects[rkey]) == type({}) :
                obj[rkey] = {}
                for key in objects[rkey] :
                    obj[rkey][key] = objects[rkey][key].convert(*options)
            elif type(objects[rkey]) == type([]) :
                obj[rkey] = [ results.convert(*options) for results in objects[rkey] ]
            else :
                obj[rkey] = objects[rkey].convert(*options)
    else :
        raise ValueError, 'expected either a list or dictionary'

    myio.save(obj, fileName)

class ResultsList (list) :

    def __init__(self, resList = None) :

        self.rocN = 50
        if resList is None : return
        for results in resList :
            if type(results) == type([]) :
                self.append(ClassificationResults(results))
            else :
                self.append(results)
        self.computeStats()

    def __repr__(self) :

        rep = []

        rep.append('number of Results objects: %d' % len(self))
        rep.append('success rate: %f (%f)' % (self.successRate, numpy.std(self.successRates)))
        rep.append('balanced success rate: %f (%f)' %
                   (self.balancedSuccessRate, numpy.std(self.balancedSuccessRates)))

        rep.append('area under ROC curve: %f (%f)' % (self.roc, numpy.std(self.rocs)))
        rep.append('area under ROC %d curve: %f (%f)' % \
                   (self.rocN, numpy.average(self.rocNs), numpy.std(self.rocNs)))

        return '\n'.join(rep)

    def save(self, fileName, *options) :

        if 'short' in options :
            attributes = self[0][0].shortAttrList
        else :
            attributes = self[0][0].longAttrList

        resultsList = [[convert(results, attributes) for results in res]
                       for res in self]

        myio.save(resultsList, fileName)

    def computeStats(self) :

        self.balancedSuccessRates = [res.balancedSuccessRate for res in self]
        self.balancedSuccessRate = numpy.average(self.balancedSuccessRates)
        self.successRates = [res.successRate for res in self]
        self.successRate = numpy.average(self.successRates)
        self.rocs = [res.roc for res in self]
        self.roc = numpy.average(self.rocs)
        self.rocNs = [res.getROCn(self.rocN) for res in self]

class RegressionResultsContainer (ResultsContainer) :

    def __init__(self, arg, classifier = None, **args) :

        self.Y = []
        self.givenY = []
        self.patternID = []
        self.info = 'dataset:\n' + arg.__repr__() + \
                    'classifier:\n' + classifier.__repr__()

    def appendPrediction(self, y, data, pattern) :

        self.Y.append(y)
        if hasattr(data.labels, 'patternID') and data.labels.patternID is not None :
            self.patternID.append(data.labels.patternID[pattern])
        if hasattr(data.labels, 'Y') and data.labels.Y is not None :
            self.givenY.append(data.labels.Y[pattern])

    def computeStats(self) :

        if len(self.givenY) == 0 : return
        # root mean squared error
        self.rmse = math.sqrt(numpy.average([(self.givenY[i] - self.Y[i]) ** 2
                                             for i in range(len(self.Y))]))

class RegressionResults (Results) :

    attributeAction = {'rmse' : 'average'}

    def __init__(self, arg = None, classifier = None, **args) :

        Results.__init__(self, arg, classifier, **args)
        if arg.__class__ == self.__class__ or type(arg) == type([]) :
            for r in arg :
                self.append(RegressionResultsContainer(r, **args))
            self.computeStats()
        elif arg is None :
            pass
        else :
            # construct a blank object:
            self.append(RegressionResultsContainer(arg, classifier, **args))

    def __repr__(self) :

        rep = []
        rep.append('rmse: ' + str(self.rmse))

        return '\n'.join(rep)

    def getRMSE(self, fold = None) :

        return self.get('rmse', fold)

    def getGivenLabels(self, fold = None) :

        return self.get('givenY', fold)

    def getDecisionFunction(self, fold = None) :

        return self.get('Y', fold)

def loadResults(fileName, isNewFormat = True) :
    """
    isNewFormat -- whether the Results were saved under version 0.6.1 or newer
    """

    res = myio.load(fileName)
    if not isNewFormat :
        return ClassificationResults([res])
    if type(res) == type({}) :
        results = {}
        for key in res :
            results[key] = ClassificationResults(res[key])
        return results
    if type(res[0]) == type([]) :
        return ResultsList(res)
    else :
        return ClassificationResults(res)

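# Sketch: a save/load round trip ('results.pyd' is an illustrative name):
#
#   results.save('results.pyd')
#   results = loadResults('results.pyd')
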
def loadResults2(fileName) :
    """
    load a list of lists of Results objects, or a dictionary of lists of
    Results objects
    """

    res = myio.load(fileName)
    if type(res) == type({}) :
        results = {}
        for key in res :
            results[key] = [Results(object) for object in res[key]]
        return results

    elif type(res) == type([]) :
        return [[Results(object) for object in listOfResults] for listOfResults in res]