
Source Code for Module PyML.classifiers.composite

import numpy
import math

from PyML.utils import misc
from PyML.datagen import sample
from PyML.evaluators import assess
from baseClassifiers import Classifier, IteratorClassifier
import svm

__docformat__ = "restructuredtext en"

class CompositeClassifier (Classifier) :
    '''A base class for creating composite classifiers

    A composite classifier has an attribute called "classifier", and by default
    requests are forwarded to the appropriate function of the classifier
    (including the "test" function).
    For logging purposes, use the log attribute of the classifier rather
    than the composite log.
    See for example the FeatureSelect object.'''

    deepcopy = True

    def __init__(self, classifier, **args) :

        Classifier.__init__(self, classifier, **args)
        if type(classifier) == type('') : return
        if (not hasattr(classifier, 'type')) or classifier.type != 'classifier' :
            raise ValueError, 'argument should be a classifier'
        if classifier.__class__ == self.__class__ :
            self.classifier = classifier.classifier.__class__(
                classifier.classifier)
        else :
            self.classifier = classifier.__class__(classifier)

    def __repr__(self) :
        rep = '<' + self.__class__.__name__ + ' instance>\n'
        rep += 'Classifier:\n'
        rep += self.classifier.__repr__()

        return rep

    def preproject(self, data) :

        self.classifier.preproject(data)

    def decisionFunc(self, data, i) :

        return self.classifier.decisionFunc(data, i)

    def classify(self, data, i) :

        return self.classifier.classify(data, i)

    #def preprocess(self, data) :

    #    self.classifier.preprocess(data)

    def getTest(self) :

        return self.classifier.test

    def setTest(self) :

        raise ValueError, 'do not call this function'

    # if the classifier used by the composite implements a test function -
    # use it rather than the default assess.test
    test = property (getTest, setTest,
                     None, 'the test function of the underlying classifier')

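# Illustration (editorial sketch, not part of the original module): a
# composite forwards 'classify', 'decisionFunc', 'preproject' and, through the
# 'test' property above, 'test' to the wrapped classifier stored in
# self.classifier.  For a composite 'c' wrapping some classifier:
#
#   >>> c.classify(data, i)      # same as c.classifier.classify(data, i)
#   >>> c.test(testData)         # runs the wrapped classifier's test method
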
class Chain (CompositeClassifier) :
    '''A chain is a list of actions to be performed on a dataset,
    the last of which is assumed to be a classifier.
    The actions can be, for example, a chain of preprocessing steps or
    a step of feature selection (same as using the FeatureSelect class).
    Each action in the chain is assumed to have a "train" method and
    a copy constructor.'''

    deepcopy = True

    def __init__(self, arg) :
        """
        :Parameters:
          - `arg` - a Chain object or a list of objects, each of which
            implements 'train' and 'test' methods and has a copy constructor
        """
        Classifier.__init__(self)

        if arg.__class__ == self.__class__ :
            other = arg
            self.classifier = other.classifier.__class__(other.classifier)
            self.chain = [component.__class__(component)
                          for component in other.chain]

        elif type(arg) == type([]) :
            self.classifier = arg[-1].__class__(arg[-1])
            self.chain = [arg[i].__class__(arg[i])
                          for i in range(len(arg) - 1)]

    def train(self, data, **args) :

        Classifier.train(self, data, **args)

        for component in self.chain :
            component.train(data, **args)

        self.classifier.train(data, **args)
        self.log.trainingTime = self.getTrainingTime()

    def test(self, data, **args) :

        for component in self.chain :
            component.test(data, **args)

        print 'I am testing', self.classifier
        print 'testing function', self.classifier.test
        print 'the data is :', data
        return self.classifier.test(data, **args)

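# Usage sketch (editorial addition, not part of the original module).  A Chain
# is built from a list whose last element is the classifier; every earlier
# element is trained on and applied to the data before the classifier sees it.
# The Standardizer and SVM names and their import paths are assumptions about
# this PyML installation and may need adjusting:
#
#   >>> from PyML.classifiers.svm import SVM
#   >>> from PyML.preproc.preproc import Standardizer
#   >>> chain = Chain([Standardizer(), SVM()])
#   >>> chain.train(trainData)            # trainData: a PyML dataset container
#   >>> results = chain.test(testData)
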
class FeatureSelect (CompositeClassifier) :

    """A method for combining a feature selector and classifier;
    training consists of performing feature selection and afterwards training
    the classifier on the selected features;
    use this classifier to test the accuracy of a feature selector/classifier
    combination.

    USAGE:
    construction :
    FeatureSelect(classifier, featureSelector)
    FeatureSelect(otherFeatureSelectInstance) - copy construction
    """

    deepcopy = True

    def __init__(self, arg1, arg2 = None) :

        Classifier.__init__(self)

        if arg1.__class__ == self.__class__ :
            other = arg1
            self.classifier = other.classifier.__class__(other.classifier)
            self.featureSelector = other.featureSelector.__class__(
                other.featureSelector)
        else :
            for arg in (arg1, arg2) :
                if arg.type == 'classifier' :
                    self.classifier = arg.__class__(arg)
                elif arg.type == 'featureSelector' :
                    self.featureSelector = arg.__class__(arg)
                else :
                    raise ValueError, \
                        'argument should be either classifier or featureSelector'

    def __repr__(self) :

        rep = '<' + self.__class__.__name__ + ' instance>\n'
        if hasattr(self, 'numFeatures') :
            rep += 'number of features trained on:' + str(self.numFeatures) + '\n'
        rep += 'Classifier:\n'
        rep += self.classifier.__repr__()
        rep += 'Feature Selector:\n'
        rep += self.featureSelector.__repr__()

        return rep

    def train(self, data, **args) :

        Classifier.train(self, data, **args)

        self.featureSelector.select(data, **args)
        #self.numFeatures = data.numFeatures
        self.classifier.log.numFeatures = data.numFeatures
        self.classifier.log.features = data.featureID[:]

        self.classifier.train(data, **args)
        self.classifier.log.trainingTime = self.getTrainingTime()

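# Usage sketch (editorial addition, not part of the original module).
# Training first calls the selector's 'select' on the data, then trains the
# classifier on the surviving features; testing goes through the wrapped
# classifier.  The SVM import path and the featsel.FeatureScore/Filter names
# are assumptions about this PyML installation:
#
#   >>> from PyML.classifiers.svm import SVM
#   >>> from PyML.featsel import featsel
#   >>> selector = featsel.Filter(featsel.FeatureScore('golub'))
#   >>> fs = FeatureSelect(SVM(), selector)
#   >>> fs.train(trainData)
#   >>> results = fs.test(testData)
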
class FeatureSelectAll (IteratorClassifier) :

    '''A method for combining a feature selector and classifier;
    the difference from FeatureSelect is that it is specifically
    designed for computing the accuracy while varying the
    number of features.
    '''

    deepcopy = True

    def __init__(self, arg1, arg2 = None) :

        Classifier.__init__(self)

        if arg1.__class__ == self.__class__ :
            other = arg1
            self.classifier = other.classifier.__class__(other.classifier)
            self.featureSelector = other.featureSelector.__class__(
                other.featureSelector)
        else :
            for arg in (arg1, arg2) :
                if arg.type == 'classifier' :
                    self.classifier = arg.__class__(arg)
                elif arg.type == 'featureSelector' :
                    self.featureSelector = arg.__class__(arg)
                else :
                    raise ValueError, \
                        'argument should be either classifier or featureSelector'

    def train(self, data, **args) :

        Classifier.train(self, data, **args)

        numFeatures = []
        n = 1
        while n < data.numFeatures :
            numFeatures.append(n)
            n *= 2

        self.classifiers = [self.classifier.__class__(self.classifier)
                            for i in range(len(numFeatures))]

        featureSelector = self.featureSelector.__class__(self.featureSelector)
        rankedFeatures = featureSelector.rank(data)

        for i in range(len(numFeatures)) :
            selectedData = data.__class__(data)
            selectedData.keepFeatures(rankedFeatures[:numFeatures[i]])
            self.classifiers[i].train(selectedData)
            self.classifiers[i].log.numFeatures = selectedData.numFeatures

        self.classifier.log.trainingTime = self.getTrainingTime()

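# Usage sketch (editorial addition, not part of the original module).
# FeatureSelectAll ranks the features once and then trains one copy of the
# classifier per feature count (1, 2, 4, ... features), so accuracy can be
# compared across feature-set sizes.  The SVM and featsel.RFE names are
# assumptions about this PyML installation:
#
#   >>> from PyML.classifiers.svm import SVM
#   >>> from PyML.featsel import featsel
#   >>> fsa = FeatureSelectAll(SVM(), featsel.RFE())
#   >>> fsa.train(trainData)      # one trained classifier per feature count
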
class AggregateClassifier (Classifier) :

    """A classifier that combines the predictions of classifiers trained on
    different datasets.
    The datasets are presented as a DataAggregate dataset container.
    """

    def __init__ (self, arg) :

        Classifier.__init__(self)
        if arg.__class__ == self.__class__ :
            self.classifiers = [classifier.__class__(classifier)
                                for classifier in arg.classifiers]
        elif type(arg) == type([]) :
            self.classifiers = [classifier.__class__(classifier)
                                for classifier in arg]

    def train(self, data, **args) :

        Classifier.train(self, data, **args)
        if not data.__class__.__name__ == 'DataAggregate' :
            raise ValueError, 'train requires a DataAggregate dataset'

        for i in range(len(self.classifiers)) :
            self.classifiers[i].train(data.datas[i], **args)
        self.log.trainingTime = self.getTrainingTime()

    def classify(self, data, p) :

        if not data.__class__.__name__ == 'DataAggregate' :
            raise ValueError, 'classify requires a DataAggregate dataset'

        decisionFuncs = [self.classifiers[i].decisionFunc(data.datas[i], p)
                         for i in range(len(self.classifiers))]
        #decisionFunc = numpy.sum(decisionFuncs)
        #if decisionFunc > 0 :
        #    return (1, decisionFunc)
        #else :
        #    return (0, decisionFunc)
        if decisionFuncs[0] > 0 and decisionFuncs[1] > 0 :
            return 1, numpy.sum(decisionFuncs)
        else :
            return 0, min(decisionFuncs)

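# Usage sketch (editorial addition, not part of the original module).  Each
# classifier is trained on the corresponding member of a DataAggregate
# container; with two classifiers, 'classify' labels a point positive only if
# both decision values are positive.  The DataAggregate construction shown is
# hypothetical; consult the PyML containers documentation for the actual
# constructor:
#
#   >>> from PyML.classifiers.svm import SVM
#   >>> aggData = DataAggregate([data1, data2])     # hypothetical constructor
#   >>> agg = AggregateClassifier([SVM(), SVM()])
#   >>> agg.train(aggData)
#   >>> label, decisionValue = agg.classify(aggData, 0)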