Package PyML :: Package containers :: Module labels
[frames | no frames]

Source Code for Module PyML.containers.labels

  1   
  2  __docformat__ = "restructuredtext en" 
  3  import random 
  4  from PyML.containers import parsers 
  5   
class Labels(object):
    """
    A class that holds the labels of a dataset.

    Attributes::

      L - labels provided by the user (strings)
      Y - internal label representation - an integer from 0 to numClasses - 1
          for multilabel problems each pattern has a list of integer labels
      patternID - a list of the ids of each pattern
      classDict - a mapping from L to Y
      classLabels - a list providing the name of class i
      classSize - a list with the number of patterns in each class
      classes - a list providing, for class i, the indices of its patterns
      numClasses - the number of classes in the data
    """

    def __init__(self, arg=None, **args):
        """
        :Parameters:
          - `arg` - a file name from which to read labels, a list of labels,
            or another Labels object (copy construction)

        :Keywords:
          - `patternID` - a list of pattern IDs
          - `patterns` - in case of copy construction, which patterns to copy
          - `numericLabels` - a Boolean, indicating whether the labels are
            class labels or numeric values (class labels by default).
          - `positiveClass` - for a two class problem, the identity of the
            positive class.  If the labels are '+1' and '-1' or '1' and '-1',
            the positive class is detected automatically.
          - `classLabels` - a list of class names to use instead of deriving
            the classes from the labels themselves
          - `forgetClassLabels` - when using copy construction the default behavior
            is to return a Labels object that remembers the set of classes that the
            original object had, even if some classes are no longer represented.
            this keyword allows you to change this behavior, so that the classes of
            the original object are forgotten.
        """

        self.L = None
        self.numericLabels = args.get('numericLabels', False)
        self.positiveClass = args.get('positiveClass')
        if 'classLabels' in args:
            # Copy so that the reordering done in processLabels never touches
            # the caller's list, and keep numClasses in step with classLabels.
            self.classLabels = list(args['classLabels'])
            self.numClasses = len(self.classLabels)

        if isinstance(arg, str):
            # a file name: let the parser read labels and ids
            args['hint'] = 'csv'
            p = parsers.parserDispatcher(arg, **args)
            L, self.patternID = p.readLabels()
        elif arg.__class__ == self.__class__:
            L, self.patternID = self.copy(arg, **args)
        else:
            L = list(arg) if arg is not None else None
            if not L:
                # an empty list of labels is treated as "unlabeled"
                L = None
            if 'patternID' in args:
                self.patternID = args['patternID']
            elif L is not None:
                # a real list, so that extend() can append to it
                self.patternID = list(range(len(L)))
            else:
                # no labels and no ids: an empty container
                self.patternID = []

        if L is not None:
            self.processLabels(L)

    def copy(self, other, **args):
        """
        Copy the state of another Labels object into this one.

        :Parameters:
          - `other` - the Labels object to copy from

        :Keywords:
          - `patterns` - indices of the patterns to copy (all by default)
          - `forgetClassLabels` - if True, the classes of `other` are not
            carried over

        Returns a tuple (labels, patternIDs) restricted to the chosen
        patterns; labels is None when `other` is unlabeled.
        """

        forgetClassLabels = args.get('forgetClassLabels', False)
        self.numericLabels = other.numericLabels

        if other.L is not None and not self.numericLabels and not forgetClassLabels:
            self.numClasses = other.numClasses
            self.classLabels = other.classLabels[:]

        patternsToCopy = args.get('patterns')
        if patternsToCopy is None:
            patternsToCopy = range(len(other))
        # materialize: the indices are traversed more than once below
        patternsToCopy = list(patternsToCopy)

        ids = [other.patternID[p] for p in patternsToCopy]
        if other.L is None:
            return (None, ids)
        return ([other.L[p] for p in patternsToCopy], ids)

    def extend(self, other, patterns=None):
        """add to a dataset a list of patterns from another dataset

        Labels are re-processed against the classes this object already
        knows, so a label of `other` that is not in classLabels raises
        KeyError.
        """

        if patterns is None:
            patterns = range(len(other))
        # retrieve labels from other
        for p in patterns:
            if self.L is not None:
                self.L.append(other.L[p])
            if self.patternID is not None:
                self.patternID.append(other.patternID[p])
        if self.L is not None:
            self.processLabels(self.L)

    def processLabels(self, L, **args):
        """
        Compute the internal representation (Y, classDict, classSize, ...)
        from a list of user labels.

        :Parameters:
          - `L` - the list of labels

        :Keywords:
          - `forgetClassLabels` - if True, recompute the set of classes from
            `L` instead of reusing self.classLabels

        Numeric labels (numericLabels is set, or the first label is a float)
        are only converted to floats; no class information is computed.
        Raises ValueError if positiveClass is set but is not one of the two
        classes.
        """

        forgetClassLabels = args.get('forgetClassLabels', False)

        if self.numericLabels or (len(L) > 0 and isinstance(L[0], float)):
            self.Y = [float(y) for y in L]
            self.L = self.Y
            return

        if not forgetClassLabels and hasattr(self, 'classLabels'):
            classLabels = self.classLabels
        else:
            classLabels = sorted(set(L))
        numClasses = len(classLabels)

        # For two-class problems the positive class goes to index 1.
        if classLabels == ['+1', '-1'] or classLabels == ['1', '-1']:
            classLabels[0], classLabels[1] = classLabels[1], classLabels[0]
        if self.positiveClass is not None and numClasses == 2:
            if self.positiveClass not in classLabels:
                raise ValueError('unrecognized positiveClass')
            if classLabels[1] != self.positiveClass:
                classLabels[0], classLabels[1] = classLabels[1], classLabels[0]
        if 'rest' in classLabels and numClasses == 2:
            # in a one-against-rest labeling 'rest' is class 0
            if classLabels[1] == 'rest':
                classLabels[0], classLabels[1] = classLabels[1], classLabels[0]

        classDict = {}
        for index, label in enumerate(classLabels):
            classDict[label] = index

        classSize = [0] * numClasses
        classes = [[] for _ in range(numClasses)]
        Y = []
        for i, label in enumerate(L):
            y = classDict[label]
            classSize[y] += 1
            Y.append(y)
            classes[y].append(i)

        self.L = L
        self.Y = Y
        self.classSize = classSize
        self.classLabels = classLabels
        self.classDict = classDict
        self.classes = classes
        self.numClasses = numClasses

    def flip(self, patterns):
        """
        Switch the label of each of the given patterns to the other class.

        Raises ValueError if this is not a two class labeling.
        """

        if self.numClasses != 2:
            raise ValueError('not a two class labeling')
        for p in patterns:
            self.L[p] = self.classLabels[(self.Y[p] + 1) % 2]
        self.processLabels(self.L)

    def __len__(self):
        """the number of patterns (the length of patternID)"""

        return len(self.patternID)

    def __repr__(self):
        """a table of class names and sizes; empty unless the labels are strings"""

        rep = ''
        if self.L and isinstance(self.L[0], str):
            rep += 'class Label / Size \n'
            for i in range(self.numClasses):
                rep += ' %s : %d\n' % (self.classLabels[i], self.classSize[i])

        return rep

    def isLabeled(self):
        """True if labels were provided"""

        return self.L is not None

    def save(self, fileName, delim='\t'):
        """
        Write one line per pattern to `fileName`: the pattern id, followed
        by `delim` and the label when the data is labeled.
        """

        # 'with' guarantees the file is flushed and closed
        with open(fileName, 'w') as fileHandle:
            for i in range(len(self)):
                # ids may be integers (the default), so convert explicitly
                line = str(self.patternID[i])
                if self.L is not None:
                    line += delim + str(self.L[i])
                fileHandle.write(line + '\n')

    def convertFromMultiLabel(self):
        """replace each list of labels in L by a single ';'-joined string"""

        if self.L is None:
            return
        for i, labelList in enumerate(self.L):
            self.L[i] = ";".join(labelList)

    def mergeClasses(self, classList, newLabel=None):
        """Merge a list of classes into a new class.

        :Parameters:
          - `classList` - a list of classes to merge; can either provide the
            names of the classes or the index.
          - `newLabel` - the name of the new class (if not given then the label
            is formed by concatenating the names of the merged classes)
        """

        if classList and isinstance(classList[0], int):
            classList = [self.classLabels[label] for label in classList]

        if newLabel is None:
            try:
                newLabel = "+".join(classList)
            except TypeError:
                # class names that are not strings cannot be joined
                newLabel = str(classList)

        for classLabel in classList:
            for p in self.classes[self.classDict[classLabel]]:
                self.L[p] = newLabel

        self.processLabels(self.L, forgetClassLabels=True)

    def oneAgainstRest(self, classLabels, className=None):
        """
        creates a one-against-the-rest labels object

        :Parameters:
          - `classLabels` - a single class name, or a list of class names (string
            or a list of strings); class indices are accepted as well
          - `className` - if given, the new name given to the class
        """

        if isinstance(classLabels, (int, str)):
            classLabels = [classLabels]
        if isinstance(classLabels[0], int):
            # indices were given: translate them to class names
            classLabels = [self.classLabels[label] for label in classLabels]

        if className is None:
            className = '+'.join(classLabels)

        newL = [className if self.L[i] in classLabels else "rest"
                for i in range(len(self))]

        self.processLabels(newL, forgetClassLabels=True)
264 265
def mergeClasses(data, classList, newLabel=None):
    """Merge several classes of a dataset into a single class.

    :Parameters:
      - `data` - a dataset container
      - `classList` - the classes to merge, given either by name or by index
      - `newLabel` - name of the merged class (when omitted the label is
        formed by concatenating the names of the merged classes)

    Delegates to Labels.mergeClasses, re-attaches the modified labels and
    returns the same dataset object.
    """

    labels = data.labels
    labels.mergeClasses(classList, newLabel)
    data.attachLabels(labels)
    return data
284 285
def oneAgainstRest(data, classLabels, className=None):
    """
    Turn a dataset into a one-against-the-rest dataset.

    :Parameters:
      - `data` - a dataset
      - `classLabels` - a single class name, or a list of class names
      - `className` - optional new name for the selected class

    Return value::

      the same dataset object, in which every class label other than the
      given one/s has been converted to a single class
    """

    labels = data.labels
    labels.oneAgainstRest(classLabels, className)
    data.attachLabels(labels)
    return data
308 309
def randomLabels(Y):
    """Return a shuffled copy of the vector Y as a list.

    Y itself is left untouched.  Copying with list() rather than slicing
    means tuples and other iterables are accepted, and array-like inputs
    whose slices are views are never shuffled in place.
    """

    Yrand = list(Y)
    random.shuffle(Yrand)

    return Yrand
317
def eliminateMultiLabeled(data):
    """Return a dataset holding only the patterns that have a single label.

    A pattern counts as multi-labeled when its label string contains ';'.
    The result is built by copy construction: data.__class__(data,
    patterns = ...).
    """

    # NOTE(review): the pattern count is taken from len(data), as in
    # eliminateSmallClasses; this used to be len(data.n) - confirm that
    # data.n is not a sequence some dataset type relies on here.
    patterns = [i for i in range(len(data))
                if ';' not in data.labels.L[i]]

    return data.__class__(data, patterns=patterns)
324 325
def eliminateSmallClasses(data, size):
    """returns a dataset that contains the classes of data that contain
    at least size patterns

    The result is built by copy construction: data.__class__(data,
    patterns = ...), where patterns are the indices of the patterns whose
    class has at least `size` members.
    """

    labels = data.labels
    patterns = [i for i in range(len(data))
                if labels.classSize[labels.Y[i]] >= size]

    # copy-construct from data itself
    return data.__class__(data, patterns=patterns)
336