Package PyML :: Package containers :: Module aggregate
[frames | no frames]

Source Code for Module PyML.containers.aggregate

  1   
  2  import numpy 
  3  from PyML.containers.ext import caggregate 
  4  from PyML.containers.baseDatasets import BaseDataSet, WrapperDataSet 
  5   
class Aggregate(WrapperDataSet, caggregate.Aggregate):
    """
    combines several C++ dataset objects into a single dataset.
    its dot product is a weighted sum of the kernels of the individual
    dataset objects

    Construction of an aggregate requires a list of dataset objects.
    It is assumed that all datasets refer to the same underlying objects so
    in particular have the same labels and same number of patterns (the labels
    object is initialized using the labels of the first dataset in the list).
    """

    isVector = False

    def __init__(self, arg, **args):
        """
        :Parameters:
          - `arg` - either an Aggregate object (for copy construction) or a
            non-empty list of C++ dataset objects

        :Keywords:
          - `weights` - a sequence of weights used for computing the dot
            product; element i is the weight for dataset i in the aggregate.
            Defaults to equal weights of 1 / (number of datasets).

        :Raises:
          - `ValueError` - if `arg` is neither an Aggregate nor a list, if
            the datasets fail `checkDatas`, or if the number of weights does
            not match the number of datasets
        """

        BaseDataSet.__init__(self)
        # Exact class match (not isinstance) selects copy construction.
        if arg.__class__ == self.__class__:
            self.copyConstruct(arg, **args)
        elif isinstance(arg, list):
            self.checkDatas(arg)
            self.pydatas = arg
            if 'weights' in args:
                # Build a fresh list of floats so the caller's sequence is
                # neither modified in place nor aliased by this object.
                weights = [float(weight) for weight in args['weights']]
                if len(weights) != len(self.pydatas):
                    raise ValueError(
                        'number of weights does not match number of datasets')
                self.pyweights = weights
            else:
                uniform = 1.0 / len(self.pydatas)
                self.pyweights = [uniform] * len(self.pydatas)
            self._addDatas()
            # Labels are taken from the first dataset only.
            WrapperDataSet.attachLabels(self, self.pydatas[0].labels)
            self.attachKernel('linear')
        else:
            raise ValueError('wrong type of input for constructor')

    def _addDatas(self):
        """
        initialize the C++ aggregate from `self.pydatas` and
        `self.pyweights`, then register each dataset with it
        """

        caggregate.Aggregate.__init__(
            self, len(self.pydatas[0]), tuple(self.pyweights))
        for data in self.pydatas:
            self.addDataSet(data.castToBase())

    def addData(self, data, weight):
        """
        append a dataset and its weight to the aggregate

        :Parameters:
          - `data` - the dataset object to add
          - `weight` - the weight of that dataset (converted to float)
        """

        # Convert first so a bad weight leaves the lists untouched.
        weight = float(weight)
        self.pydatas.append(data)
        # pyweights is the Python-side weight list set by __init__ and read
        # by copy/_addDatas; it must stay the same length as pydatas.
        self.pyweights.append(weight)
        self.addDataSet(data.castToBase(), weight)

    def copy(self, other, patterns, deepcopy):
        """
        populate this object from another aggregate

        :Parameters:
          - `other` - the aggregate to copy from
          - `patterns` - forwarded as the `patterns` keyword to the copy
            constructor of each member dataset
          - `deepcopy` - accepted but not used by this method
        """

        self.pyweights = other.pyweights[:]
        self.pydatas = [data.__class__(data, patterns=patterns)
                        for data in other.pydatas]
        self._addDatas()

    def __len__(self):
        """the number of patterns, as reported by `self.size()`"""

        return self.size()

    def checkDatas(self, datas):
        """
        verify that the datasets can be aggregated

        :Parameters:
          - `datas` - a list of dataset objects

        :Raises:
          - `ValueError` - if the list is empty, if the datasets differ in
            length, or if their labels do not share the same pattern IDs
        """

        if not datas:
            raise ValueError('an aggregate requires at least one dataset')
        lengths = [len(data) for data in datas]
        if any(length != lengths[0] for length in lengths):
            raise ValueError('datasets not equal lengths')
        # Every dataset is compared against the first one.
        reference = datas[0]
        for data in datas[1:]:
            if data.labels.patternID != reference.labels.patternID:
                raise ValueError('datasets not have the same pattern IDs')
85 86 87
class DataAggregate(BaseDataSet):

    """An aggregate of datasets.
    a DataAggregate object contains a list of datasets in its datas attribute,
    and behaves like a dataset when it comes to copy construction, so it can
    be used as a dataset object when it comes to testing classifiers.
    USAGE:
    DataAggregate(list) - construct an object out of a list of datasets
    (they do not have to be of the same kind!)
    It is assumed that all datasets are the same length, and have the same labels
    DataAggregate(other[,optional arguments]) - copy construction - all options
    supported by the dataset classes can be used.
    """

    def __init__(self, arg, *opt, **args):
        """
        :Parameters:
          - `arg` - either a DataAggregate object (for copy construction) or
            a non-empty list of dataset objects
          - `opt`, `args` - used for copy construction only; forwarded
            unchanged to the copy constructor of each member dataset

        :Raises:
          - `ValueError` - if `arg` is neither a DataAggregate nor a list,
            or if the list is empty
        """

        BaseDataSet.__init__(self)
        # Exact class match (not isinstance) selects copy construction.
        if arg.__class__ == self.__class__:
            # Each member is copied through its own class, so the members
            # may be of different kinds.
            self.datas = [data.__class__(data, *opt, **args)
                          for data in arg.datas]
        elif isinstance(arg, list):
            if not arg:
                raise ValueError('DataAggregate requires at least one dataset')
            self.datas = arg
        else:
            raise ValueError('wrong type of input for DataAggregate')
        # Labels are taken from the first dataset only.
        self.labels = self.datas[0].labels

    def __len__(self):
        """the length of the first dataset in the aggregate"""

        return len(self.datas[0])

    def __repr__(self):
        """the string form of each dataset, each followed by a newline"""

        return ''.join(str(data) + '\n' for data in self.datas)
126