1 import os
2 import numpy
3
4 from PyML.classifiers import svm,multi,ridgeRegression,knn,composite,modelSelection
5 from PyML.feature_selection import featsel
6 from PyML.containers import ker,labels
7 from PyML.containers import vectorDatasets
8 from PyML.containers.aggregate import Aggregate
9 from PyML.containers.kernelData import KernelData
10 from PyML.containers.sequenceData import SequenceData
11 from PyML.classifiers import platt
12 from PyML.preproc import preproc
13
14 from PyML.evaluators import assess
15
16
# File names of the sample datasets loaded by test() below.
heartdatafile = 'heart.data'
irisdatafile = 'iris.data'
yeastdatafile = 'yeast.data'
22
def test(component='svm', **args):
    """Run the PyML smoke checks for one component, or for all of them.

    Each section below is guarded by a component name; a section runs when
    ``component`` equals that name or equals ``'all'``.  The section names
    are: 'general', 'svm', 'kernelData', 'normalization', 'svr', 'save',
    'classifiers', 'platt', 'multi', 'featsel', 'modelSelection' and
    'preproc'.

    Arguments:
    component -- name of the section to run, or 'all' (default 'svm')
    args -- only the key 'container' is read: the name of the dataset class
        to look up in ``vectorDatasets`` (default 'SparseDataSet')

    Returns a dict mapping each executed section name to the list of result
    objects collected in that section.  The 'general' section stores
    nothing in the dict.

    Raises ValueError if ``vectorDatasets`` has no attribute named by the
    requested container.

    Note: the classifier ``s`` is shared by the sections and rebound by
    several of them, so with component='all' the 'svm' and 'platt' sections
    use whatever classifier the preceding sections left in ``s``.
    """
    import shutil
    import tempfile

    container = args.get('container', 'SparseDataSet')
    try:
        DataSet = getattr(vectorDatasets, container)
    except AttributeError:
        # Only a missing attribute means "unknown container"; anything else
        # should propagate unchanged.
        raise ValueError('wrong container ' + container)

    def selected(name):
        """Tell whether the section called ``name`` was requested."""
        return component == 'all' or component == name

    s = svm.SVM()

    results = {}

    comp = 'general'
    if selected(comp):
        d = DataSet(heartdatafile, labelsColumn=0)
        s.train(d)
        s.test(d)
        s = svm.SVM()
        s.stratifiedCV(d)
        print('starting aggregate****************')
        # Built only to exercise the constructor; the value is not used.
        d2 = Aggregate([d, d])
        print('end aggregate')

        d.attachKernel('polynomial')
        s.cv(d)
        d.attachKernel('linear')
        s = svm.SVM()
        s.train(d)
        s.train(d, saveSpace=False)
        # The save/load round trips go through a fixed file name in the
        # current directory; make sure it is removed even when a step fails.
        try:
            s.save("tmp")
            loaded = svm.loadSVM("tmp", datasetClass=DataSet)
            r = loaded.test(d)
            d.attachKernel('gaussian', gamma=0.01)

            s.train(d, saveSpace=False)
            s.save("tmp")
            loaded = svm.loadSVM("tmp", datasetClass=DataSet, labelsColumn=1)
            r = loaded.test(d)
        finally:
            if os.path.exists('tmp'):
                os.remove('tmp')

        # Constructor checks on in-memory inputs; results are discarded.
        d = DataSet(numpy.random.randn(100, 10))
        d = DataSet([[1, 2], [2, 3]])
        d = SequenceData(['asa', 'ben', 'hur'])

    comp = 'svm'
    if selected(comp):
        d = DataSet(heartdatafile, labelsColumn=0)
        results[comp] = []
        d.attachKernel('polynomial')
        results[comp].append(s.cv(d, saveSpace=True))
        d.attachKernel('linear')
        results[comp].append(s.cv(d))

    comp = 'kernelData'
    if selected(comp):
        d = DataSet(heartdatafile, labelsColumn=0)
        results[comp] = []
        kdata = KernelData('heart.kernel', gistFormat=True)
        # The kernel file is paired with the labels of the heart dataset.
        kdata.attachLabels(d.labels)
        s = svm.SVM()
        results[comp].append(s.cv(kdata))
        kdata.attachKernel('gaussian', gamma=0.1)
        results[comp].append(s.cv(kdata))

    comp = 'normalization'
    if selected(comp):
        results[comp] = []
        data = DataSet(heartdatafile, labelsColumn=0)
        data.attachKernel('polynomial', degree=4, normalization='dices')
        s = svm.SVM()
        results[comp].append(s.cv(data))

    comp = 'svr'
    if selected(comp):
        d = DataSet(heartdatafile, labelsColumn=0, numericLabels=True)
        results[comp] = []
        s = svm.SVR()
        results[comp].append(s.cv(d))

    comp = 'save'
    if selected(comp):
        results[comp] = []
        s = svm.SVM()
        data = DataSet(heartdatafile, labelsColumn=0)
        # A private temporary directory replaces tempfile.mktemp(): the
        # target file still does not exist before the first save, but the
        # name cannot be claimed by another process and nothing is left
        # behind afterwards.
        tmpdir = tempfile.mkdtemp()
        tmpfile = os.path.join(tmpdir, 'results')
        try:
            r = s.cv(data)
            r.save(tmpfile)
            r = assess.loadResults(tmpfile)
            results['save'].append(r)

            r = s.nCV(data)
            r.save(tmpfile)
            results['save'].append(assess.loadResults(tmpfile))

            # Save and reload a dict holding several result objects.
            r = {}
            for i in range(10):
                r[i] = s.cv(data)

            assess.saveResultObjects(r, tmpfile)
            r = assess.loadResults(tmpfile)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

    comp = 'classifiers'
    if selected(comp):
        d = DataSet(heartdatafile, labelsColumn=0)
        results[comp] = []
        cl = knn.KNN()
        results[comp].append(cl.stratifiedCV(d))
        print('testing ridge regression')
        # Only construction is exercised here.
        ridge = ridgeRegression.RidgeRegression()

    comp = 'platt'
    if selected(comp):
        results[comp] = []
        d = DataSet(heartdatafile, labelsColumn=0)
        # Wraps whichever classifier is currently bound to ``s``.
        p = platt.Platt2(s)
        results[comp].append(p.stratifiedCV(d))

    comp = 'multi'
    if selected(comp):
        results[comp] = []
        d = DataSet(irisdatafile, labelsColumn=-1)

        mc = multi.OneAgainstOne(svm.SVM())
        results[comp].append(mc.cv(d))

        d = DataSet(irisdatafile, labelsColumn=-1)

        mc = multi.OneAgainstRest(svm.SVM())
        results[comp].append(mc.cv(d))

        mc = multi.OneAgainstRest(svm.SVM())
        d.attachKernel('poly')
        results[comp].append(mc.cv(d))
        d.attachKernel('linear')
        mc = multi.OneAgainstRest(svm.SVM())

    comp = 'featsel'
    if selected(comp):
        results[comp] = []

        s = svm.SVM()
        d = DataSet(yeastdatafile, labelsColumn=0)
        d2 = labels.oneAgainstRest(d, '2')
        results[comp].append(s.stratifiedCV(d2))

        m = composite.FeatureSelect(s, featsel.RFE())
        results[comp].append(m.stratifiedCV(d2, 3))

        fs = featsel.FeatureScore('golub')
        f = featsel.Filter(fs, sigma=2)
        m = composite.FeatureSelect(s, f)
        results[comp].append(m.stratifiedCV(d2, 3))

        # Only construction is exercised here.
        c = composite.Chain([f, s])

    comp = 'modelSelection'
    if selected(comp):
        results[comp] = []
        s = svm.SVM()
        d = DataSet(heartdatafile, labelsColumn=0)
        # Both grids and both selectors are constructed; only the last of
        # each (Gaussian grid, default selector) is used for the CV run.
        p = modelSelection.ParamGrid(svm.SVM(ker.Polynomial()),
                                     'C', [0.1, 1, 10, 100],
                                     'kernel.degree', [2, 3, 4])
        p = modelSelection.ParamGrid(svm.SVM(ker.Gaussian()),
                                     'C', [0.1, 1, 10, 100],
                                     'kernel.gamma', [0.01, 0.1, 1])

        m = modelSelection.ModelSelector(p, measure='roc', foldsToPerform=2)
        m = modelSelection.ModelSelector(p)

        results[comp].append(m.cv(d))

    comp = 'preproc'
    if selected(comp):
        results[comp] = []

        s = svm.SVM()
        d = DataSet(yeastdatafile, labelsColumn=0)
        d2 = labels.oneAgainstRest(d, '2')
        # Cross-validate before and after training the standardizer on d2.
        results[comp].append(s.stratifiedCV(d2))
        p = preproc.Standardizer()
        p.train(d2)
        results[comp].append(s.stratifiedCV(d2))
        print(p.scale)
        print(p.translation)

    return results
247
if __name__ == '__main__':
    # Imported here because the script entry point is the only user of sys;
    # without it the sys.argv lookups below raise NameError.
    import sys

    # An optional first command-line argument selects the component to run;
    # otherwise test() falls back to its own default.
    if len(sys.argv) > 1:
        test(sys.argv[1])
    else:
        test()
254