Package PyML :: Package utils :: Module misc
[hide private]
[frames | no frames]

Source Code for Module PyML.utils.misc

  1   
  2  import numpy 
  3  import random 
  4  import os 
  5  import myio 
  6   
  7  __docformat__ = "restructuredtext en" 
  8   
  9   
def my_import(name):
    '''Import a module from its dotted name and return the innermost module.

    :param name: dotted module path such as ``'package.sub.module'``
    :returns: the object reached by following every component of ``name``
    '''
    parts = name.split('.')
    # __import__ hands back the top-level package, so walk down the
    # remaining components by attribute access.
    target = __import__(name)
    for part in parts[1:]:
        target = getattr(target, part)
    return target
18
def subseteq(A, B):
    '''Test whether every element of A also occurs in B.

    :param A: iterable of hashable elements
    :param B: a dictionary (its keys are used) or an iterable of hashable
        elements
    :returns: 1 if A is contained in B, 0 otherwise
    '''
    if type(B) == type({}):
        lookup = B
    else:
        # index B once so that every membership test is a dictionary lookup
        lookup = dict.fromkeys(B, 1)

    absent = [a for a in A if a not in lookup]
    if absent:
        return 0
    return 1
32 33
def setminus(A, B):
    '''Return the elements of A that do not occur in B.

    The order of A is preserved, and an element that occurs several times
    in A (and not in B) is kept every time it occurs.

    :param A: iterable of hashable elements
    :param B: iterable of hashable elements to exclude
    :returns: a new list
    '''
    # the ``in`` operator replaces dict.has_key(), which does not exist in
    # Python 3
    excluded = set(B)
    return [a for a in A if a not in excluded]
46
def unique(A):
    '''return the unique elements of a list

    The result is always a real list (``dict.keys()`` is only a view in
    Python 3) and keeps the elements in order of first occurrence.

    :param A: iterable of hashable elements
    :returns: list with one entry per distinct element of A
    '''
    seen = {}
    result = []
    for a in A:
        if a not in seen:
            seen[a] = 1
            result.append(a)

    return result
55
def consecutiveUnique(A, B=None):
    '''Collapse runs of equal consecutive elements of A.

    :param A: sequence to collapse
    :param B: optional sequence parallel to A; the entries at the
        positions kept from A are returned as well
    :returns: the collapsed list, or the pair ``(collapsed A, matching B)``
        when B is given
    '''
    paired = B is not None
    if len(A) == 0:
        if paired:
            return ([], [])
        return []

    # position 0 always survives; a later position survives when its
    # element differs from the one just before it
    kept = [0]
    for pos in range(1, len(A)):
        if A[pos] != A[pos - 1]:
            kept.append(pos)

    collapsedA = [A[pos] for pos in kept]
    if not paired:
        return collapsedA
    return (collapsedA, [B[pos] for pos in kept])
78
def listEqual(A, B):
    '''Determine if the lists A and B contain exactly the same elements
    (the lists are treated as multisets)

    The comparison works on sorted copies, so neither argument is
    reordered.

    :param A: sequence of mutually comparable elements
    :param B: sequence of mutually comparable elements
    :returns: 1 if A and B hold the same elements with the same
        multiplicities, 0 otherwise
    '''
    if len(A) != len(B):
        return 0
    if sorted(A) == sorted(B):
        return 1
    return 0
93
def setEqual(A, B):
    '''Determine whether A and B contain the same distinct elements.

    Multiplicities are ignored.  Neither argument is modified; when A is
    a dictionary only its keys are considered.

    :param A: iterable of hashable elements
    :param B: iterable of hashable elements
    :returns: 1 if the two sets of elements are equal, 0 otherwise
    '''
    if set(A) == set(B):
        return 1
    return 0
107 108
def intersect(A, B):
    '''Return the elements of B that also occur in A.

    Each element is reported once, in the order of its first occurrence
    in B.  Only ``in`` comparisons are used, so the elements do not need
    to be hashable.

    :param A: container supporting ``in``
    :param B: iterable
    :returns: list of common elements
    '''
    common = []
    for candidate in B:
        if candidate not in A:
            continue
        if candidate in common:
            continue
        common.append(candidate)

    return common
117
def intersectDicts(A, B):
    '''Restrict two dictionaries to the keys they have in common.

    :param A: dictionary
    :param B: dictionary (or any container supporting ``in`` and indexing)
    :returns: pair ``(Anew, Bnew)`` of new dictionaries holding, for every
        shared key, the value from A and the value from B respectively
    '''
    shared = [key for key in A.keys() if key in B]
    Anew = {key: A[key] for key in shared}
    Bnew = {key: B[key] for key in shared}
    return Anew, Bnew
128 129
def intersectIndices(A, B):
    '''Return the positions in A whose elements also occur in B.

    :param A: sequence of hashable elements
    :param B: iterable of hashable elements
    :returns: increasing list of indices ``i`` with ``A[i]`` in B
    '''
    # the ``in`` operator replaces dict.has_key(), which does not exist in
    # Python 3
    members = set(B)
    return [i for i in range(len(A)) if A[i] in members]
142
def intersectSorted(A, B):
    '''Intersect two sequences that are sorted in increasing order.

    Both sequences are walked in a single simultaneous pass.

    :param A: sorted sequence
    :param B: sorted sequence
    :returns: list of the elements found in both
    '''
    common = []
    posA = 0
    posB = 0
    while posA < len(A) and posB < len(B):
        left = A[posA]
        right = B[posB]
        if left == right:
            common.append(left)
            posA += 1
            posB += 1
            continue
        # advance whichever side currently holds the smaller element
        if left > right:
            posB += 1
        else:
            posA += 1

    return common
159 160
def union(A, B):
    '''Return the elements of A followed by the elements of B not in A.

    A is copied as it is.  Every element of B that is missing from A is
    appended exactly once, even when it occurs several times in B.

    :param A: iterable of hashable elements
    :param B: iterable of hashable elements
    :returns: a new list; neither argument is modified
    '''
    # make sure we have a list:
    U = list(A)
    seen = set(U)
    for b in B:
        if b not in seen:
            # remember b so that a repeat later in B is not added again
            seen.add(b)
            U.append(b)

    return U
175 176
def mergeSorted(A, B):
    '''Merge two sequences that are sorted in increasing order.

    An element present at the current position of both sequences is
    emitted once.  Any sliceable sequence is accepted (lists, tuples, ...).

    :param A: sorted sequence
    :param B: sorted sequence
    :returns: new sorted list
    '''
    S = []
    i = 0
    j = 0
    while i < len(A) and j < len(B):
        if A[i] == B[j]:
            S.append(A[i])
            i += 1
            j += 1
        elif A[i] > B[j]:
            S.append(B[j])
            j += 1
        else:
            S.append(A[i])
            i += 1

    # extend() accepts any iterable tail, whereas ``list + tail`` only
    # works when the tail is itself a list
    if i < len(A):
        S.extend(A[i:])
    if j < len(B):
        S.extend(B[j:])

    return S
200 201
def invertDict(A):
    '''Swap the keys and values of a dictionary.

    When several keys share a value, the key visited last wins.

    :param A: dictionary with hashable values
    :returns: new dictionary mapping each value of A to a key of A
    '''
    return dict((A[key], key) for key in A)
209
def invert(A):
    '''Map each element of a sequence to its position.

    For repeated elements the last position is recorded.

    :param A: sequence of hashable elements
    :returns: dictionary ``{element: index}``
    '''
    return {A[index]: index for index in range(len(A))}
217 218
def majority(A):
    '''Return the most frequent element of A.

    Ties are broken in favour of the element that appears first in A.

    :param A: iterable of hashable elements
    :returns: the element with the highest number of occurrences
    :raises ValueError: if A is empty
    '''
    counts = {}
    order = []    # distinct elements in order of first appearance
    for a in A:
        if a in counts:
            counts[a] += 1
        else:
            counts[a] = 1
            order.append(a)

    if not order:
        raise ValueError('majority() of an empty sequence')

    maj = order[0]
    for key in order[1:]:
        # strict comparison keeps the earliest element on ties
        if counts[key] > counts[maj]:
            maj = key

    return maj
235
def idSubList(A, ids, idlist, *options):
    '''Take a sublist of a list whose members each have an id.

    ``ids[i]`` is the id of ``A[i]``.  By default the members whose id is
    in idlist are returned; with the option "remove" the members whose id
    is NOT in idlist are returned instead.

    :param A: sequence of members
    :param ids: sequence of hashable ids, parallel to A
    :param idlist: iterable (or dictionary keyed by) the ids of interest
    :param options: pass the string ``'remove'`` to drop the listed ids
    :returns: list of the selected members, in their original order
    '''
    dropListed = 'remove' in options
    listed = dict.fromkeys(idlist, 1)

    subA = []
    for position in range(len(ids)):
        # keep the member when "its id is listed" disagrees with "listed
        # ids are to be dropped"
        if (ids[position] in listed) != dropListed:
            subA.append(A[position])

    return subA
253 254
def subList(A, I, J=None):
    '''return a sublist of a list

    :param A: list, list of lists/tuples, or a list of strings
    :param I: subset of "rows" (first index) to take
    :param J: subset of "columns" (second index) to take (optional)
    :returns: ``A[i]`` for i in I, or ``A[i][j]`` for i in I and j in J if
        J is given; rows that are strings are returned as strings
    :raises ValueError: if J is given and the rows of A are neither
        lists, tuples nor strings
    '''
    if J is None:
        return [A[i] for i in I]

    first = A[0]
    if isinstance(first, (list, tuple)):
        return [[A[i][j] for j in J] for i in I]
    if isinstance(first, str):
        return [''.join([A[i][j] for j in J]) for i in I]

    # fail loudly instead of printing a message and returning None
    raise ValueError('wrong type of input')
277 278
def list2dict(A, val=None):
    '''convert a list to a dictionary

    If a value list is not given, then the value 1 is associated with
    each element in the dictionary.
    To assign each element its position in the list use:
    list2dict(A, range(len(A)))

    :param A: sequence of hashable elements; a dictionary is returned
        unchanged
    :param val: optional sequence of values, parallel to A
    :returns: dictionary keyed by the elements of A
    :raises ValueError: if val is given and its length differs from A's
    '''
    if isinstance(A, dict):
        return A

    if val is None:
        return dict.fromkeys(A, 1)

    if len(A) != len(val):
        # a silent empty result would hide the caller's mistake
        raise ValueError('list and value list do not have the same length')

    return dict(zip(A, val))
301 302
def dictCount(A):
    '''Count how many times each element occurs in A.

    :param A: iterable of hashable elements
    :returns: dictionary ``{element: number of occurrences}``
    '''
    tally = {}
    for item in A:
        tally[item] = tally.get(item, 0) + 1

    return tally
313 314
def emptyLOL(n):
    '''Create a list of n distinct empty lists.

    :param n: number of inner lists
    :returns: ``[[], [], ...]`` with n independent entries
    '''
    lists = []
    for _ in range(n):
        lists.append([])
    return lists
318 319
def matrix(shape, value=None):
    '''Build a list-of-lists matrix with every cell set to the same value.

    :param shape: indexable pair ``(rows, columns)``
    :param value: object stored in every cell (the same object is
        referenced by all cells)
    :returns: list of ``shape[0]`` independent row lists, each of length
        ``shape[1]``
    '''
    grid = []
    for _ in range(shape[0]):
        grid.append([value for _ in range(shape[1])])
    return grid
323 324 ## def transpose(matrix) : 325 326 ## m = len(matrix) 327 ## n = len(matrix[0]) 328 ## A = matrix((n,m)) 329 ## for i in range(m) : 330 ## for j in range(n) : 331 ## A[j][i] = matrix[i][j] 332 333 ## return A 334
def LOD(n):
    '''Create a list of n distinct empty dictionaries.

    :param n: number of dictionaries
    :returns: ``[{}, {}, ...]`` with n independent entries
    '''
    return [{} for _ in range(n)]
342 343
def inverseCumulative(x, v):
    '''Return the element of x found at fraction v of its sorted order.

    :param x: array-like of sortable values
    :param v: fraction of the way through the sorted values; ``v = 1``
        yields the largest element
    :returns: ``sorted(x)[int(len(x) * v)]``, with the index limited to
        the last position
    '''
    ordered = numpy.sort(numpy.asarray(x))

    # int(len * v) equals len(x) when v == 1, which is one past the end;
    # clamp it so the maximum is returned instead of raising IndexError
    num = min(int(len(ordered) * v), len(ordered) - 1)

    return ordered[num]
353 354
def translate(id, idList):
    '''Replace each element of id by its position in idList.

    :param id: iterable of elements, all of which must occur in idList
    :param idList: sequence of hashable elements (for repeated elements
        the last position is used); a dictionary is used directly as the
        element-to-position table
    :returns: list of positions, one per element of id
    '''
    if type(idList) == type({}):
        position = idList
    else:
        position = {}
        for index in range(len(idList)):
            position[idList[index]] = index

    return [position[elem] for elem in id]
365 366
def count(A):
    '''count the number of occurrences of each element in a list

    :param A: iterable of hashable elements
    :returns: dictionary ``{element: number of occurrences}``
    '''
    occurrences = {}
    for element in A:
        try:
            occurrences[element] += 1
        except KeyError:
            # first time this element is seen
            occurrences[element] = 1

    return occurrences
379 380
class Container(object):
    '''A plain object whose attributes are set from a dictionary.'''

    def __init__(self, attributeDict=None):
        '''
        :param attributeDict: optional mapping ``{attribute name: value}``;
            every entry becomes an attribute of the instance
        '''
        # None (not a shared ``{}``) is the default, so no mutable object
        # is shared between calls
        if attributeDict is None:
            attributeDict = {}
        for attribute in attributeDict:
            setattr(self, attribute, attributeDict[attribute])

    def __repr__(self):
        '''One line per attribute: ``name value``, or ``length of name n``
        for values longer than 10 elements.'''
        maxLength = 10
        lines = []
        for attribute in self.__dict__:
            value = getattr(self, attribute)
            try:
                length = len(value)
            except Exception:
                # the value has no usable length: show it in full
                length = None
            if length is not None and length > maxLength:
                lines.append('length of ' + attribute + ' ' + str(length))
            else:
                lines.append(attribute + ' ' + str(value))

        return '\n'.join(lines)

    def addAttributes(self, object, attributes):
        '''Copy the named attributes from another object.

        :param object: object to copy from
        :param attributes: iterable of attribute names; names that the
            source object does not have are skipped
        '''
        for attribute in attributes:
            if hasattr(object, attribute):
                # getattr() also works for objects that lack a
                # __getattribute__ method (Python 2 old-style instances)
                setattr(self, attribute, getattr(object, attribute))
409 410
def extractAttribute(l, attribute):
    '''Collect one attribute from every object in a container.

    :param l: dictionary of objects, or any other iterable of objects
        (list, tuple, ...)
    :param attribute: name of the attribute to read
    :returns: a dictionary with the same keys when l is a dictionary,
        otherwise a list in iteration order
    '''
    if isinstance(l, dict):
        return dict((k, getattr(l[k], attribute)) for k in l)

    # every non-dictionary iterable is handled, so no input type can fall
    # through without producing a result
    return [getattr(elem, attribute) for elem in l]
424 425
def split(s, delim):
    '''Split a string on a delimiter.

    A single blank as delimiter means "any run of whitespace".

    :param s: string to split
    :param delim: delimiter string
    :returns: list of tokens
    '''
    if delim != ' ':
        return s.split(delim)
    return s.split()
432 433
def flat(A):
    '''Concatenate the members of an iterable of iterables (one level).

    :param A: iterable whose members are themselves iterable
    :returns: a single list with all inner elements, in order
    '''
    return [item for inner in A for item in inner]
441 442
def transpose(A):
    '''Transpose a matrix given as a sequence of rows.

    :param A: sequence of lists or tuples, or sequence of strings; the
        length of the first row determines the number of columns
    :returns: list of lists for list/tuple rows, list of strings for
        string rows, ``[]`` for an empty A
    :raises ValueError: if the rows are of any other type
    '''
    if len(A) == 0:
        return []

    first = A[0]
    # isinstance(..., tuple) is the intended test: ``type((1))`` is int,
    # because ``(1)`` is a parenthesised integer and not a tuple
    if isinstance(first, (list, tuple)):
        return [[row[i] for row in A] for i in range(len(first))]

    if isinstance(first, str):
        return [''.join([row[i] for row in A]) for i in range(len(first))]

    raise ValueError('wrong type of Input')
465 466
def dictProjection(A, B):
    '''Project two dictionaries onto their common keys.

    :param A: dictionary
    :param B: dictionary (or any container supporting ``in`` and indexing)
    :returns: pair of new dictionaries with, for every key present in
        both, the value from A and the value from B respectively
    '''
    Aprojected, Bprojected = {}, {}
    for key in A:
        if key not in B:
            continue
        Aprojected[key] = A[key]
        Bprojected[key] = B[key]

    return Aprojected, Bprojected
477
def randsubset(length, subsetLength):
    '''returns a random subset of range(length) of size subsetLength

    :param length: integer size of the range to draw from
    :param subsetLength: number of indices to draw; when larger than
        length, all indices are returned
    :returns: sorted list of distinct indices
    :raises ValueError: if length is not an integer
    '''
    if not isinstance(length, int):
        raise ValueError('wrong type of argument')

    # range() must be turned into a list first: random.shuffle needs a
    # mutable sequence, which a Python 3 range object is not
    I = list(range(length))
    random.shuffle(I)
    I = I[:subsetLength]
    I.sort()
    return I
489
class Null:
    """
    Null objects always and reliably do nothing.

    Calling a Null, or getting, setting or deleting any attribute on it,
    yields the same Null again; a Null is false in a boolean context.
    """

    def __init__(self, *args, **kwargs): pass
    def __call__(self, *args, **kwargs): return self
    def __repr__(self): return "Null()"

    # Python 3 consults __bool__, Python 2 consults __nonzero__; both are
    # provided so that a Null is false on either interpreter
    def __bool__(self): return False
    __nonzero__ = __bool__

    def __getattr__(self, name): return self
    def __setattr__(self, name, value): return self
    def __delattr__(self, name): return self
503
def sortDict(dict):
    '''sort the values and keys of a dictionary
    assumes values are numeric

    :param dict: dictionary with numeric values
    :returns: pair ``(sorted values as a numpy array, list of keys in the
        same order)``
    '''
    # materialise keys and values as sequences: in Python 3 they are
    # views, which can neither be indexed nor handed to numpy directly
    keys = list(dict.keys())
    values = numpy.asarray(list(dict.values()))

    ranking = numpy.argsort(values)
    # both results are taken from the same ranking so that keys and
    # values stay aligned
    sortedKeys = [keys[i] for i in ranking]

    return values[ranking], sortedKeys
517
def dict2array(dict):
    '''Convert a dictionary keyed by non-negative integers to an array.

    :param dict: dictionary ``{index: value}``
    :returns: float64 numpy array of length ``max(index) + 1`` holding
        each value at its index and 0 elsewhere; an empty array for an
        empty dictionary
    '''
    # numpy.float64 is used because the ``numpy.float_`` alias was
    # removed in NumPy 2.0
    if len(dict) == 0:
        return numpy.zeros(0, numpy.float64)

    a = numpy.zeros(max(dict.keys()) + 1, numpy.float64)
    for key in dict:
        a[key] = dict[key]

    return a
525
def argmax(A):
    '''returns the indices of the maximum element of a two dimensional matrix

    :param A: two dimensional numpy array or nested sequence
    :returns: tuple ``(row, column)`` of the first maximum in row-major
        order
    :raises ValueError: if A is not two dimensional
    '''
    # asarray lets nested lists through; they pass the shape check but
    # have no ``.flat`` attribute of their own
    A = numpy.asarray(A)
    if A.ndim != 2:
        raise ValueError('wrong shape for matrix')

    n = A.shape[1]
    # without an axis numpy.argmax works on the flattened array
    maxElem = int(numpy.argmax(A))

    return divmod(maxElem, n)
537 538
def splitFileName(fileName):
    '''Split a file name into directory, base name and extension.

    :param fileName: path to a file (made absolute first)
    :returns: tuple ``(directory, base, ext)`` where directory is the
        absolute directory followed by ``'/'`` and ext includes its
        leading dot
    '''
    fullPath = os.path.abspath(fileName)
    directory, leaf = os.path.split(fullPath)
    base, ext = os.path.splitext(leaf)

    return (directory + '/', base, ext)
547
def unravel(l):
    '''Flatten an iterable of iterables by one level.

    :param l: iterable whose members are themselves iterable
    :returns: list of all inner elements, in order
    '''
    r = []
    for element in l:
        for item in element:
            r.append(item)

    return r
555
def findDelim(handleOrName):
    '''Guess the field delimiter used in a delimited text file.

    Leading lines that start with ``%`` or ``#`` are skipped.  The line
    AFTER the first remaining line is then split on comma, blank and tab,
    and the delimiter giving the most fields wins (on ties the later of
    comma, blank, tab is chosen).
    NOTE(review): skipping the first non-comment line presumably steps
    over a header row -- confirm.  When no line follows it, that first
    line itself is examined.

    :param handleOrName: an open file-like object (anything with a
        ``readline`` method) or a file name
    :returns: one of ``','``, ``' '``, ``'\\t'``

    A handle passed in is returned to the position it had on entry; a
    file opened here from its name is closed again.
    '''
    commentChars = ('%', '#')
    delims = [',', ' ', '\t']

    openedHere = not hasattr(handleOrName, 'readline')
    if openedHere:
        fileHandle = myio.myopen(handleOrName)
    else:
        fileHandle = handleOrName

    try:
        pos = fileHandle.tell()
        line = fileHandle.readline()
        # ''.startswith(...) is False, so reaching the end of the file
        # ends the loop instead of raising IndexError on line[0]
        while line.startswith(commentChars):
            line = fileHandle.readline()
        firstData = line
        line = fileHandle.readline() or firstData

        delimiter = delims[0]
        length = 0
        for delim in delims:
            l = len(line.split(delim))
            if l >= length:
                delimiter = delim
                length = l
        fileHandle.seek(pos)
    finally:
        if openedHere:
            # assumes the object returned by myio.myopen is file-like and
            # has close() -- it is already used via tell/readline/seek
            fileHandle.close()

    return delimiter


getDelim = findDelim
def adjacencyMatrix(fileName):
    '''Read an undirected graph from a delimited file of vertex pairs.

    The first two fields of every line name the end points of an edge.
    Vertex names are converted to integers when both convert, and are
    kept as strings otherwise.  Lines with fewer than two fields are
    skipped.

    :param fileName: path of the edge file; its delimiter is detected
        with findDelim
    :returns: dictionary of dictionaries E with ``E[v1][v2] == 1`` and
        ``E[v2][v1] == 1`` for every edge
    '''
    E = {}
    # the with-block closes the file even when a line cannot be parsed
    with open(fileName) as edgeFile:
        delim = findDelim(edgeFile)

        for line in edgeFile:
            # drop the line terminator so it does not end up inside the
            # last vertex name
            tokens = split(line.rstrip('\r\n'), delim)
            if len(tokens) < 2:
                continue
            try:
                v1 = int(tokens[0])
                v2 = int(tokens[1])
            except ValueError:
                v1 = tokens[0]
                # second end point (tokens[0] here would make every
                # edge a self-loop)
                v2 = tokens[1]
            if v1 not in E: E[v1] = {}
            if v2 not in E: E[v2] = {}
            E[v1][v2] = 1
            E[v2][v1] = 1

    return E
601
def getArch():
    '''Return the machine architecture as reported by the ``arch`` command.

    :returns: first line printed by ``arch`` (standard output and
        standard error combined), stripped of surrounding whitespace;
        ``platform.machine()`` when the command cannot be started
    '''
    import platform
    import subprocess

    # subprocess replaces os.popen4, which does not exist in Python 3
    try:
        process = subprocess.Popen(['arch'],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT,
                                   universal_newlines=True)
        output = process.communicate()[0]
    except OSError:
        # no ``arch`` executable on this system
        return platform.machine()

    lines = output.splitlines()
    if not lines:
        return ''
    return lines[0].strip()
608
class MyList(list):
    '''A list offering the append/appendPrediction/computeStats methods;
    extra arguments to the constructor and to append are ignored.'''

    def __init__(self, arg1=None, arg2=None, arg3=None, *options, **args):
        '''Create an empty list; all arguments are accepted and ignored.'''

        list.__init__(self)

    def append(self, arg, arg1=None, arg2=None, arg3=None):
        '''Append arg; the remaining arguments are ignored.'''

        list.append(self, arg)

    def appendPrediction(self, arg1, arg2, arg3):
        '''Append arg1; arg2 and arg3 are ignored.

        NOTE(review): arg1 is stored because append() stores its first
        argument; the previous code referenced an undefined name and
        could never run -- confirm that arg1 is what was intended.
        '''

        list.append(self, arg1)

    def computeStats(self):
        '''Do nothing.'''

        pass
627
class DecisionFuncResults(object):
    '''Accumulates the first component of each appended prediction in the
    ``decisionFunc`` list.'''

    def __init__(self, arg1=None, arg2=None, arg3=None, *options, **args):
        '''Start with an empty ``decisionFunc`` list; all arguments are
        accepted and ignored.'''

        self.decisionFunc = list()

    def appendPrediction(self, arg1, arg2, arg3):
        '''Record ``arg1[0]``; arg2 and arg3 are ignored.'''

        value = arg1[0]
        self.decisionFunc.append(value)

    def computeStats(self):
        '''Do nothing.'''

        return None
642
def mysetattr(obj, attribute, value):
    '''setattr that understands dotted attribute paths.

    ``mysetattr(obj, 'a.b.c', v)`` sets attribute ``c`` of ``obj.a.b``.

    :param obj: object at which the path starts
    :param attribute: attribute name, optionally dotted
    :param value: value to assign
    '''
    tokens = attribute.split('.')

    # walk down to the object that owns the final attribute
    target = obj
    for name in tokens[:-1]:
        target = getattr(target, name)

    setattr(target, tokens[-1], value)
651
def isString(var):
    """
    determine whether a variable is a string

    :param var: any object
    :returns: True for ``str`` objects (and, on Python 2, ``unicode``
        objects), including instances of their subclasses; False otherwise
    """

    try:
        stringTypes = (str, unicode)    # Python 2 has a separate unicode type
    except NameError:
        stringTypes = (str,)            # Python 3: str is the only text type

    return isinstance(var, stringTypes)
658
def set_attributes(x, values, defaults=None):
    '''Set entries of a dictionary, or attributes of an object, from
    values, falling back to defaults.

    :param x: dictionary or object to modify in place
    :param values: mapping of names to the values to set
    :param defaults: mapping of names to fallback values; when omitted,
        values itself is used.  For an object only the names listed in
        defaults are set; for a dictionary all entries of defaults and
        of values are written.
    '''
    # resolve the fallback before branching, so that the dictionary
    # branch never calls x.update(None)
    if defaults is None:
        defaults = values

    if isinstance(x, dict):
        x.update(defaults)
        x.update(values)
    else:
        for attribute in defaults:
            if attribute in values:
                setattr(x, attribute, values[attribute])
            else:
                setattr(x, attribute, defaults[attribute])
671
def update(x, **entries):
    """Update a dict or an object according to entries and return it.

    >>> update({'a': 1}, a=10, b=20) == {'a': 10, 'b': 20}
    True

    :param x: dictionary (entries are stored as items) or any other
        object (entries are stored as attributes); modified in place
    :param entries: names and values to store
    :returns: x itself
    """
    if isinstance(x, dict):
        x.update(entries)
    else:
        for attribute in entries:
            setattr(x, attribute, entries[attribute])

    return x
684 685
def timer(fn, *args):
    """Time the application of fn to args. Return (result, seconds).

    :param fn: callable to time
    :param args: positional arguments passed to fn
    :returns: tuple ``(fn(*args), elapsed seconds)``
    """

    import time
    # time.clock() was removed in Python 3.8; it is only used as a
    # fallback on interpreters that have no time.perf_counter()
    clock = getattr(time, 'perf_counter', None) or time.clock
    start = clock()
    result = fn(*args)
    return result, clock() - start
692
def get_defaults(defaults, args, varNames):
    '''Look up a value for each requested name, preferring args over
    defaults.

    :param defaults: mapping of every known name to its default value
    :param args: mapping of the names that were given explicitly
    :param varNames: iterable of the names to look up
    :returns: list of values in the order of varNames
    :raises ValueError: if a requested name has no entry in defaults
    '''
    returnList = []
    for name in varNames:
        if name not in defaults:
            raise ValueError('argument missing in defaults: %s' % (name,))
        if name in args:
            returnList.append(args[name])
        else:
            returnList.append(defaults[name])
    return returnList
704