Package mmLib :: Module mmCIF
[hide private]
[frames] | no frames]

Source Code for Module mmLib.mmCIF

   1  ## Copyright 2002-2010 by PyMMLib Development Group (see AUTHORS file) 
   2  ## This code is part of the PyMMLib distribution and governed by 
   3  ## its license.  Please see the LICENSE file that should have been 
   4  ## included as part of this package. 
   5  """mmCIF file and mmCIF dictionary parser. Files are parsed into a set of data  
   6  structures where they can be further processed. The data structures can also  
   7  be constructed and written back out as mmCIF. A CIF dictionary parser is also  
   8  included as a specialized version of the mmCIF parser. 
   9  """ 
  10  from __future__ import generators 
  11   
  12  import re 
  13  import copy 
  14  import itertools 
  15   
  16  ## 
  17  ## DATA STRUCTURES FOR HOLDING CIF INFORMATION 
  18  ## 
  19  ## mmCIF files are parsed into: 
  20  ##         mmCIFFile -> [mmCIFData] -> [mmCIFTable] -> [mmCIFRow] 
  21  ## 
  22  ## mmCIF dictionaries are parsed into: 
  23  ##         mmCIFDictionary -> [mmCIFData] -> [mmCIFTable] -> [mmCIFRow] 
  24  ## 
  25   
  26  ## mmCIF Maximum Line Length 
  27  MAX_LINE = 2048 
  28   
  29   
class mmCIFError(Exception):
    """General error raised by the mmCIF reader and writer classes of
    this module.
    """
34 35
class mmCIFSyntaxError(Exception):
    """Raised when the contents of a mmCIF file cannot be parsed.

    Attributes:
        line_num: line number the problem is reported for
        text: description of the problem
    """
    def __init__(self, line_num, text):
        ## pass the arguments on to Exception so self.args is populated;
        ## copy and pickle rebuild an exception by calling the class
        ## with self.args, which fails if args is left empty
        Exception.__init__(self, line_num, text)
        self.line_num = line_num
        self.text = text

    def __str__(self):
        return "[line: %d] %s" % (self.line_num, self.text)
46 47
class mmCIFRow(dict):
    """Contains one row of data. In a mmCIF file, this is one complete
    set of data found under a section. Column names are case-insensitive:
    the item access methods fold them to lower case before touching the
    underlying dict. The data can also be read by using the column names
    as attributes.

    The *_lower methods skip the case folding and expect a column name
    which is already lower case.
    """
    __slots__ = ["table"]

    def __eq__(self, other):
        """Rows are compared by identity, not by content."""
        return self is other

    def __ne__(self, other):
        ## must be defined together with __eq__, otherwise dict.__ne__
        ## compares contents and a == b, a != b can both be False
        return self is not other

    def __deepcopy__(self, memo):
        """Return a new row holding the same column/value pairs. The
        values are not copied and the table reference is not carried over.
        """
        cif_row = mmCIFRow()
        for key, val in self.items():
            cif_row[key] = val
        return cif_row

    def __contains__(self, column):
        return dict.__contains__(self, column.lower())

    def __setitem__(self, column, value):
        assert value is not None
        dict.__setitem__(self, column.lower(), value)

    def __getattr__(self, name):
        ## only called when normal attribute lookup fails
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __getitem__(self, column):
        return dict.__getitem__(self, column.lower())

    def getitem_lower(self, clower):
        """Same as row[clower] for a name which is already lower case."""
        return dict.__getitem__(self, clower)

    def __delitem__(self, column):
        dict.__delitem__(self, column.lower())

    def get(self, column, default = None):
        return dict.get(self, column.lower(), default)

    def get_lower(self, clower, default = None):
        """Same as get() for a name which is already lower case."""
        return dict.get(self, clower, default)

    def has_key(self, column):
        ## dict.has_key does not exist on Python 3
        return dict.__contains__(self, column.lower())

    def has_key_lower(self, clower):
        """Same as has_key() for a name which is already lower case."""
        return dict.__contains__(self, clower)
97 98
class mmCIFTable(list):
    """Contains columns and rows of data for a mmCIF section. Rows of data
    are stored as mmCIFRow classes.

    Attributes:
        name: the section (table) name
        columns: the column names in order, spelled as they were given
        columns_lower: maps each lower-case column name to the spelling
            stored in columns
        data: the container this table was added to (assigned by the
            container, not by this class)
    """
    __slots__ = ["name", "columns", "columns_lower", "data"]

    def __init__(self, name, columns = None):
        assert name is not None

        list.__init__(self)
        self.name = name
        if columns is None:
            columns = ()
        self.set_columns(columns)

    def __deepcopy__(self, memo):
        table = mmCIFTable(self.name, self.columns[:])
        for row in self:
            table.append(copy.deepcopy(row, memo))
        return table

    def __eq__(self, other):
        """Tables are compared by identity, not by content."""
        return self is other

    def __ne__(self, other):
        ## must be defined together with __eq__, otherwise list.__ne__
        ## compares contents and a == b, a != b can both be False
        return self is not other

    def is_single(self):
        """Return true if the table is not a _loop table with multiple
        rows of data.
        """
        return len(self) <= 1

    def __getattr__(self, name):
        ## only called when normal attribute lookup fails
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __getitem__(self, x):
        """Retrieves mmCIFRow at index x from the table if the argument is
        an integer. If the argument is a string, then the data from the
        first row is returned. Raises KeyError for a string which cannot
        be looked up in the first row (including when there is no first
        row) and TypeError for any other argument type.
        """
        if isinstance(x, int):
            return list.__getitem__(self, x)

        if isinstance(x, str):
            try:
                return self[0][x]
            except (IndexError, KeyError):
                raise KeyError(x)

        raise TypeError(x)

    def __setitem__(self, x, value):
        """With an integer index, stores the mmCIFRow value at that
        position. With a string, sets that column in the first row,
        creating the row if the table is empty. Any other combination of
        argument types is ignored.
        """
        assert value is not None

        if isinstance(x, int) and isinstance(value, mmCIFRow):
            value.table = self
            list.__setitem__(self, x, value)

        elif isinstance(x, str):
            try:
                row = list.__getitem__(self, 0)
            except IndexError:
                row = self.new_row()
            row[x] = value

    def __delitem__(self, i):
        self.remove(self[i])

    def get(self, x, default = None):
        try:
            return self[x]
        except KeyError:
            return default

    def append(self, row):
        assert isinstance(row, mmCIFRow)
        row.table = self
        list.append(self, row)

    def insert(self, i, row):
        assert isinstance(row, mmCIFRow)
        row.table = self
        list.insert(self, i, row)

    def remove(self, row):
        """Removes the row from the table. Raises ValueError, leaving the
        row untouched, if the row is not in this table.
        """
        assert isinstance(row, mmCIFRow)
        ## take the row out of the list first so a row which belongs to
        ## some other table does not lose its table reference
        list.remove(self, row)
        try:
            del row.table
        except AttributeError:
            pass

    def set_columns(self, columns):
        """Sets the list of column(subsection) names to the list of names in
        columns.
        """
        self.columns = list()
        self.columns_lower = dict()
        for column in columns:
            self.append_column(column)

    def append_column(self, column):
        """Appends a column(subsection) name to the table. If the name is
        already present (ignoring case), the stored spelling is replaced
        in place.
        """
        clower = column.lower()
        if clower in self.columns_lower:
            i = self.columns.index(self.columns_lower[clower])
            self.columns[i] = column
        else:
            self.columns.append(column)
        self.columns_lower[clower] = column

    def has_column(self, column):
        """Tests if the table contains the column name.
        """
        return column.lower() in self.columns_lower

    def remove_column(self, column):
        """Removes the column name from the table. Unknown names are
        ignored.
        """
        clower = column.lower()
        if clower not in self.columns_lower:
            return
        self.columns.remove(self.columns_lower[clower])
        del self.columns_lower[clower]

    def autoset_columns(self):
        """Automatically sets the mmCIFTable column names by inspecting all
        mmCIFRow objects it contains. Columns found in a row are added,
        columns used by no row are removed.
        """
        clower_used = set()
        for cif_row in self:
            for clower in cif_row:
                clower_used.add(clower)
                if clower not in self.columns_lower:
                    self.append_column(clower)

        ## iterate over a copy: remove_column() deletes from columns_lower
        for clower in list(self.columns_lower):
            if clower not in clower_used:
                self.remove_column(clower)

    def get_row1(self, clower, value):
        """Return the first row which has column data matching value, or
        None if there is no such row.
        """
        for row in self:
            if row.get_lower(clower) == value:
                return row
        return None

    def get_row(self, *args):
        """Performs a SQL-like 'AND' select against all the rows in the
        table, and returns the first matching row found, or None. The
        arguments are a variable list of tuples of the form:
          (<lower-case-column-name>, <column-value>)
        For example:
          get_row(('atom_id','CA'),('entity_id', '1'))
        returns the first matching row with atom_id=='CA' and
        entity_id=='1'.
        """
        for row in self.iter_rows(*args):
            return row
        return None

    def new_row(self):
        """Creates a new mmCIF row, adds it to the table, and returns it.
        """
        cif_row = mmCIFRow()
        self.append(cif_row)
        return cif_row

    def iter_rows(self, *args):
        """This is the same as get_row, but it iterates over all matching
        rows in the table.
        """
        for cif_row in self:
            match_row = True
            for clower, value in args:
                if cif_row.get_lower(clower) != value:
                    match_row = False
                    break
            if match_row:
                yield cif_row

    def row_index_dict(self, clower):
        """Return a dictionary mapping the value each row has in column
        clower to the row itself. Rows without that column are skipped.
        If there are multiple rows with the same value, the last row
        found wins.
        """
        dictx = dict()
        for row in self:
            try:
                dictx[row.getitem_lower(clower)] = row
            except KeyError:
                pass
        return dictx
308 309
class mmCIFData(list):
    """Contains all information found under a data_ block in a mmCIF file.
    mmCIF files are represented differently here than their file format
    would suggest. Since a mmCIF file is more-or-less a SQL database dump,
    the files are represented here with their sections as "Tables" and
    their subsections as "Columns". The data is stored in "Rows".

    Tables are looked up by integer index or by table name; names are
    matched ignoring case.
    """
    __slots__ = ["name", "file"]

    def __init__(self, name):
        assert name is not None
        list.__init__(self)
        self.name = name

    def __str__(self):
        return "mmCIFData(name = %s)" % (self.name)

    def __deepcopy__(self, memo):
        data = mmCIFData(self.name)
        for table in self:
            data.append(copy.deepcopy(table, memo))
        return data

    def __eq__(self, other):
        """Data blocks are compared by identity, not by content."""
        return self is other

    def __ne__(self, other):
        ## must be defined together with __eq__, otherwise list.__ne__
        ## compares contents and a == b, a != b can both be False
        return self is not other

    def __getattr__(self, name):
        ## only called when normal attribute lookup fails
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __getitem__(self, x):
        """Retrieve a table by index or by name. Raises IndexError or
        KeyError if it is not found, TypeError for other argument types.
        """
        if isinstance(x, int):
            return list.__getitem__(self, x)

        if isinstance(x, str):
            name = x.lower()
            for ctable in self:
                if ctable.name.lower() == name:
                    return ctable
            raise KeyError(x)

        raise TypeError(x)

    def __setitem__(self, x, table):
        """Store a mmCIFTable by index or by table name.

        With an integer index the table at that position is replaced;
        IndexError is raised if the index is out of range. With a name,
        any table stored under that name is removed and the new table is
        appended. In both cases a different table already using the new
        table's name is removed as well, as append() does.
        """
        assert isinstance(table, mmCIFTable)

        if isinstance(x, int):
            old_table = list.__getitem__(self, x)

            try:
                same_name = self[table.name]
            except KeyError:
                same_name = None
            if same_name is not None and same_name is not old_table:
                self.remove(same_name)

            ## the removal above may have shifted the position of
            ## old_table, so look it up again before replacing it
            i = self.index(old_table)
            if old_table is not table:
                try:
                    del old_table.data
                except AttributeError:
                    pass
            table.data = self
            list.__setitem__(self, i, table)

        elif isinstance(x, str):
            try:
                old_table = self[x]
            except KeyError:
                pass
            else:
                self.remove(old_table)
            self.append(table)

        else:
            raise TypeError(x)

    def __delitem__(self, x):
        """Remove a mmCIFTable by index or table name.
        """
        self.remove(self[x])

    def append(self, table):
        """Append a mmCIFTable. This will trigger the removal of any table
        with the same name.
        """
        assert isinstance(table, mmCIFTable)
        try:
            del self[table.name]
        except KeyError:
            pass
        table.data = self
        list.append(self, table)

    def insert(self, i, table):
        """Insert a mmCIFTable at index i. This will trigger the removal of
        any table with the same name.
        """
        assert isinstance(table, mmCIFTable)
        try:
            del self[table.name]
        except KeyError:
            pass
        table.data = self
        list.insert(self, i, table)

    def remove(self, table):
        """Remove the table. Raises ValueError, leaving the table
        untouched, if the table is not stored here.
        """
        assert isinstance(table, mmCIFTable)
        ## take the table out of the list first so a table which belongs
        ## to some other container does not lose its data reference
        list.remove(self, table)
        try:
            del table.data
        except AttributeError:
            pass

    def has_key(self, x):
        try:
            self[x]
        except KeyError:
            return False
        else:
            return True

    def get(self, x, default = None):
        try:
            return self[x]
        except KeyError:
            return default

    def has_table(self, x):
        """Same as has_key()."""
        return self.has_key(x)

    def get_table(self, name):
        """Looks up and returns a stored mmCIFTable class by its name. This
        name is the section key in the mmCIF file. Returns None if the
        table is not found.
        """
        try:
            return self[name]
        except (KeyError, IndexError):
            return None

    def new_table(self, name, columns=None):
        """Creates and returns a mmCIFTable object with the given name.
        The object is added to this object before it is returned.
        """
        cif_table = mmCIFTable(name, columns)
        self.append(cif_table)
        return cif_table

    def split_tag(self, tag):
        """Split a tag of the form _<table>.<column> into the lower-case
        2-tuple (table name, column name).
        """
        ## split on the first "." only, the column name may contain more
        cif_table_name, cif_column_name = tag[1:].split(".", 1)
        return cif_table_name.lower(), cif_column_name.lower()

    def join_tag(self, cif_table_name, cif_column_name):
        """Return the tag _<table>.<column> for the given names."""
        return "_%s.%s" % (cif_table_name, cif_column_name)

    def get_tag(self, tag):
        """Return the value stored for the tag _<table>.<column>, or None
        if the table or the value cannot be found.
        """
        table_name, column = self.split_tag(tag)
        try:
            return self[table_name][column]
        except KeyError:
            return None

    def set_tag(self, tag, value):
        """Set the value for the tag _<table>.<column>. The table must
        already exist, otherwise KeyError is raised.
        """
        table_name, column = self.split_tag(tag)
        self[table_name][column] = value
467 468
class mmCIFSave(mmCIFData):
    """Holds the data of a save_ block found in a mmCIF dictionary. Such
    blocks are stored flat, next to the data_ blocks, instead of nested
    inside of them.
    This may not be correct!
    """
475 476
class mmCIFFile(list):
    """Class representing a mmCIF file: a list of mmCIFData objects which
    can be looked up by integer index or by name. Names are matched
    ignoring case.
    """
    def __deepcopy__(self, memo):
        cif_file = mmCIFFile()
        for data in self:
            cif_file.append(copy.deepcopy(data, memo))
        return cif_file

    def __str__(self):
        l = [str(cdata) for cdata in self]
        return "mmCIFFile([%s])" % (", ".join(l))

    def __eq__(self, other):
        """Files are compared by identity, not by content."""
        return self is other

    def __ne__(self, other):
        ## must be defined together with __eq__, otherwise list.__ne__
        ## compares contents and a == b, a != b can both be False
        return self is not other

    def __getattr__(self, name):
        ## only called when normal attribute lookup fails
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __getitem__(self, x):
        """Retrieve a mmCIFData object by index or name. Raises IndexError
        or KeyError if it is not found, TypeError for other argument types.
        """
        if isinstance(x, int):
            return list.__getitem__(self, x)

        if isinstance(x, str):
            name = x.lower()
            for cdata in self:
                if cdata.name.lower() == name:
                    return cdata
            raise KeyError(x)

        raise TypeError(x)

    def __delitem__(self, x):
        """Remove a mmCIFData by index or data name. Raises IndexError
        or KeyError if the mmCIFData object is not found, the error raised
        depends on the argument type.
        """
        self.remove(self[x])

    def append(self, cdata):
        """Append a mmCIFData object. This will trigger the removal of any
        mmCIFData object in the file with the same name.
        """
        assert isinstance(cdata, mmCIFData)
        try:
            del self[cdata.name]
        except KeyError:
            pass
        cdata.file = self
        list.append(self, cdata)

    def insert(self, i, cdata):
        """Insert a mmCIFData object at index i. This will trigger the
        removal of any mmCIFData object in the file with the same name.
        """
        assert isinstance(cdata, mmCIFData)
        try:
            del self[cdata.name]
        except KeyError:
            pass
        cdata.file = self
        list.insert(self, i, cdata)

    def has_key(self, x):
        """Return True if a mmCIFData object named x is stored. The name
        is matched ignoring case, the same way item lookup does it.
        """
        try:
            name = x.lower()
        except AttributeError:
            ## not a string, cannot be a name
            return False
        for cdata in self:
            if cdata.name.lower() == name:
                return True
        return False

    def get(self, x, default = None):
        try:
            return self[x]
        except KeyError:
            return default

    def load_file(self, fil):
        """Load and append the mmCIF data from fil into self. The fil
        argument is either the path of the file to read, or a file object
        (or an object implementing its interface). A file opened here
        from a path is closed again; a file object passed in is left open.
        """
        if isinstance(fil, str):
            fileobj = open(fil, "r")
            try:
                mmCIFFileParser().parse_file(fileobj, self)
            finally:
                fileobj.close()
        else:
            mmCIFFileParser().parse_file(fil, self)

    def save_file(self, fil):
        """Write the contents of self as mmCIF to fil. The fil argument is
        either the path of the file to write, or a file object. A file
        opened here from a path is closed again; a file object passed in
        is left open.
        """
        if isinstance(fil, str):
            fileobj = open(fil, "w")
            try:
                mmCIFFileWriter().write_file(fileobj, self)
            finally:
                fileobj.close()
        else:
            mmCIFFileWriter().write_file(fil, self)

    def get_data(self, name):
        """Returns the mmCIFData object with the given name. Returns None
        if no such object exists.
        """
        try:
            return self[name]
        except (KeyError, IndexError):
            return None

    def new_data(self, name):
        """Creates a new mmCIFData object with the given name, adds it
        to this mmCIFFile, and returns it.
        """
        cif_data = mmCIFData(name)
        self.append(cif_data)
        return cif_data
589 590
class mmCIFDictionary(mmCIFFile):
    """Class representing a mmCIF dictionary. It adds nothing to
    mmCIFFile: it takes no constructor arguments, and a dictionary is
    read with the inherited load_file() method, which accepts the string
    path of the file or alternatively a file object.
    """
    pass
##
## FILE PARSERS/WRITERS
##
class mmCIFFileParser(object):
    """Stateful parser which uses the gen_token_iter() tokenizer to read
    a mmCIF file and convert it into the mmCIFData/mmCIFTable/mmCIFRow
    data hierarchy.
    """
    def parse_file(self, fileobj, cif_file):
        """Tokenize fileobj and parse the tokens into cif_file.

        The line counter is reset before parsing. parse() is expected to
        end by running out of tokens (StopIteration), which is swallowed
        here. If parse() returns normally instead, a mmCIFError is raised.
        """
        self.line_number = 0
        token_iter = self.gen_token_iter(fileobj)

        try:
            self.parse(token_iter, cif_file)
        except StopIteration:
            pass
        else:
            raise mmCIFError()

    def syntax_error(self, err):
        """Raise a mmCIFSyntaxError with the message err, tagged with the
        current line number.
        """
        raise mmCIFSyntaxError(self.line_number, err)

    def split_token(self, tokx):
        """Returns the mmCIF token split into a 2-tuple:
        (reserved word, name) where the reserved word is one of the mmCIF
        reserved words: data, loop, global, save, stop (lower-cased,
        without the trailing underscore) and name is whatever follows the
        first underscore. Returns (None, None) if the token contains no
        underscore or does not start with a reserved word.
        """
        i = tokx.find("_")
        if i == -1:
            return None, None

        rword = tokx[:i].lower()
        if rword not in ("data", "loop", "global", "save", "stop"):
            return None, None

        name = tokx[i+1:]
        return rword, name

    def parse(self, token_iter, cif_file):
        """Stateful parser for mmCIF files.

        Reads the 4-tuples (tblx, colx, strx, tokx) produced by
        gen_token_iter() from token_iter and appends the data blocks it
        finds to cif_file. Errors are reported through syntax_error().
        The method returns when a stop_ token is read; running out of
        tokens raises StopIteration out of this method.

        XXX: loop_, data_, save_ tags are handled in a case-sensitive
             manner.  These tokens are case-insensitive.
        NOTE(review): split_token() lower-cases the reserved word, so the
             remark above may be out of date -- confirm.
        """

        cif_table_cache = dict()
        cif_data = None
        cif_table = None
        cif_row = None
        state = ""

        ## ignore anything in the input file until a reserved word is
        ## found
        while True:
            tblx, colx, strx, tokx = token_iter.next()
            if tokx is None:
                continue
            rword, name = self.split_token(tokx)
            if rword is not None:
                break

        while True:
            ##
            ## PROCESS STATE CHANGES
            ##
            if tblx is not None:
                state = "RD_SINGLE"

            elif tokx is not None:
                rword, name = self.split_token(tokx)

                if rword == "loop":
                    state = "RD_LOOP"

                elif rword == "data":
                    state = "RD_DATA"

                elif rword == "save":
                    state = "RD_SAVE"

                elif rword == "stop":
                    return

                elif rword == "global":
                    self.syntax_error("unable to handle global_ syntax")

                else:
                    self.syntax_error("bad token #1: " + str(tokx))

            else:
                self.syntax_error("bad token #2")
                return

            ##
            ## PROCESS DATA IN RD_SINGLE STATE
            ##
            if state == "RD_SINGLE":
                ## the first time a section is seen its table is created
                ## with one row; later items of the section reuse that row
                try:
                    cif_table = cif_table_cache[tblx]
                except KeyError:
                    cif_table = cif_table_cache[tblx] = mmCIFTable(tblx)

                    ## cif_data is still None (AttributeError) if no
                    ## data_/save_ block has been opened yet
                    try:
                        cif_data.append(cif_table)
                    except AttributeError:
                        self.syntax_error("section not contained in data_ block")
                        return

                    cif_row = mmCIFRow()
                    cif_table.append(cif_row)
                else:
                    try:
                        cif_row = cif_table[0]
                    except IndexError:
                        self.syntax_error("bad token #3")
                        return

                ## check for duplicate entries
                if colx in cif_table.columns:
                    self.syntax_error("redefined subsection (column)")
                    return
                else:
                    cif_table.append_column(colx)

                ## get the next token from the file, it should be the data
                ## keyed by the previous token
                tx, cx, strx, tokx = token_iter.next()
                if tx is not None or (strx is None and tokx is None):
                    self.syntax_error("missing data for _%s.%s" % (tblx,colx))

                if tokx is not None:
                    ## check token for reserved words
                    rword, name = self.split_token(tokx)
                    if rword is not None:
                        if rword == "stop":
                            return
                        self.syntax_error("unexpected reserved word: %s" % (rword))

                    ## a "." value is not stored in the row
                    if tokx != ".":
                        cif_row[colx] = tokx

                elif strx is not None:
                    cif_row[colx] = strx

                else:
                    self.syntax_error("bad token #4")

                tblx, colx, strx, tokx = token_iter.next()
                continue

            ###
            ## PROCESS DATA IN RD_LOOP STATE
            ##
            ## This is entered upon the beginning of a loop, and
            ## the loop is read completely before exiting.
            ###
            elif state == "RD_LOOP":
                ## the first section.subsection (tblx.colx) is read
                ## to create the section(table) name for the entire loop
                tblx, colx, strx, tokx = token_iter.next()

                if tblx is None or colx is None:
                    self.syntax_error("bad token #5")
                    return

                if cif_table_cache.has_key(tblx):
                    self.syntax_error("_loop section duplication")
                    return

                cif_table = mmCIFTable(tblx)

                try:
                    cif_data.append(cif_table)
                except AttributeError:
                    self.syntax_error("_loop section not contained in data_ block")
                    return

                cif_table.append_column(colx)

                ## read the remaining subsection definitions for the loop_
                while True:
                    tblx, colx, strx, tokx = token_iter.next()

                    if tblx is None:
                        break

                    if tblx != cif_table.name:
                        self.syntax_error("changed section names in loop_")
                        return

                    cif_table.append_column(colx)

                ## before starting to read data, check tokx for any control
                ## tokens
                if tokx is not None:
                    rword, name = self.split_token(tokx)
                    if rword is not None:
                        if rword == "stop":
                            return
                        else:
                            self.syntax_error(
                                "unexpected reserved word: %s" % (rword))

                ## now read all the data
                while True:
                    cif_row = mmCIFRow()
                    cif_table.append(cif_row)

                    ## one value is consumed per column; a "." value is
                    ## not stored in the row
                    for col in cif_table.columns:
                        if tokx is not None:
                            if tokx != ".":
                                cif_row[col] = tokx
                        elif strx is not None:
                            cif_row[col] = strx

                        tblx,colx,strx,tokx = token_iter.next()

                    ## the loop ends when one of these conditions is met:
                    ## condition #1: a new table is encountered
                    if tblx is not None:
                        break

                    ## condition #2: a reserved word is encountered
                    if tokx is not None:
                        rword, name = self.split_token(tokx)
                        if rword is not None:
                            break

                continue

            elif state == "RD_DATA":
                ## tokx[5:] drops the 5 character "data_" prefix
                cif_data = mmCIFData(tokx[5:])
                cif_file.append(cif_data)
                cif_table_cache = dict()
                cif_table = None

                tblx,colx,strx,tokx = token_iter.next()

            elif state == "RD_SAVE":
                ## tokx[5:] drops the 5 character "save_" prefix
                cif_data = mmCIFSave(tokx[5:])
                cif_file.append(cif_data)
                cif_table_cache = dict()
                cif_table = None

                tblx,colx,strx,tokx = token_iter.next()


    def gen_token_iter(self, fileobj):
        """Generator which reads fileobj line by line and yields one
        4-tuple per token:
          (section name, subsection name, string, unquoted token)
        For a _section.subsection tag the first two items are set, for a
        quoted or semicolon-delimited multi-line string the third, and
        for any other token the fourth; the remaining items are None.
        Comments are skipped. self.line_number is incremented for every
        line read.
        """
        re_tok = re.compile(
            r"(?:"

             "(?:_(.+?)[.](\S+))"               "|"  # _section.subsection

             "(?:['\"](.*?)(?:['\"]\s|['\"]$))" "|"  # quoted strings

             "(?:\s*#.*$)"                      "|"  # comments

             "(\S+)"                                 # unquoted tokens

             ")")

        file_iter = iter(fileobj)

        ## parse file, yielding tokens for self.parse()
        while True:
            ln = file_iter.next()
            self.line_number += 1

            ## skip comments
            if ln.startswith("#"):
                continue

            ## semicolon multi-line strings: collect every line up to the
            ## next line starting with ";"
            if ln.startswith(";"):
                lmerge = [ln[1:]]
                while True:
                    ln = file_iter.next()
                    self.line_number += 1
                    if ln.startswith(";"):
                        break
                    lmerge.append(ln)

                ## trailing whitespace of the last text line is dropped
                lmerge[-1] = lmerge[-1].rstrip()
                yield (None, None, "".join(lmerge), None)
                continue

            ## split line into tokens
            tok_iter = re_tok.finditer(ln)

            ## a comment matches the pattern without filling any group,
            ## it is skipped here
            for tokm in tok_iter:
                groups = tokm.groups()
                if groups != (None, None, None, None):
                    yield groups
894 895
class mmCIFFileWriter(object):
    """Writes out a mmCIF file using the data in the mmCIFData list.
    """
    def write_file(self, fil, cif_data_list):
        """Write every data block in cif_data_list to the file object fil.
        """
        self.fil = fil

        ## constant controls the spacing between columns
        self.SPACING = 2

        ## iterate through the data sections and write them
        ## out to the file
        for cif_data in cif_data_list:
            self.cif_data = cif_data
            self.write_cif_data()

    def write(self, x):
        """Write the string x to the output file."""
        self.fil.write(x)

    def writeln(self, x = ""):
        """Write the string x followed by a newline to the output file."""
        self.fil.write(x + "\n")

    def write_mstring(self, mstring):
        """Write mstring as a semicolon-delimited multi-line string."""
        self.write(self.form_mstring(mstring))

    def form_mstring(self, mstring):
        """Return mstring formatted as a semicolon-delimited multi-line
        string. Lines longer than MAX_LINE - 2 characters are broken up
        into pieces of that length.
        """
        l = [";"]

        lw = MAX_LINE - 2
        for x in mstring.split("\n"):
            ## keep empty lines of the text
            if x == "":
                l.append("\n")
                continue

            while len(x) > 0:
                l.append(x[:lw])
                l.append("\n")

                x = x[lw:]

        l.append(";\n")
        return "".join(l)

    def data_type(self, x):
        """Analyze x and return the 2-tuple (value to write, type), where
        type is one of: token, qstring, mstring

        Values which are not strings are converted with str() and written
        as tokens. An empty string is written as the token ".".
        """
        assert x is not None

        if not isinstance(x, str):
            x = str(x)
            return x, "token"

        if x == "" or x == ".":
            return ".", "token"

        if x.find("\n") != -1:
            return x, "mstring"

        ## values with white space or "#" need quoting; they become a
        ## multi-line string if they are too long or contain a quote
        ## character followed by a space
        if x.count(" ") != 0 or x.count("\t") != 0 or x.count("#") != 0:
            if len(x) > (MAX_LINE - 2):
                return x, "mstring"
            if x.count("' ") != 0 or x.count('" ') != 0:
                return x, "mstring"
            return x, "qstring"

        if len(x) < MAX_LINE:
            return x, "token"
        else:
            return x, "mstring"

    def write_cif_data(self):
        """Write the data block stored in self.cif_data: its save_ or
        data_ header followed by all of its tables which contain rows.
        Raises mmCIFError for a table with several rows but no columns.
        """
        if isinstance(self.cif_data, mmCIFSave):
            self.writeln("save_%s" % self.cif_data.name)
        else:
            self.writeln("data_%s" % self.cif_data.name)

        self.writeln("#")

        for cif_table in self.cif_data:
            ## ignore tables without data rows
            if len(cif_table) == 0:
                continue

            ## special handling for tables with one row of data
            elif len(cif_table) == 1:
                self.write_one_row_table(cif_table)

            ## _loop tables
            elif len(cif_table) > 1 and len(cif_table.columns) > 0:
                self.write_multi_row_table(cif_table)

            else:
                raise mmCIFError()

            self.writeln("#")

    def write_one_row_table(self, cif_table):
        """Write a table with a single row as one "_table.column value"
        item per column. Columns without a value in the row are written
        as "?".
        """
        row = cif_table[0]

        ## determine max key length for formatting output
        kmax = 0
        ## + 2 for the "_" and "." of the tag
        table_len = len(cif_table.name) + 2
        for col in cif_table.columns:
            klen = table_len + len(col)
            assert klen < MAX_LINE
            kmax = max(kmax, klen)

        ## we need a space after the tag
        kmax += self.SPACING
        vmax = MAX_LINE - kmax - 1

        ## write out the keys and values
        for col in cif_table.columns:

            cif_key = "_%s.%s" % (cif_table.name, col)
            l = [cif_key.ljust(kmax)]

            try:
                x0 = row[col]
            except KeyError:
                x = "?"
                dtype = "token"
            else:
                x, dtype = self.data_type(x0)

            if dtype == "token":
                ## values too long for the rest of the line are written
                ## on the next line
                if len(x) > vmax:
                    l.append("\n")
                l.append("%s\n" % (x))
                self.write("".join(l))

            elif dtype == "qstring":
                ## quoted strings too long for the rest of the line are
                ## written as a multi-line string
                if len(x) > vmax:
                    l.append("\n")
                    self.write("".join(l))
                    self.write_mstring(x)

                else:
                    l.append("'%s'\n" % (x))
                    self.write("".join(l))

            elif dtype == "mstring":
                l.append("\n")
                self.write("".join(l))
                self.write_mstring(x)

    def write_multi_row_table(self, cif_table):
        """Write a table with several rows as a loop_ section with
        aligned columns. Columns without a value in a row are written
        as ".".
        """
        ## write the key description for the loop_
        self.writeln("loop_")
        for col in cif_table.columns:
            key = "_%s.%s" % (cif_table.name, col)
            assert len(key) < MAX_LINE
            self.writeln(key)

        ## width and data type to use for each column
        col_len_map = {}
        col_dtype_map = {}

        for row in cif_table:
            for col in cif_table.columns:
                ## get data and data type
                try:
                    x0 = row[col]
                except KeyError:
                    lenx = 1
                    dtype = "token"
                else:
                    x, dtype = self.data_type(x0)

                    ## determine write length of data
                    if dtype == "token":
                        lenx = len(x)
                    elif dtype == "qstring":
                        ## + 2 for the quotes
                        lenx = len(x) + 2
                    else:
                        lenx = 0

                ## first value seen for this column
                try:
                    col_dtype = col_dtype_map[col]
                except KeyError:
                    col_dtype_map[col] = dtype
                    col_len_map[col] = lenx
                    continue

                ## update the column character width if necessary
                if col_len_map[col] < lenx:
                    col_len_map[col] = lenx

                ## modify column data type if necessary; the type is only
                ## ever changed token -> qstring -> mstring
                if col_dtype != dtype:
                    if dtype == "mstring":
                        col_dtype_map[col] = "mstring"
                    elif col_dtype == "token" and dtype == "qstring":
                        col_dtype_map[col] = "qstring"

        ## form a write list of the column names with values of None to
        ## indicate a newline
        wlist = []
        llen = 0
        for col in cif_table.columns:
            dtype = col_dtype_map[col]

            ## multi-line strings always start on a new line
            if dtype == "mstring":
                llen = 0
                wlist.append((None, None, None))
                wlist.append((col, dtype, None))
                continue

            lenx = col_len_map[col]
            if llen == 0:
                llen = lenx
            else:
                llen += self.SPACING + lenx

            ## start a new line if this column does not fit anymore
            if llen > (MAX_LINE - 1):
                wlist.append((None, None, None))
                llen = lenx

            wlist.append((col, dtype, lenx))

        ## write out the data
        spacing = " " * self.SPACING
        add_space = False
        listx = []

        for row in cif_table:
            for (col, dtype, lenx) in wlist:

                if col is None:
                    add_space = False
                    listx.append("\n")
                    continue

                if add_space == True:
                    add_space = False
                    listx.append(spacing)

                if dtype == "token":
                    x = str(row.get(col, "."))
                    if x == "":
                        x = "."
                    x = x.ljust(lenx)
                    listx.append(x)
                    add_space = True

                elif dtype == "qstring":
                    x = row.get(col, ".")
                    if x == "":
                        x = "."
                    ## "." and "?" are written without quotes
                    elif x != "." and x != "?":
                        x = "'%s'" % (x)
                    x = x.ljust(lenx)
                    listx.append(x)
                    add_space = True

                elif dtype == "mstring":
                    try:
                        listx.append(self.form_mstring(row[col]))
                    except KeyError:
                        listx.append(".\n")
                    add_space = False

            add_space = False
            listx.append("\n")

            ## write out listx if it gets big to avoid using a lot of
            ## memory
            if len(listx) > 1024:
                self.write("".join(listx))
                listx = []

        ## write out the _loop section
        self.write("".join(listx))
1167 1168 1169 ### <testing>
def test_module():
    """Read the mmCIF file named by the first command line argument and
    write it back out to standard output. Prints a usage message and
    exits if the argument is missing.
    """
    import sys

    if len(sys.argv) < 2:
        print("usage: mmCIF.py <mmCIF file path>")
        raise SystemExit

    cif = mmCIFDictionary()
    cif.load_file(sys.argv[1])
    cif.save_file(sys.stdout)


if __name__ == '__main__':
    test_module()
### </testing>