Package mmLib :: Module PDB
[hide private]
[frames] | no frames]

Source Code for Module mmLib.PDB

   1  ## Copyright 2002-2010 by PyMMLib Development Group (see AUTHORS file) 
   2  ## This code is part of the PyMMLib distribution and governed by 
   3  ## its license.  Please see the LICENSE file that should have been 
   4  ## included as part of this package. 
   5  """Brookhaven PDB v2.2 file parser. All records in the PDB v2.2 
   6  specification have corresponding classes defined here. PDB files are 
   7  loaded into a list of these classes, and can also be constructed/modified 
   8  and written back out as PDB files. 
   9  """ 
  10  from __future__ import generators 
  11  import fpformat 
  12   
  13   
class PDBError(Exception):
    """Base class of the PDB exception classes defined in this module.
    """
18 19
class PDBValueError(PDBError):
    """Error carrying a text message about a bad PDB field value.

    text -- the message; it is stored on the instance and returned by
            str()
    """
    def __init__(self, text):
        ## initialize the base exception too, so that args, repr() and
        ## pickling carry the message instead of being empty
        PDBError.__init__(self, text)
        self.text = text

    def __str__(self):
        """Return the message given to the constructor.
        """
        return self.text
28 29
class PDBRecord(dict):
    """Base class for all PDB file records.

    A record is a dictionary mapping field names to field values.
    Subclasses describe their fixed-column layout with two class
    attributes:

    _name       -- the text written at the start of the record line
    _field_list -- list of 6-tuples
                   (field, start, end, ftype, just, get_func) where
                   start and end are 1-based, inclusive column numbers,
                   ftype starts with "string", "integer" or "float"
                   (for floats the 7th character is the number of
                   decimals, e.g. "float.3"), just selects the
                   justification/stripping, and get_func, when set, is
                   called with the record to produce the field text
    """
    _name = None
    _field_list = None

    def __str__(self):
        """Return the record formatted by write().
        """
        return self.write()

    def write(self):
        """Return a properly formed PDB record string from the instance
        dictionary values.

        Raises AssertionError if a field would start inside text that has
        already been written, or if a value is not a string after
        conversion, and PDBValueError if a float field cannot be
        formatted.
        """
        ln = self._name

        for (field, start, end, ftype, just, get_func) in self._field_list:

            ## fields must not overlap the text written so far; this is an
            ## explicit check (not an assert statement) so that it still
            ## runs under python -O
            if len(ln) > (start - 1):
                print("[ASSERT] " + ln)
                raise AssertionError(
                    "field=%s starts at column %d, inside the text "
                    "already written" % (field, start))

            ## add spaces to the end if necessary
            ln = ln.ljust(start - 1)

            ## used later
            field_char_len = end - start + 1

            ## if a class has a special function defined for retrieving
            ## this field, it should use it
            if get_func:
                ln += get_func(self)
                continue

            ## get the data
            s = self.get(field, "")

            ## if the data is blank, then just add the spaces and continue
            if s is None or s == "":
                ln += " " * field_char_len
                continue

            ## convert integer and float types; strings are used as-is
            if ftype.startswith("string"):
                pass

            elif ftype.startswith("integer"):
                s = str(s)

            elif ftype.startswith("float"):
                try:
                    s = fpformat.fix(s, int(ftype[6]))
                except ValueError:
                    raise PDBValueError("field=%s %s not float" % (field, s))

            ## the value must be a string by now (explicit check so that
            ## it is not stripped under python -O)
            if not isinstance(s, str):
                print("### s %s %s %s %s" % (
                    str(type(s)), str(s), ftype, field))
                print(ln)
                raise AssertionError(
                    "field=%s value is not a string" % (field,))

            ## truncate values which are too long, otherwise justify
            if len(s) > field_char_len:
                ln += s[:field_char_len]
            elif just.startswith("ljust"):
                ln += s.ljust(field_char_len)
            else:
                ln += s.rjust(field_char_len)

        return ln

    def read(self, line):
        """Read the PDB record line and convert the fields to the appropriate
        dictionary values for this class.

        Blank fields, and integer/float fields which fail to convert, are
        not stored.
        """
        for (field, start, end, ftype, just, get_func) in self._field_list:
            s = line[start - 1:end]

            ## ignore blank fields
            if s == "" or s.isspace():
                continue

            elif ftype.startswith("string"):
                if just.endswith("lstrip"):
                    s = s.lstrip()
                elif just.endswith("rstrip"):
                    s = s.rstrip()
                else:
                    s = s.strip()

            elif ftype.startswith("integer"):
                try:
                    s = int(s)
                except ValueError:
                    continue

            elif ftype.startswith("float"):
                try:
                    s = float(s)
                except ValueError:
                    continue

            self[field] = s

    def reccat(self, rec_list, field):
        """Return the concatenation of field in all the records in rec_list.
        A single record may be given instead of a list; records without
        the field are skipped.
        """
        if not isinstance(rec_list, list):
            rec_list = [rec_list]

        values = [rec.get(field) for rec in rec_list]
        return "".join([x for x in values if x is not None])

    def reccat_list(self, rec_list, field, sep):
        """Call reccat, then split the result by the separator.  The
        items are returned stripped of surrounding whitespace.
        """
        return [x.strip() for x in self.reccat(rec_list, field).split(sep)]

    def reccat_tuplelist(self, rec_list, field, sep1, sep2):
        """Call reccat_list with sep1 as the list separator, then split
        the items into (key, value) tuples at the first occurrence of sep2.
        Items which do not contain sep2 are dropped.
        """
        listx = []
        for x in self.reccat_list(rec_list, field, sep1):
            i = x.find(sep2)
            if i == -1:
                continue
            key = x[:i].strip()
            ## skip the whole separator, which may be longer than one
            ## character
            val = x[i + len(sep2):].strip()
            listx.append((key, val))
        return listx

    def reccat_dictlist(self, rec_list, field, master_key):
        """Return a list of dictionaries built from the "key: value;" pairs
        found in field of the records in rec_list.  A new dictionary is
        started each time master_key is seen.
        """
        listx = []
        dictx = {}
        for (key, val) in self.reccat_tuplelist(rec_list, field, ";", ":"):
            if key == master_key:
                if dictx:
                    listx.append(dictx)
                    dictx = {}
            dictx[key] = val
        if dictx:
            listx.append(dictx)
        return listx

    def reccat_multi(self, rec_list, primary_key, translations):
        """Create a list of dictionaries from a list of records.  This
        method has complex behavior to support translations of several
        PDB records into a Python format.  The primary key is used to
        separate the dictionaries within the list, and the translation
        argument is a list of strings or 2-tuples.  If the translation is a
        string, the value from the PDB record field is copied to the return
        dictionary.  If the translation is a 2-tuple==t, then t[0] is the
        return dictionary key whose value is a list formed from the list of
        PDB fields in t[1].
        """
        if not isinstance(rec_list, list):
            rec_list = [rec_list]

        listx = []
        for rec in rec_list:

            ## XXX: add primary key generation for bad records
            try:
                pkey = rec[primary_key]
            except KeyError:
                ## if the record has no primary key, retrieve it from the
                ## last dictionary which is in the same order as the
                ## record list
                try:
                    pkey = listx[-1][primary_key]
                except (KeyError, IndexError):
                    continue

            ## search for a dictionary in listx with the same primary key
            dictx = None
            for dx in listx:
                if dx[primary_key] == pkey:
                    dictx = dx
                    break

            ## new dictx if not found
            if dictx is None:
                dictx = {primary_key: pkey}
                listx.append(dictx)

            ## translate the PDB record into dictx
            for trans in translations:

                ## source is a list of fields which should be
                ## added to a list under the dest key in dictx
                if isinstance(trans, tuple):
                    (dest, srcs) = trans

                    for sx in srcs:
                        try:
                            value = rec[sx]
                        except KeyError:
                            continue
                        dictx.setdefault(dest, []).append(value)

                ## source is a single record field which should be
                ## added to dictx under the dest key
                else:
                    try:
                        dictx[trans] = rec[trans]
                    except KeyError:
                        pass

        return listx
258 259 260 ############################################################################### 261 ## BEGIN PDB RECORD DEFINITIONS 262 263 ## SECTION 2: Title Section
class HEADER(PDBRecord):
    """HEADER record of the title section.  The title section holds the
    records used to describe the experiment and the biological
    macromolecules present in the entry: HEADER, OBSLTE, TITLE, CAVEAT,
    COMPND, SOURCE, KEYWDS, EXPDTA, AUTHOR, REVDAT, SPRSDE, JRNL, and
    REMARK records.
    """
    __slots__ = []

    _name = "HEADER"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("classification", 11, 50, "string", "rjust", None),
        ("depDate",        51, 59, "string", "rjust", None),
        ("idCode",         63, 66, "string", "rjust", None),
    ]
277 278
class OBSLTE(PDBRecord):
    """OBSLTE appears in entries which have been withdrawn from
    distribution.  It acts as a flag in an entry which has been withdrawn
    from the PDB's full release, and indicates which, if any, new entries
    have replaced the withdrawn entry.  The format allows for several new
    entries replacing one existing entry.
    """
    __slots__ = []

    _name = "OBSLTE"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("repDate",      12, 20, "string",  "rjust", None),
        ("idCode",       22, 25, "string",  "rjust", None),
        ("rIdCode1",     32, 35, "string",  "rjust", None),
        ("rIdCode2",     37, 40, "string",  "rjust", None),
        ("rIdCode3",     42, 45, "string",  "rjust", None),
        ("rIdCode4",     47, 50, "string",  "rjust", None),
        ("rIdCode5",     52, 55, "string",  "rjust", None),
        ("rIdCode6",     57, 60, "string",  "rjust", None),
        ("rIdCode7",     62, 65, "string",  "rjust", None),
        ("rIdCode8",     67, 70, "string",  "rjust", None),
    ]

    def process(self, recs):
        """Processes continued record list to a list of dictionary objects.
        Each dictionary contains the data from one OBSLTE idCode: its
        repDate and the replacement id codes collected under rIdCodes.
        """
        replacement_fields = [
            "rIdCode1", "rIdCode2", "rIdCode3", "rIdCode4",
            "rIdCode5", "rIdCode6", "rIdCode7", "rIdCode8"]
        translations = ["repDate", ("rIdCodes", replacement_fields)]
        return self.reccat_multi(recs, "idCode", translations)
312 313
class TITLE(PDBRecord):
    """The TITLE record contains a title for the experiment or analysis
    that is represented in the entry.  It should identify an entry in the
    PDB in the same way that a title identifies a paper.
    """
    __slots__ = []

    _name = "TITLE "
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("title",        11, 70, "string",  "ljust", None),
    ]

    def process(self, recs):
        """Return the title fields of the continued records joined into
        one string.
        """
        full_title = self.reccat(recs, "title")
        return full_title
329 330
class CAVEAT(PDBRecord):
    """CAVEAT warns of severe errors in an entry.  Use caution when using
    an entry containing this record.
    """
    __slots__ = []

    _name = "CAVEAT"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("idCode",       12, 15, "string",  "rjust", None),
        ("comment",      20, 70, "string",  "ljust", None),
    ]

    def process(self, recs):
        """Returns a list of dictionaries with keys idCode and comment.
        Records without an idCode are skipped, and the comments of records
        sharing an idCode are concatenated in record order.
        """
        caveats = []

        for rec in recs:
            id_code = rec.get("idCode")
            if id_code is None:
                continue

            ## reuse the entry with this idCode, or start a new one
            for entry in caveats:
                if entry.get("idCode") == id_code:
                    break
            else:
                entry = {"idCode": id_code}
                caveats.append(entry)

            ## append this record's comment text to the entry
            text = rec.get("comment")
            if text is None:
                continue
            if "comment" in entry:
                entry["comment"] += text
            else:
                entry["comment"] = text

        return caveats
373 374
class COMPND(PDBRecord):
    """The COMPND record describes the macromolecular contents of an
    entry.  Each macromolecule found in the entry is described by a set of
    token: value pairs, and is referred to as a COMPND record component.
    Since the concept of a molecule is difficult to specify exactly, PDB
    staff may exercise editorial judgment in consultation with depositors
    in assigning these names.  For each macromolecular component, the
    molecule name, synonyms, number assigned by the Enzyme Commission (EC),
    and other relevant details are specified.
    """
    __slots__ = []

    _name = "COMPND"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("compound",     11, 70, "string",  "ljust", None),
    ]

    def process(self, recs):
        """Return a list of dictionaries of the token: value pairs in the
        compound field, one dictionary per MOL_ID token.
        """
        components = self.reccat_dictlist(recs, "compound", "MOL_ID")
        return components
395 396
class SOURCE(PDBRecord):
    """The SOURCE record specifies the biological and/or chemical source
    of each biological molecule in the entry.  Sources are described by
    both the common name and the scientific name, e.g., genus and species.
    Strain and/or cell-line for immortalized cells are given when they help
    to uniquely identify the biological entity studied.
    """
    __slots__ = []

    _name = "SOURCE"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("srcName",      11, 70, "string",  "ljust", None),
    ]

    def process(self, recs):
        """Return a list of dictionaries of the token: value pairs in the
        srcName field, one dictionary per MOL_ID token.
        """
        sources = self.reccat_dictlist(recs, "srcName", "MOL_ID")
        return sources
414 415
class KEYWDS(PDBRecord):
    """The KEYWDS record contains a set of terms relevant to the entry.
    Terms in the KEYWDS record provide a simple means of categorizing
    entries and may be used to generate index files.  This record addresses
    some of the limitations found in the classification field of the HEADER
    record.  It provides the opportunity to add further annotation to the
    entry in a concise and computer-searchable fashion.
    """
    __slots__ = []

    _name = "KEYWDS"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("keywds",       11, 70, "string",  "ljust", None),
    ]

    def process(self, recs):
        """Return the list of comma separated keywords found in the
        keywds field of the continued records.
        """
        keywords = self.reccat_list(recs, "keywds", ",")
        return keywords
434 435
class EXPDTA(PDBRecord):
    """The EXPDTA record presents information about the experiment.  It
    identifies the experimental technique used.  This may refer to the
    type of radiation and sample, or include the spectroscopic or modeling
    technique.  Permitted values include:
        ELECTRON DIFFRACTION
        FIBER DIFFRACTION
        FLUORESCENCE TRANSFER
        NEUTRON DIFFRACTION
        NMR
        THEORETICAL MODEL
        X-RAY DIFFRACTION
    """
    __slots__ = []

    _name = "EXPDTA"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("technique",    11, 70, "string",  "ljust", None),
    ]
    ## techniques recognized by process()
    _technique_list = [
        "ELECTRON DIFFRACTION",
        "FIBER DIFFRACTION",
        "FLUORESCENCE TRANSFER",
        "NEUTRON DIFFRACTION",
        "NMR",
        "THEORETICAL MODEL",
        "X-RAY DIFFRACTION",
    ]

    def process(self, recs):
        """Returns a list of 2-tuples: (technique, comment) where technique
        is one of the accepted techniques and comment is the text following
        it, or None when there is no such text.  Items which do not start
        with an accepted technique are dropped.
        """
        expdta_list = []

        for item in self.reccat_list(recs, "technique", ";"):
            ## the first technique in _technique_list which prefixes the
            ## item wins
            for known in self._technique_list:
                if item.startswith(known):
                    remainder = item[len(known):].strip()
                    expdta_list.append((known, remainder or None))
                    break

        return expdta_list
485 486
class AUTHOR(PDBRecord):
    """The AUTHOR record contains the names of the people responsible for
    the contents of the entry.
    """
    __slots__ = []

    _name = "AUTHOR"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("authorList",   11, 70, "string",  "ljust", None),
    ]

    def process(self, recs):
        """Return the list of comma separated names found in the
        authorList field of the continued records.
        """
        authors = self.reccat_list(recs, "authorList", ",")
        return authors
501 502
class REVDAT(PDBRecord):
    """REVDAT records contain a history of the modifications made to an
    entry since its release.
    """
    __slots__ = []

    _name = "REVDAT"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("modNum",        8, 10, "integer", "rjust", None),
        ("continuation", 11, 12, "integer", "rjust", None),
        ("modDate",      14, 22, "string",  "rjust", None),
        ("modID",        24, 28, "string",  "rjust", None),
        ("modType",      32, 32, "integer", "rjust", None),
        ("record1",      40, 45, "string",  "ljust", None),
        ("record2",      47, 52, "string",  "ljust", None),
        ("record3",      54, 59, "string",  "ljust", None),
        ("record4",      61, 66, "string",  "ljust", None),
    ]

    def process(self, recs):
        """Return a list of dictionaries, one per modNum, holding modDate,
        modID, modType and the record names collected under records.
        """
        record_fields = ["record1", "record2", "record3", "record4"]
        translations = [
            "modDate",
            "modID",
            "modType",
            ("records", record_fields)]
        return self.reccat_multi(recs, "modNum", translations)
529 530
class SPRSDE(PDBRecord):
    """The SPRSDE records contain a list of the ID codes of entries that
    were made obsolete by the given coordinate entry and withdrawn from the
    PDB release set.  One entry may replace many.  It is PDB policy that
    only the principal investigator of a structure has the authority to
    withdraw it.
    """
    __slots__ = []

    _name = "SPRSDE"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("sprsdeDate",   12, 20, "string",  "rjust", None),
        ("idCode",       22, 25, "string",  "rjust", None),
        ("sIdCode1",     32, 35, "string",  "rjust", None),
        ("sIdCode2",     37, 40, "string",  "rjust", None),
        ("sIdCode3",     42, 45, "string",  "rjust", None),
        ("sIdCode4",     47, 50, "string",  "rjust", None),
        ("sIdCode5",     52, 55, "string",  "rjust", None),
        ("sIdCode6",     57, 60, "string",  "rjust", None),
        ("sIdCode7",     62, 65, "string",  "rjust", None),
        ("sIdCode8",     67, 70, "string",  "rjust", None),
    ]

    def process(self, recs):
        """Return a list of dictionaries, one per idCode, holding
        sprsdeDate and the superseded id codes collected under sIdCodes.
        """
        superseded_fields = [
            "sIdCode1", "sIdCode2", "sIdCode3", "sIdCode4",
            "sIdCode5", "sIdCode6", "sIdCode7", "sIdCode8"]
        translations = ["sprsdeDate", ("sIdCodes", superseded_fields)]
        return self.reccat_multi(recs, "idCode", translations)
560 561
class JRNL(PDBRecord):
    """The JRNL record contains the primary literature citation that
    describes the experiment which resulted in the deposited coordinate
    set.  There is at most one JRNL reference per entry.  If there is no
    primary reference, then there is no JRNL reference.  Other references
    are given in REMARK 1.
    """
    __slots__ = []

    ## NOTE(review): the other record names in this module are six
    ## characters wide; this literal is shorter, possibly because padding
    ## spaces were lost -- confirm
    _name = "JRNL "
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("text", 13, 70, "string", "ljust", None),
    ]
573 574
class REMARK(PDBRecord):
    """REMARK records present experimental details, annotations, comments,
    and information not included in other records.  In a number of cases,
    REMARKs are used to expand the contents of other record types.  A new
    level of structure is being used for some REMARK records.  This is
    expected to facilitate searching and will assist in the conversion to
    a relational database.
    """
    __slots__ = []

    _name = "REMARK"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("remarkNum",  8, 10, "integer", "rjust", None),
        ("text",      12, 70, "string",  "ljust", None),
    ]
589 590 591 ## SECTION 3: Primary Structure Section
class DBREF(PDBRecord):
    """The DBREF record provides cross-reference links between PDB
    sequences and the corresponding database entry or entries.  A cross
    reference to the sequence database is mandatory for each peptide chain
    with a length greater than ten (10) residues.  For nucleic acid entries
    a DBREF record pointing to the Nucleic Acid Database (NDB) is mandatory
    when the corresponding entry exists in NDB.
    """
    __slots__ = []

    _name = "DBREF "
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("idCode",       8, 11, "string",  "rjust", None),
        ("chain_ID",    13, 13, "string",  "rjust", None),
        ("seqBegin",    15, 18, "integer", "rjust", None),
        ("insertBegin", 19, 19, "string",  "rjust", None),
        ("seqEnd",      21, 24, "integer", "rjust", None),
        ("insertEnd",   25, 25, "string",  "rjust", None),
        ("database",    27, 32, "string",  "ljust", None),
        ("dbAccession", 34, 41, "string",  "ljust", None),
        ("dbIdCode",    43, 54, "string",  "ljust", None),
        ("dbseqBegin",  56, 60, "integer", "rjust", None),
        ("idbnsBeg",    61, 61, "string",  "rjust", None),
        ("dbseqEnd",    63, 67, "integer", "rjust", None),
        ("dbinsEnd",    68, 68, "string",  "rjust", None),
    ]
617 618
class SEQADV(PDBRecord):
    """The SEQADV record identifies conflicts between sequence information
    in the ATOM records of the PDB entry and the sequence database entry
    given on DBREF.  Please note that these records were designed to
    identify differences and not errors.  No assumption is made as to which
    database contains the correct data.  PDB may include REMARK records in
    the entry that reflect the depositor's view of which database has the
    correct sequence.
    """
    __slots__ = []

    _name = "SEQADV"
    ## (field, start column, end column, type, justification, get_func)
    ## NOTE(review): "convlict" looks like a misspelling of "conflict"; it
    ## is a dictionary key, so it is left unchanged -- confirm before
    ## renaming
    _field_list = [
        ("idCode",    8, 11, "string",  "rjust", None),
        ("resName",  13, 15, "string",  "rjust", None),
        ("chainID",  17, 17, "string",  "rjust", None),
        ("seqNum",   19, 22, "integer", "rjust", None),
        ("iCode",    23, 23, "string",  "rjust", None),
        ("database", 25, 28, "string",  "ljust", None),
        ("dbIDCode", 30, 38, "string",  "ljust", None),
        ("dbRes",    40, 42, "string",  "rjust", None),
        ("dbSeq",    44, 48, "integer", "rjust", None),
        ("convlict", 50, 70, "string",  "ljust", None),
    ]
642 643
class SEQRES(PDBRecord):
    """The SEQRES records contain the amino acid or nucleic acid sequence
    of residues in each chain of the macromolecule that was studied.
    """
    __slots__ = []

    _name = "SEQRES"
    _multi_record = "serNum"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("serNum",     9, 10, "integer", "rjust", None),
        ("chainID",   12, 12, "string",  "rjust", None),
        ("numRes",    14, 17, "integer", "rjust", None),
        ("resName1",  20, 22, "string",  "rjust", None),
        ("resName2",  24, 26, "string",  "rjust", None),
        ("resName3",  28, 30, "string",  "rjust", None),
        ("resName4",  32, 34, "string",  "rjust", None),
        ("resName5",  36, 38, "string",  "rjust", None),
        ("resName6",  40, 42, "string",  "rjust", None),
        ("resName7",  44, 46, "string",  "rjust", None),
        ("resName8",  48, 50, "string",  "rjust", None),
        ("resName9",  52, 54, "string",  "rjust", None),
        ("resName10", 56, 58, "string",  "rjust", None),
        ("resName11", 60, 62, "string",  "rjust", None),
        ("resName12", 64, 66, "string",  "rjust", None),
        ("resName13", 68, 70, "string",  "rjust", None),
    ]

    def process(self, recs):
        """Returns a dictionary with the keys chain_id, num_res, and
        sequence_list.  chain_id and num_res are taken from the last
        record; sequence_list holds the residue names of all records in
        order and is only present when at least one residue name was found.
        """
        residue_fields = ["resName%d" % i for i in range(1, 14)]
        seqres = {}

        for rec in recs:
            seqres["chain_id"] = rec.get("chainID", "")
            seqres["num_res"] = rec.get("numRes", 0)

            for name in residue_fields:
                try:
                    residue = rec[name]
                except KeyError:
                    continue
                seqres.setdefault("sequence_list", []).append(residue)

        return seqres
694 695
class MODRES(PDBRecord):
    """The MODRES record provides descriptions of modifications (e.g.,
    chemical or post-translational) to protein and nucleic acid residues.
    Included are a mapping between residue names given in a PDB entry and
    standard residues.
    """
    __slots__ = []

    _name = "MODRES"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("idCode",   8, 11, "string",  "rjust", None),
        ("resName", 13, 15, "string",  "rjust", None),
        ("chainID", 17, 17, "string",  "rjust", None),
        ("seqNum",  19, 22, "integer", "rjust", None),
        ("iCode",   23, 23, "string",  "rjust", None),
        ("stdRes",  25, 27, "string",  "rjust", None),
        ("comment", 30, 70, "string",  "ljust", None),
    ]
713 714 715 ## SECTION 4: Heterogen Section
class HET(PDBRecord):
    """The HET records are used to describe non-standard residues, such as
    prosthetic groups, inhibitors, solvent molecules, and ions for which
    coordinates are supplied.  Groups are considered HET if they are:
      - not one of the standard amino acids, and
      - not one of the nucleic acids (C, G, A, T, U, and I), and
      - not one of the modified versions of nucleic acids (+C, +G, +A,
        +T, +U, and +I), and
      - not an unknown amino acid or nucleic acid where UNK is used to
        indicate the unknown residue name.
    Het records also describe heterogens for which the chemical identity
    is unknown, in which case the group is assigned the hetID UNK.
    """
    __slots__ = []

    ## NOTE(review): the other record names in this module are six
    ## characters wide; this literal is shorter, possibly because padding
    ## spaces were lost -- confirm
    _name = "HET "
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("hetID",        8, 10, "string",  "rjust", None),
        ("chainID",     13, 13, "string",  "rjust", None),
        ("seqNum",      14, 17, "integer", "rjust", None),
        ("iCode",       18, 18, "string",  "rjust", None),
        ("numHetAtoms", 21, 25, "integer", "rjust", None),
        ("text",        31, 70, "string",  "ljust", None),
    ]
739 740
class HETNAM(PDBRecord):
    """This record gives the chemical name of the compound with the
    given hetID.
    """
    __slots__ = []

    _name = "HETNAM"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    ## the continuation number is right justified like the continuation
    ## field of every other continued record in this module; it used to
    ## be written left justified ("2 " instead of " 2")
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("hetID",        12, 14, "string",  "rjust", None),
        ("text",         16, 70, "string",  "ljust", None),
    ]
753 754
class HETSYN(PDBRecord):
    """This record provides synonyms, if any, for the compound in the
    corresponding (i.e., same hetID) HETNAM record.  This is to allow
    greater flexibility in searching for HET groups.
    """
    __slots__ = []

    _name = "HETSYN"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    ## the continuation number is right justified like the continuation
    ## field of every other continued record in this module; it used to
    ## be written left justified ("2 " instead of " 2")
    _field_list = [
        ("continuation",  9, 10, "integer", "rjust", None),
        ("hetID",        12, 14, "string",  "rjust", None),
        ("hetSynonyms",  16, 70, "string",  "ljust", None),
    ]
768 769
class FORMUL(PDBRecord):
    """The FORMUL record presents the chemical formula and charge of a
    non-standard group.  (The formulas for the standard residues are given
    in Appendix 5.)
    """
    __slots__ = []

    _name = "FORMUL"
    _multi_record = "continuation"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("compNum",       9, 10, "integer", "rjust", None),
        ("hetID",        13, 15, "string",  "rjust", None),
        ("continuation", 17, 18, "integer", "rjust", None),
        ("asterisk",     19, 19, "string",  "rjust", None),
        ("text",         20, 70, "string",  "ljust", None),
    ]
785 786 787 ## SECTION 5: Secondary Structure Section
class HELIX(PDBRecord):
    """HELIX records are used to identify the position of helices in the
    molecule.  Helices are both named and numbered.  The residues where the
    helix begins and ends are noted, as well as the total length.
    """
    __slots__ = []

    _name = "HELIX "
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("serNum",       8, 10, "integer", "rjust", None),
        ("helixID",     12, 14, "string",  "rjust", None),
        ("initResName", 16, 18, "string",  "rjust", None),
        ("initChainID", 20, 20, "string",  "rjust", None),
        ("initSeqNum",  22, 25, "integer", "rjust", None),
        ("initICode",   26, 26, "string",  "rjust", None),
        ("endResName",  28, 30, "string",  "rjust", None),
        ("endChainID",  32, 32, "string",  "rjust", None),
        ("endSeqNum",   34, 37, "integer", "rjust", None),
        ("endICode",    38, 38, "string",  "rjust", None),
        ("helixClass",  39, 40, "integer", "rjust", None),
        ("comment",     41, 70, "string",  "ljust", None),
        ("length",      72, 76, "integer", "rjust", None),
    ]
810 811
class SHEET(PDBRecord):
    """SHEET records are used to identify the position of sheets in the
    molecule.  Sheets are both named and numbered.  The residues where the
    sheet begins and ends are noted.
    """
    __slots__ = []

    _name = "SHEET "
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("strand",       8, 10, "integer", "rjust", None),
        ("sheetID",     12, 14, "string",  "rjust", None),
        ("numStrands",  15, 16, "integer", "rjust", None),
        ("initResName", 18, 20, "string",  "rjust", None),
        ("initChainID", 22, 22, "string",  "rjust", None),
        ("initSeqNum",  23, 26, "integer", "rjust", None),
        ("initICode",   27, 27, "string",  "rjust", None),
        ("endResName",  29, 31, "string",  "rjust", None),
        ("endChainID",  33, 33, "string",  "rjust", None),
        ("endSeqNum",   34, 37, "integer", "rjust", None),
        ("endICode",    38, 38, "string",  "rjust", None),
        ("sense",       39, 40, "integer", "rjust", None),
        ("curAtom",     42, 45, "string",  "rjust", None),
        ("curResName",  46, 48, "string",  "rjust", None),
        ("curChainID",  50, 50, "string",  "rjust", None),
        ("curResSeq",   51, 54, "integer", "rjust", None),
        ("curICode",    55, 55, "string",  "rjust", None),
        ("prevAtom",    57, 60, "string",  "rjust", None),
        ("prevResName", 61, 63, "string",  "rjust", None),
        ("prevChainID", 65, 65, "string",  "rjust", None),
        ("prevResSeq",  66, 69, "integer", "rjust", None),
        ("prevICode",   70, 70, "string",  "rjust", None),
    ]
843 844
class TURN(PDBRecord):
    """The TURN records identify turns and other short loop turns which
    normally connect other secondary structure segments.
    """
    __slots__ = []

    ## NOTE(review): the other record names in this module are six
    ## characters wide; this literal is shorter, possibly because padding
    ## spaces were lost -- confirm
    _name = "TURN "
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("seq",          8, 10, "integer", "rjust", None),
        ("turnID",      12, 14, "string",  "rjust", None),
        ("initResName", 16, 18, "string",  "rjust", None),
        ("initChainID", 20, 20, "string",  "rjust", None),
        ("initSeqNum",  21, 24, "integer", "rjust", None),
        ("initICode",   25, 25, "string",  "rjust", None),
        ("endResName",  27, 29, "string",  "rjust", None),
        ("endChainID",  31, 31, "string",  "rjust", None),
        ("endSeqNum",   32, 35, "integer", "rjust", None),
        ("endICode",    36, 36, "string",  "rjust", None),
        ("comment",     41, 70, "string",  "ljust", None),
    ]
864 865 866 ## SECTION 6: Connectivity Annotation Section
class SSBOND(PDBRecord):
    """The SSBOND record identifies each disulfide bond in protein and
    polypeptide structures by identifying the two residues involved in the
    bond.
    """
    __slots__ = []

    _name = "SSBOND"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("serNum",    8, 10, "integer", "rjust", None),
        ("resName1", 12, 14, "string",  "rjust", None),
        ("chainID1", 16, 16, "string",  "rjust", None),
        ("seqNum1",  18, 21, "integer", "rjust", None),
        ("iCode1",   22, 22, "string",  "rjust", None),
        ("resName2", 26, 28, "string",  "rjust", None),
        ("chainID2", 30, 30, "string",  "rjust", None),
        ("seqNum2",  32, 35, "integer", "rjust", None),
        ("iCode2",   36, 36, "string",  "rjust", None),
        ("sym1",     60, 65, "string",  "rjust", None),
        ("sym2",     67, 72, "string",  "rjust", None),
    ]
887 888 913 914
class HYDBND(PDBRecord):
    """The HYDBND records specify hydrogen bonds in the entry.
    """
    __slots__ = []

    _name = "HYDBND"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("name1",    13, 16, "string",  "rjust", None),
        ("altLoc1",  17, 17, "string",  "rjust", None),
        ("resName1", 18, 20, "string",  "rjust", None),
        ("chainID1", 22, 22, "string",  "rjust", None),
        ("resSeq1",  23, 27, "integer", "rjust", None),
        ("iCode1",   28, 28, "string",  "rjust", None),
        ("nameH",    30, 33, "string",  "rjust", None),
        ("altLocH",  34, 34, "string",  "rjust", None),
        ("chainH",   36, 36, "string",  "rjust", None),
        ("resSeqH",  37, 41, "integer", "rjust", None),
        ("iCodeH",   42, 42, "string",  "rjust", None),
        ("name2",    44, 47, "string",  "rjust", None),
        ("altLoc2",  48, 48, "string",  "rjust", None),
        ("resName2", 49, 51, "string",  "rjust", None),
        ("chainID2", 53, 53, "string",  "rjust", None),
        ("resSeq2",  54, 58, "integer", "rjust", None),
        ("iCode2",   59, 59, "string",  "rjust", None),
        ("sym1",     60, 65, "string",  "rjust", None),
        ("sym2",     67, 72, "string",  "rjust", None),
    ]
941 942
class SLTBRG(PDBRecord):
    """The SLTBRG records specify salt bridges in the entry.
    """
    __slots__ = []

    _name = "SLTBRG"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("name1",    13, 16, "string",  "rjust", None),
        ("altLoc1",  17, 17, "string",  "rjust", None),
        ("resName1", 18, 20, "string",  "rjust", None),
        ("chainID1", 22, 22, "string",  "rjust", None),
        ("resSeq1",  23, 26, "integer", "rjust", None),
        ("iCode1",   27, 27, "string",  "rjust", None),
        ("name2",    43, 46, "string",  "rjust", None),
        ("altLoc2",  47, 47, "string",  "rjust", None),
        ("resName2", 48, 50, "string",  "rjust", None),
        ("chainID2", 52, 52, "string",  "rjust", None),
        ("resSeq2",  53, 56, "integer", "rjust", None),
        ("iCode2",   57, 57, "string",  "rjust", None),
        ("sym1",     60, 65, "string",  "rjust", None),
        ("sym2",     67, 72, "string",  "rjust", None),
    ]
964 965
class CISPEP(PDBRecord):
    """CISPEP records specify the prolines and other peptides found to be
    in the cis conformation.  This record replaces the use of footnote
    records to list cis peptides.
    """
    __slots__ = []

    _name = "CISPEP"
    ## (field, start column, end column, type, justification, get_func)
    _field_list = [
        ("serial",    8, 10, "integer", "rjust", None),
        ("resName1", 12, 14, "string",  "rjust", None),
        ("chainID1", 16, 16, "string",  "rjust", None),
        ("seqNum1",  18, 21, "integer", "rjust", None),
        ("iCode1",   22, 22, "string",  "rjust", None),
        ("resName2", 26, 28, "string",  "rjust", None),
        ("chainID2", 30, 30, "string",  "rjust", None),
        ("seqNum2",  32, 35, "integer", "rjust", None),
        ("iCode2",   36, 36, "string",  "rjust", None),
        ("modNum",   44, 46, "integer", "rjust", None),
        ("measure",  54, 59, "float.2", "rjust", None),
    ]
986 987 988 ## SECTION 7: Miscellaneous Features Section
class SITE(PDBRecord):
    """SITE record: identifies the groups that make up an important site
    in the macromolecule. One record carries up to four residues
    (resName/chainID/seq/iCode 1 through 4).

    Every _field_list row reads (field key, first column, last column,
    field type, justification, optional getter function).
    """
    __slots__ = []

    ## The record name must be exactly six characters wide:
    ## iter_pdb_records() looks classes up by ln[:6].ljust(6), so a
    ## shorter key could never match a line read from a file.
    _name = "SITE  "
    _field_list = [
        ("seqNum",    8, 10, "integer", "rjust", None),
        ("siteID",   12, 14, "string",  "rjust", None),
        ("numRes",   16, 17, "integer", "rjust", None),
        ## residue 1
        ("resName1", 19, 21, "string",  "rjust", None),
        ("chainID1", 23, 23, "string",  "rjust", None),
        ("seq1",     24, 27, "integer", "rjust", None),
        ("iCode1",   28, 28, "string",  "rjust", None),
        ## residue 2
        ("resName2", 30, 32, "string",  "rjust", None),
        ("chainID2", 34, 34, "string",  "rjust", None),
        ("seq2",     35, 38, "integer", "rjust", None),
        ("iCode2",   39, 39, "string",  "rjust", None),
        ## residue 3
        ("resName3", 41, 43, "string",  "rjust", None),
        ("chainID3", 45, 45, "string",  "rjust", None),
        ("seq3",     46, 49, "integer", "rjust", None),
        ("iCode3",   50, 50, "string",  "rjust", None),
        ## residue 4
        ("resName4", 52, 54, "string",  "rjust", None),
        ("chainID4", 56, 56, "string",  "rjust", None),
        ("seq4",     57, 60, "integer", "rjust", None),
        ("iCode4",   61, 61, "string",  "rjust", None),
    ]
1016 1017 1018 ## SECTION 8: Crystallographic and Coordinate Transformation Section
class CRYSTn(PDBRecord):
    """Shared field layout for the CRYSTn (n=1,2,3) records, which hold
    the unit cell parameters, space group and Z value. For structures
    not determined by crystallography the record simply describes a
    unit cube.

    This base class sets no _name; the concrete CRYST1/2/3 subclasses
    supply it.
    """
    __slots__ = []

    ## NOTE(review): the PDB format documentation, as far as I recall,
    ## gives alpha/beta/gamma two decimal places and right-justifies Z.
    ## The values below are kept exactly as found -- confirm before
    ## changing them.
    _field_list = [
        ## cell edge lengths
        ("a",       7, 15, "float.3", "rjust", None),
        ("b",      16, 24, "float.3", "rjust", None),
        ("c",      25, 33, "float.3", "rjust", None),
        ## cell angles
        ("alpha",  34, 40, "float.3", "rjust", None),
        ("beta",   41, 47, "float.3", "rjust", None),
        ("gamma",  48, 54, "float.3", "rjust", None),
        ## space group and Z value, both left-justified
        ("sgroup", 56, 66, "string",  "ljust", None),
        ("z",      67, 70, "integer", "ljust", None),
    ]
1035 1036
class CRYST1(CRYSTn):
    """CRYST1 record; the field layout is inherited from CRYSTn."""
    _name = "CRYST1"

    __slots__ = []
1041 1042
class CRYST2(CRYSTn):
    """CRYST2 record; the field layout is inherited from CRYSTn."""
    _name = "CRYST2"

    __slots__ = []
1047 1048
class CRYST3(CRYSTn):
    """CRYST3 record; the field layout is inherited from CRYSTn."""
    _name = "CRYST3"

    __slots__ = []
1053 1054
class ORIGXn(PDBRecord):
    """Shared field layout for the ORIGXn (n = 1, 2, or 3) records: the
    transformation from the orthogonal coordinates in the entry to the
    submitted coordinates.

    This base class sets no _name; ORIGX1/2/3 supply it.
    """
    __slots__ = []

    _field_list = [
        ## the three matrix elements of row n, six decimal places each
        ("o[n][1]", 11, 20, "float.6", "rjust", None),
        ("o[n][2]", 21, 30, "float.6", "rjust", None),
        ("o[n][3]", 31, 40, "float.6", "rjust", None),
        ## translation component, five decimal places
        ("t[n]",    46, 55, "float.5", "rjust", None),
    ]
1067 1068
class ORIGX1(ORIGXn):
    """ORIGX1 record; the field layout is inherited from ORIGXn."""
    _name = "ORIGX1"

    __slots__ = []
1073 1074
class ORIGX2(ORIGXn):
    """ORIGX2 record; the field layout is inherited from ORIGXn."""
    _name = "ORIGX2"

    __slots__ = []
1079 1080
class ORIGX3(ORIGXn):
    """ORIGX3 record; the field layout is inherited from ORIGXn."""
    _name = "ORIGX3"

    __slots__ = []
1085 1086
class SCALEn(PDBRecord):
    """Shared field layout for the SCALEn (n = 1, 2, or 3) records: the
    transformation from the orthogonal coordinates in the entry to
    fractional crystallographic coordinates. Non-standard coordinate
    systems should be explained in the remarks.

    This base class sets no _name; SCALE1/2/3 supply it.
    """
    __slots__ = []

    _field_list = [
        ## the three matrix elements of row n, six decimal places each
        ("s[n][1]", 11, 20, "float.6", "rjust", None),
        ("s[n][2]", 21, 30, "float.6", "rjust", None),
        ("s[n][3]", 31, 40, "float.6", "rjust", None),
        ## translation component, five decimal places
        ("u[n]",    46, 55, "float.5", "rjust", None),
    ]
1100 1101
class SCALE1(SCALEn):
    """SCALE1 record; the field layout is inherited from SCALEn."""
    _name = "SCALE1"

    __slots__ = []
1106 1107
class SCALE2(SCALEn):
    """SCALE2 record; the field layout is inherited from SCALEn."""
    _name = "SCALE2"

    __slots__ = []
1112 1113
class SCALE3(SCALEn):
    """SCALE3 record; the field layout is inherited from SCALEn."""
    _name = "SCALE3"

    __slots__ = []
1118 1119
class MTRIXn(PDBRecord):
    """Shared field layout for the MTRIXn (n = 1, 2, or 3) records,
    which express non-crystallographic symmetry transformations.

    This base class sets no _name; MTRIX1/2/3 supply it.
    """
    __slots__ = []

    _field_list = [
        ("serial",   8, 10, "integer", "rjust", None),
        ## the three matrix elements of row n, six decimal places each
        ("s[n][1]", 11, 20, "float.6", "rjust", None),
        ("s[n][2]", 21, 30, "float.6", "rjust", None),
        ("s[n][3]", 31, 40, "float.6", "rjust", None),
        ## translation component, five decimal places
        ("v[n]",    46, 55, "float.5", "rjust", None),
        ("iGiven",  60, 60, "integer", "rjust", None),
    ]
1133 1134
class MTRIX1(MTRIXn):
    """MTRIX1 record; the field layout is inherited from MTRIXn."""
    _name = "MTRIX1"

    __slots__ = []
1139 1140
class MTRIX2(MTRIXn):
    """MTRIX2 record; the field layout is inherited from MTRIXn."""
    _name = "MTRIX2"

    __slots__ = []
1145 1146
class MTRIX3(MTRIXn):
    """MTRIX3 record; the field layout is inherited from MTRIXn."""
    _name = "MTRIX3"

    __slots__ = []
1151 1152
class TVECT(PDBRecord):
    """TVECT record: the translation vector for infinite covalently
    connected structures.
    """
    __slots__ = []

    _name = "TVECT "
    _field_list = [
        ("serial",  8, 10, "integer", "rjust", None),
        ## vector components, five decimal places each
        ("t[1]",   11, 20, "float.5", "rjust", None),
        ("t[2]",   21, 30, "float.5", "rjust", None),
        ("t[3]",   31, 40, "float.5", "rjust", None),
        ("text",   41, 70, "string",  "rjust", None),
    ]
1166 1167 ## SECTION 9: Coordinate Selection
def ATOM_get_name(rec):
    """Return the atom name of rec laid out for a four-column name field.

    Older applications ignore the element field of the ATOM record and
    rely on column alignment alone to tell, say, calcium (CA) from an
    alpha-carbon (CA). The alignment rules are:

      - two-character element, or a name starting with a digit: the
        name starts in the first column;
      - any other name shorter than four characters: the name starts in
        the second column;
      - otherwise the name fills the field.

    The result is always exactly four characters. Names longer than
    four characters are clipped in every branch, because the string is
    appended to the output line as-is and a wider value would push the
    following fields out of their columns.

    rec is read through rec.get("name") and rec.get("element"); missing
    or None values are treated as empty strings.
    """
    name = rec.get("name") or ""
    element = rec.get("element") or ""

    ## left-aligned cases: pad to four, then clip
    if len(element) == 2 or name[:1].isdigit():
        return name.ljust(4)[:4]

    ## short name: leave the first column blank (an empty name becomes
    ## four blanks)
    if len(name) < 4:
        return (" " + name).ljust(4)

    ## four or more characters: clip to the field width
    return name[:4]
1186 1187
class MODEL(PDBRecord):
    """MODEL record: the model serial number used when several
    structures are presented in one coordinate entry, as is often the
    case for structures determined by NMR.
    """
    __slots__ = []

    _name = "MODEL "
    _field_list = [
        ("serial", 11, 14, "integer", "rjust", None),
    ]
1198 1199
class ATOM(PDBRecord):
    """ATOM record: atomic coordinates for standard residues, together
    with the occupancy and temperature factor of each atom. Heterogen
    coordinates use the HETATM record type. The element symbol is
    always present on each ATOM record; segment identifier and charge
    are optional.

    Every _field_list row reads (field key, first column, last column,
    field type, justification, optional getter function).
    """
    __slots__ = []

    ## The record name must be exactly six characters wide:
    ## iter_pdb_records() looks classes up by ln[:6].ljust(6), so a
    ## shorter key could never match a line read from a file.
    _name = "ATOM  "
    _field_list = [
        ("serial",      7, 11, "integer", "rjust", None),
        ## the name column is produced by ATOM_get_name, not copied
        ## straight from the "name" value
        ("name",       13, 16, "string",  "ljust.rstrip", ATOM_get_name),
        ("altLoc",     17, 17, "string",  "rjust", None),
        ("resName",    18, 20, "string",  "rjust", None),
        ("chainID",    22, 22, "string",  "rjust", None),
        ("resSeq",     23, 26, "integer", "rjust", None),
        ("iCode",      27, 27, "string",  "rjust", None),
        ## coordinates, three decimal places each
        ("x",          31, 38, "float.3", "rjust", None),
        ("y",          39, 46, "float.3", "rjust", None),
        ("z",          47, 54, "float.3", "rjust", None),
        ("occupancy",  55, 60, "float.2", "rjust", None),
        ("tempFactor", 61, 66, "float.2", "rjust", None),
        ("column6768", 67, 68, "string",  "rjust", None),
        ("segID",      73, 76, "string",  "rjust", None),
        ("element",    77, 78, "string",  "rjust", None),
        ("charge",     79, 80, "string",  "rjust", None),
    ]
1227 1228
class ANISOU(PDBRecord):
    """ANISOU record: the anisotropic temperature factors of an atom.
    Columns 7 - 27 and 73 - 80 are identical to the corresponding
    ATOM/HETATM record.
    """
    __slots__ = []

    _name = "ANISOU"
    _field_list = [
        ("serial",   7, 11, "integer", "rjust", None),
        ## the name column is produced by ATOM_get_name
        ("name",    13, 16, "string",  "ljust", ATOM_get_name),
        ("altLoc",  17, 17, "string",  "rjust", None),
        ("resName", 18, 20, "string",  "rjust", None),
        ("chainID", 22, 22, "string",  "rjust", None),
        ("resSeq",  23, 26, "integer", "rjust", None),
        ("iCode",   27, 27, "string",  "rjust", None),
        ## the six tensor components, stored as integers
        ("u[0][0]", 29, 35, "integer", "rjust", None),
        ("u[1][1]", 36, 42, "integer", "rjust", None),
        ("u[2][2]", 43, 49, "integer", "rjust", None),
        ("u[0][1]", 50, 56, "integer", "rjust", None),
        ("u[0][2]", 57, 63, "integer", "rjust", None),
        ("u[1][2]", 64, 70, "integer", "rjust", None),
        ("segID",   73, 76, "string",  "rjust", None),
        ("element", 77, 78, "string",  "rjust", None),
        ("charge",  79, 80, "string",  "rjust", None),
    ]
1254 1255
class HETATM(ATOM):
    """HETATM record: atomic coordinates for atoms within
    "non-standard" groups. Used for water molecules and for atoms
    presented in HET groups. The field layout is inherited from ATOM;
    only the record name differs.
    """
    _name = "HETATM"

    __slots__ = []
1264 1265
class SIGATM(PDBRecord):
    """SIGATM record: the standard deviations of the atomic parameters
    as they appear in ATOM and HETATM records. Columns 7 - 27 and
    73 - 80 are identical to the corresponding ATOM/HETATM record.

    Every _field_list row reads (field key, first column, last column,
    field type, justification, optional getter function).
    """
    ## declared for consistency with every other record class here
    __slots__ = []

    _name = "SIGATM"
    _field_list = [
        ("serial",         7, 11, "integer", "rjust", None),
        ## the name column is produced by ATOM_get_name
        ("name",          13, 16, "string",  "ljust", ATOM_get_name),
        ("altLoc",        17, 17, "string",  "rjust", None),
        ("resName",       18, 20, "string",  "rjust", None),
        ("chainID",       22, 22, "string",  "rjust", None),
        ("resSeq",        23, 26, "integer", "rjust", None),
        ("iCode",         27, 27, "string",  "rjust", None),
        ## standard deviations of the coordinates
        ("sigX",          31, 38, "float.3", "rjust", None),
        ("sigY",          39, 46, "float.3", "rjust", None),
        ("sigZ",          47, 54, "float.3", "rjust", None),
        ("sigOccupancy",  55, 60, "float.2", "rjust", None),
        ("sigTempFactor", 61, 66, "float.2", "rjust", None),
        ("segID",         73, 76, "string",  "rjust", None),
        ("element",       77, 78, "string",  "rjust", None),
        ("charge",        79, 80, "string",  "rjust", None),
    ]
1289 1290
class SIGUIJ(PDBRecord):
    """SIGUIJ record: the standard deviations of the anisotropic
    temperature factors, scaled by a factor of 10**4 (Angstroms**2).
    Columns 7 - 27 and 73 - 80 are identical to the corresponding
    ATOM/HETATM record.
    """
    __slots__ = []

    _name = "SIGUIJ"
    _field_list = [
        ("serial",     7, 11, "integer", "rjust", None),
        ## the name column is produced by ATOM_get_name
        ("name",      13, 16, "string",  "ljust", ATOM_get_name),
        ("altLoc",    17, 17, "string",  "rjust", None),
        ("resName",   18, 20, "string",  "rjust", None),
        ("chainID",   22, 22, "string",  "rjust", None),
        ("resSeq",    23, 26, "integer", "rjust", None),
        ("iCode",     27, 27, "string",  "rjust", None),
        ## the six standard deviations, stored as integers
        ("sig[1][1]", 29, 35, "integer", "rjust", None),
        ("sig[2][2]", 36, 42, "integer", "rjust", None),
        ("sig[3][3]", 43, 49, "integer", "rjust", None),
        ("sig[1][2]", 50, 56, "integer", "rjust", None),
        ("sig[1][3]", 57, 63, "integer", "rjust", None),
        ("sig[2][3]", 64, 70, "integer", "rjust", None),
        ("segID",     73, 76, "string",  "rjust", None),
        ("element",   77, 78, "string",  "rjust", None),
        ("charge",    79, 80, "string",  "rjust", None),
    ]
1317 1318
class TER(PDBRecord):
    """TER record: marks the end of a list of ATOM/HETATM records for a
    chain.

    Every _field_list row reads (field key, first column, last column,
    field type, justification, optional getter function).
    """
    __slots__ = []

    ## The record name must be exactly six characters wide:
    ## iter_pdb_records() looks classes up by ln[:6].ljust(6), so a
    ## shorter key could never match a line read from a file.
    _name = "TER   "
    _field_list = [
        ("serial",   7, 11, "integer", "rjust", None),
        ("resName", 18, 20, "string",  "rjust", None),
        ("chainID", 22, 22, "string",  "rjust", None),
        ("resSeq",  23, 26, "integer", "rjust", None),
        ("iCode",   27, 27, "string",  "rjust", None),
    ]
1332 1333
class ENDMDL(PDBRecord):
    """ENDMDL record: paired with a MODEL record to delimit one of the
    individual structures found in a coordinate entry. It has no fields
    beyond the record name.
    """
    __slots__ = []

    _field_list = []
    _name = "ENDMDL"
1342 1343 1344 ## SECTION 10: Connectivity Section
class CONECT(PDBRecord):
    """CONECT record: connectivity between atoms for which coordinates
    are supplied, expressed with the atom serial numbers found in the
    entry. CONECT records are mandatory for HET groups (excluding
    water) and for other bonds not specified in the standard residue
    connectivity table which involve atoms in standard residues (see
    Appendix 4 for the list of standard residues). These records are
    generated by the PDB.
    """
    __slots__ = []

    _name = "CONECT"
    _field_list = [
        ("serial",           7, 11, "integer", "rjust", None),
        ## bonded atom serial numbers
        ("serialBond1",     12, 16, "integer", "rjust", None),
        ("serialBond2",     17, 21, "integer", "rjust", None),
        ("serialBond3",     22, 26, "integer", "rjust", None),
        ("serialBond4",     27, 31, "integer", "rjust", None),
        ## hydrogen bond and salt bridge serial numbers, in column order
        ("serialHydBond1",  32, 36, "integer", "rjust", None),
        ("serialHydBond2",  37, 41, "integer", "rjust", None),
        ("serialSaltBond1", 42, 46, "integer", "rjust", None),
        ("serialHydBond3",  47, 51, "integer", "rjust", None),
        ("serialHydBond4",  52, 56, "integer", "rjust", None),
        ("serialSaltBond2", 57, 61, "integer", "rjust", None),
    ]
1369 1370 1371 ## SECTION 11: Bookkeeping Section
class MASTER(PDBRecord):
    """MASTER record: a bookkeeping control record listing the number
    of lines in the coordinate entry or file for selected record types.
    """
    __slots__ = []

    _name = "MASTER"
    _field_list = [
        ## twelve consecutive five-column integer counters
        ("numRemark", 11, 15, "integer", "rjust", None),
        ("O",         16, 20, "integer", "rjust", None),
        ("numHet",    21, 25, "integer", "rjust", None),
        ("numHelix",  26, 30, "integer", "rjust", None),
        ("numSheet",  31, 35, "integer", "rjust", None),
        ("numTurn",   36, 40, "integer", "rjust", None),
        ("numSite",   41, 45, "integer", "rjust", None),
        ("numXForm",  46, 50, "integer", "rjust", None),
        ("numCoord",  51, 55, "integer", "rjust", None),
        ("numTer",    56, 60, "integer", "rjust", None),
        ("numConect", 61, 65, "integer", "rjust", None),
        ("numSeq",    66, 70, "integer", "rjust", None),
    ]
1393 1394
class END(PDBRecord):
    """END record: marks the end of the PDB file. It has no fields
    beyond the record name.
    """
    __slots__ = []

    ## The record name must be exactly six characters wide:
    ## iter_pdb_records() looks classes up by ln[:6].ljust(6), so a
    ## shorter key could never match a line read from a file.
    _name = "END   "
    _field_list = []
## PDB Record Name -> Record Class Map
##
## iter_pdb_records() uses this map to pick the class for each line;
## lines whose record name is not a key here are skipped. TVECT is
## included so that TVECT lines are parsed rather than silently
## dropped (it is already listed in PDBRecordOrder below).
PDBRecordMap = {
    HEADER._name : HEADER,
    OBSLTE._name : OBSLTE,
    TITLE._name  : TITLE,
    CAVEAT._name : CAVEAT,
    COMPND._name : COMPND,
    SOURCE._name : SOURCE,
    KEYWDS._name : KEYWDS,
    EXPDTA._name : EXPDTA,
    AUTHOR._name : AUTHOR,
    REVDAT._name : REVDAT,
    SPRSDE._name : SPRSDE,
    JRNL._name   : JRNL,
    REMARK._name : REMARK,
    DBREF._name  : DBREF,
    SEQADV._name : SEQADV,
    SEQRES._name : SEQRES,
    MODRES._name : MODRES,
    HET._name    : HET,
    HETNAM._name : HETNAM,
    HETSYN._name : HETSYN,
    FORMUL._name : FORMUL,
    HELIX._name  : HELIX,
    SHEET._name  : SHEET,
    TURN._name   : TURN,
    SSBOND._name : SSBOND,
    LINK._name   : LINK,
    HYDBND._name : HYDBND,
    SLTBRG._name : SLTBRG,
    CISPEP._name : CISPEP,
    SITE._name   : SITE,
    CRYST1._name : CRYST1,
    CRYST2._name : CRYST2,
    CRYST3._name : CRYST3,
    ORIGX1._name : ORIGX1,
    ORIGX2._name : ORIGX2,
    ORIGX3._name : ORIGX3,
    SCALE1._name : SCALE1,
    SCALE2._name : SCALE2,
    SCALE3._name : SCALE3,
    MTRIX1._name : MTRIX1,
    MTRIX2._name : MTRIX2,
    MTRIX3._name : MTRIX3,
    TVECT._name  : TVECT,
    MODEL._name  : MODEL,
    ATOM._name   : ATOM,
    ANISOU._name : ANISOU,
    HETATM._name : HETATM,
    SIGATM._name : SIGATM,
    SIGUIJ._name : SIGUIJ,
    TER._name    : TER,
    ENDMDL._name : ENDMDL,
    CONECT._name : CONECT,
    MASTER._name : MASTER,
    END._name    : END }

## this list defines the order the records have to appear in the PDB
## file; there is also an indicator if the record is optional or mandatory
PDBRecordOrder = [
    (HEADER._name, HEADER, "mandatory"),
    (OBSLTE._name, OBSLTE, "optional"),
    (TITLE._name,  TITLE,  "mandatory"),
    (CAVEAT._name, CAVEAT, "optional"),
    (COMPND._name, COMPND, "mandatory"),
    (SOURCE._name, SOURCE, "mandatory"),
    (KEYWDS._name, KEYWDS, "mandatory"),
    (EXPDTA._name, EXPDTA, "mandatory"),
    (AUTHOR._name, AUTHOR, "mandatory"),
    (REVDAT._name, REVDAT, "mandatory"),
    (SPRSDE._name, SPRSDE, "optional"),
    (JRNL._name,   JRNL,   "optional"),
    (REMARK._name, REMARK, "optional"),
    (DBREF._name,  DBREF,  "optional"),
    (SEQADV._name, SEQADV, "optional"),
    (SEQRES._name, SEQRES, "optional"),
    (MODRES._name, MODRES, "optional"),
    (HET._name,    HET,    "optional"),
    (HETNAM._name, HETNAM, "optional"),
    (HETSYN._name, HETSYN, "optional"),
    (FORMUL._name, FORMUL, "optional"),
    (HELIX._name,  HELIX,  "optional"),
    (SHEET._name,  SHEET,  "optional"),
    (TURN._name,   TURN,   "optional"),
    (SSBOND._name, SSBOND, "optional"),
    (LINK._name,   LINK,   "optional"),
    (HYDBND._name, HYDBND, "optional"),
    (SLTBRG._name, SLTBRG, "optional"),
    (CISPEP._name, CISPEP, "optional"),
    (SITE._name,   SITE,   "optional"),
    (CRYST1._name, CRYST1, "mandatory"),
    (ORIGX1._name, ORIGX1, "mandatory"),
    (ORIGX2._name, ORIGX2, "mandatory"),
    (ORIGX3._name, ORIGX3, "mandatory"),
    (SCALE1._name, SCALE1, "mandatory"),
    (SCALE2._name, SCALE2, "mandatory"),
    (SCALE3._name, SCALE3, "mandatory"),
    (MTRIX1._name, MTRIX1, "optional"),
    (MTRIX2._name, MTRIX2, "optional"),
    (MTRIX3._name, MTRIX3, "optional"),
    (TVECT._name,  TVECT,  "optional"),
    (MODEL._name,  MODEL,  "optional"),
    (ATOM._name,   ATOM,   "optional"),
    (SIGATM._name, SIGATM, "optional"),
    (ANISOU._name, ANISOU, "optional"),
    (SIGUIJ._name, SIGUIJ, "optional"),
    (TER._name,    TER,    "optional"),
    (HETATM._name, HETATM, "optional"),
    (ENDMDL._name, ENDMDL, "optional"),
    (CONECT._name, CONECT, "optional"),
    (MASTER._name, MASTER, "mandatory"),
    (END._name,    END,    "mandatory")
    ]


## END PDB RECORD DEFINITIONS
###############################################################################

def iter_pdb_records(iterable):
    """Generator turning PDB text lines into PDB record objects.

    Each line taken from iterable is stripped of trailing whitespace,
    and its first six characters (padded to six) select the record
    class through PDBRecordMap. Lines with an unrecognised record name
    are skipped. For the others a new record instance is created, asked
    to read() the line, and yielded.
    """
    for raw_line in iter(iterable):
        line = raw_line.rstrip()

        ## the record name occupies the first six columns of the line
        record_class = PDBRecordMap.get(line[:6].ljust(6))
        if record_class is None:
            continue

        ## build the record and let it parse its own line
        record = record_class()
        record.read(line)
        yield record
1540 1541
class PDBFile(list):
    """Class for managing a PDB file. This class inherits from a Python
    list object, and contains a list of PDBRecord objects.
    Load, save, edit, and create PDB files with this class.

    Item assignment, append() and insert() assert that what is stored
    is a PDBRecord.
    """
    def __setitem__(self, i, rec):
        """Store rec at index i after checking it is a PDBRecord.

        When i is a slice object, rec is taken as an iterable of
        records and every element is checked.
        """
        if isinstance(i, slice):
            ## materialise first so a one-shot iterator is not consumed
            ## by the check
            rec = list(rec)
            for item in rec:
                assert isinstance(item, PDBRecord)
        else:
            assert isinstance(rec, PDBRecord)
        list.__setitem__(self, i, rec)

    def append(self, rec):
        """Append rec, which must be a PDBRecord."""
        assert isinstance(rec, PDBRecord)
        list.append(self, rec)

    def insert(self, i, rec):
        """Insert rec, which must be a PDBRecord, before index i."""
        assert isinstance(rec, PDBRecord)
        list.insert(self, i, rec)

    def load_file(self, fil):
        """Loads a PDB file from fil, appending one record per
        recognised line.

        fil is either a path string, which is opened for reading and
        closed again when loading finishes, or an iterable of lines
        such as an open file object, which is left open.
        """
        if isinstance(fil, str):
            fileobj = open(fil, "r")
            opened_here = True
        else:
            fileobj = fil
            opened_here = False

        try:
            for pdb_record in iter_pdb_records(fileobj):
                self.append(pdb_record)
        finally:
            ## only close what this method opened itself
            if opened_here:
                fileobj.close()

    def save_file(self, fil):
        """Saves the PDBFile object in PDB file format to fil.

        fil is either a path string, which is opened for writing and
        closed again afterwards, or a writable file object, which is
        flushed and left open. Each record is written as str(record)
        followed by a newline.
        """
        if isinstance(fil, str):
            fileobj = open(fil, "w")
            opened_here = True
        else:
            fileobj = fil
            opened_here = False

        try:
            for pdb_record in self:
                fileobj.write(str(pdb_record))
                fileobj.write("\n")

            ## flush the object that was written to; fil itself may be
            ## a plain path string with no flush() method
            fileobj.flush()
        finally:
            if opened_here:
                fileobj.close()
1584 1585
class RecordProcessor(object):
    """Dispatches PDB records to handler methods.

    Subclasses receive records by defining methods named after the
    record class:

      process_<ClassName>(payload)    -- called with the record itself,
                                         or with a list of records for
                                         multi-line groups
      preprocess_<ClassName>(result)  -- called with the return value
                                         of the record's own process()
                                         method, when it has one

    Records without a matching method fall through to process_default /
    preprocess_default, which do nothing here.
    """
    def __is_sucsessive_record(self, prev_rec, rec):
        """Returns True if the current record looks like it is the successive
        PDB record in a list of records. Fields like continuation and serNum
        are checked, as well as record name.
        """
        ## records of different types never continue each other
        if rec._name != prev_rec._name:
            return False

        ## NOTE: perhaps record type specific handlers could be put
        ##       here to catch common mistakes which are found in PDB
        ##       files

        ## "continuation" numbered records; a missing value counts as 1
        if "continuation" in prev_rec or "continuation" in rec:
            prev_continuation = prev_rec.get("continuation", 1)
            continuation = rec.get("continuation", 1)
            return (prev_continuation + 1) == continuation

        ## "serNum" numbered records; a missing value counts as 0
        if "serNum" in prev_rec or "serNum" in rec:
            prev_serial = prev_rec.get("serNum", 0)
            serial = rec.get("serNum", 0)
            return (prev_serial + 1) == serial

        return False

    def __dispatch(self, rec, payload):
        """Run the process_/preprocess_ callbacks named after the class
        of rec, passing payload to them.
        """
        name = rec.__class__.__name__

        ## raw handler: process_<ClassName>, else process_default
        handler = getattr(self, "process_%s" % (name), self.process_default)
        handler(payload)

        ## records that can digest themselves also feed
        ## preprocess_<ClassName>, else preprocess_default
        if hasattr(rec, "process"):
            presult = rec.process(payload)
            prehandler = getattr(
                self, "preprocess_%s" % (name), self.preprocess_default)
            prehandler(presult)

    def __call_processor_multi(self, record_list):
        """Invoke callbacks expecting a list of related PDB records. The
        first record of the list decides which callbacks are used.
        """
        self.__dispatch(record_list[0], record_list)

    def __call_processor(self, rec):
        """Invoke callbacks for a single record."""
        self.__dispatch(rec, rec)

    def process_pdb_records(self, pdb_rec_iter, filter_func = None):
        """Iterates the PDB records in pdb_rec_iter and hands them to the
        handler methods of this object.

        Records having a _multi_record attribute are collected into a
        list for as long as each following record is the successor of
        the previous one; the list is then dispatched as a whole. ATOM
        instances go straight to process_ATOM. Everything else is
        dispatched record by record.

        filter_func, when given, is called with each record that starts
        a new dispatch; the record is dropped only when it returns
        exactly False. Records appended to a group already being
        collected are not passed through filter_func.
        """
        record_list = None
        prev_rec = None

        for rec in pdb_rec_iter:
            if prev_rec is not None:
                if self.__is_sucsessive_record(prev_rec, rec):
                    record_list.append(rec)
                    prev_rec = rec
                    continue

                ## the group ended: flush it before handling rec
                self.__call_processor_multi(record_list)
                record_list = None
                prev_rec = None

            if filter_func and filter_func(rec) is False:
                continue

            if isinstance(rec, ATOM):
                self.process_ATOM(rec)
            elif hasattr(rec, "_multi_record"):
                record_list = [rec]
                prev_rec = rec
            else:
                self.__call_processor(rec)

        ## flush a group still pending at the end of input; records are
        ## dicts, so an empty one is falsy and must be tested against
        ## None rather than for truth
        if prev_rec is not None:
            self.__call_processor_multi(record_list)

    def process_default(self, rec):
        """Fallback for records with no process_<ClassName> method."""
        pass

    def preprocess_default(self, rec):
        """Fallback for results with no preprocess_<ClassName> method."""
        pass

    def process_ATOM(self, rec):
        """Handler for ATOM instances; forwards to process_default."""
        self.process_default(rec)
1711 1712 1713 ### <testing>
def test_module():
    """Round-trip self test: load the PDB file named by the first
    command line argument and write the parsed records to standard
    output. Prints a usage line and exits when no path is given.
    """
    import sys

    if len(sys.argv) < 2:
        print("usage: PDB.py <PDB file path>")
        raise SystemExit

    pdb_file = PDBFile()
    pdb_file.load_file(sys.argv[1])
    pdb_file.save_file(sys.stdout)


if __name__ == "__main__":
    test_module()
### </testing>