1
2
3
4
"""Brookhaven PDB v2.2 file parser.  All records in the PDB v2.2
specification have corresponding classes defined here.  PDB files are
loaded into a list of these classes, and can also be constructed/modified
and written back out as PDB files.
"""
10 from __future__ import generators
11 import fpformat
12
13
18
19
28
29
31 """Base class for all PDB file records.
32 """
33 _name = None
34 _field_list = None
35
38
40 """Return a properly formed PDB record string from the instance
41 dictionary values.
42 """
43 ln = self._name
44
45 for (field, start, end, ftype, just, get_func) in self._field_list:
46
47 try:
48 assert len(ln) <= (start - 1)
49 except AssertionError:
50 print "[ASSERT] "+ln
51 raise
52
53
54 ln = ln.ljust(start - 1)
55
56
57 field_char_len = end - start + 1
58
59
60
61
62 if get_func:
63 ln += get_func(self)
64 continue
65
66
67 s = self.get(field, "")
68
69
70 if s is None or s == "":
71 ln += " " * field_char_len
72 continue
73
74
75 if ftype.startswith("string"):
76 pass
77
78 elif ftype.startswith("integer"):
79 s = str(s)
80
81 elif ftype.startswith("float"):
82 try:
83 s = fpformat.fix(s, int(ftype[6]))
84 except ValueError:
85 raise PDBValueError("field=%s %s not float" % (field, s))
86
87
88 try:
89 assert isinstance(s, str)
90 except AssertionError:
91 print "### s",str(type(s)), str(s), ftype, field
92 print ln
93 raise
94
95
96 if len(s) > field_char_len:
97 ln += s[:field_char_len]
98 else:
99 if just.startswith("ljust"):
100 ln += s.ljust(field_char_len)
101 else:
102 ln += s.rjust(field_char_len)
103
104 return ln
105
def read(self, line):
    """Read the PDB record line and convert the fields to the appropriate
    dictionary values for this class.

    Each entry of self._field_list is a 6-tuple
    (field, start, end, ftype, just, get_func), where start and end are
    1-based, inclusive column numbers.  The text in those columns is
    converted according to ftype ("string...", "integer..." or "float...")
    and stored as self[field].

    A field is left unset when its columns are blank, lie past the end of
    a short line, or hold text that cannot be converted to the requested
    numeric type.  Nothing is returned; the record is updated in place.
    """
    for (field, start, end, ftype, just, _get_func) in self._field_list:
        text = line[start - 1:end]

        # Blank or missing columns leave the key unset.
        if text == "" or text.isspace():
            continue

        if ftype.startswith("string"):
            # A justification spec ending in "lstrip"/"rstrip" keeps the
            # whitespace on the other side; otherwise both sides go.
            if just.endswith("lstrip"):
                value = text.lstrip()
            elif just.endswith("rstrip"):
                value = text.rstrip()
            else:
                value = text.strip()

        elif ftype.startswith("integer"):
            try:
                value = int(text)
            except ValueError:
                # Malformed number: treat the field as absent.
                continue

        elif ftype.startswith("float"):
            try:
                value = float(text)
            except ValueError:
                # Malformed number: treat the field as absent.
                continue

        else:
            # Unrecognised field type: store the raw column text.
            value = text

        self[field] = value
138
def reccat(self, rec_list, field):
    """Return the concatenation of field in all the records in rec_list.

    rec_list may be a list of records or a single record; a single record
    is treated as a one-element list.  Records that lack the field (or
    hold None for it) contribute nothing.  Returns "" when no record
    supplies a value.
    """
    if not isinstance(rec_list, list):
        rec_list = [rec_list]

    values = (rec.get(field) for rec in rec_list)
    return "".join(value for value in values if value is not None)
151
153 """Call reccat, then split the result by the separator.
154 """
155 listx = self.reccat(rec_list, field).split(sep)
156 listx = [x.strip() for x in listx]
157 return listx
158
160 """Call reccat_list with sep1 as the list separator, then split
161 the items into tuples by sep2.
162 """
163 listx = []
164 for x in self.reccat_list(rec_list, field, sep1):
165 i = x.find(sep2)
166 if i == -1:
167 continue
168 key = x[:i].strip()
169 val = x[i+1:].strip()
170 listx.append((key, val))
171 return listx
172
174 listx = []
175 dictx = {}
176 for (key, val) in self.reccat_tuplelist(rec_list, field, ";", ":"):
177 if key == master_key:
178 if dictx:
179 listx.append(dictx)
180 dictx = {}
181 dictx[key] = val
182 if dictx:
183 listx.append(dictx)
184 return listx
185
def reccat_multi(self, rec_list, primary_key, translations):
    """Create a list of dictionaries from a list of records.  This
    method has complex behavior to support translations of several
    PDB records into a Python format.  The primary key is used to
    separate the dictionaries within the list, and the translation
    argument is a list of strings or 2-tuples.  If the translation is a
    string, the value from the PDB record field is copied to the return
    dictionary.  If the translation is a 2-tuple==t, then t[0] is the
    return dictionary key whose value is a list formed from the list of
    PDB fields in t[1].

    rec_list may be a single record instead of a list.  A record that
    lacks primary_key is merged into the most recently created
    dictionary; such a record is skipped when no dictionary exists yet.
    Records sharing a primary key value are merged into one dictionary.
    """
    if not isinstance(rec_list, list):
        rec_list = [rec_list]

    listx = []
    for rec in rec_list:

        # Work out which primary key value this record belongs to.
        try:
            pkey = rec[primary_key]
        except KeyError:
            # No key on this record: reuse the key of the last dictionary
            # that was created, or skip the record if there is none yet.
            if not listx:
                continue
            pkey = listx[-1][primary_key]

        # Find the dictionary already collecting this primary key.
        dictx = None
        for dx in listx:
            if dx[primary_key] == pkey:
                dictx = dx
                break

        # First record for this key: start a new dictionary.
        if dictx is None:
            dictx = {primary_key: pkey}
            listx.append(dictx)

        for trans in translations:

            if isinstance(trans, tuple):
                # (dest, srcs): gather every present source field into a
                # list stored under dest.  The list is only created once
                # a source field is actually present.
                (dest, srcs) = trans
                for sx in srcs:
                    try:
                        value = rec[sx]
                    except KeyError:
                        continue
                    dictx.setdefault(dest, []).append(value)

            else:
                # Plain field name: copy the value across when present.
                try:
                    dictx[trans] = rec[trans]
                except KeyError:
                    pass

    return listx
258
259
260
261
262
263
265 """This section contains records used to describe the experiment and the
266 biological macromolecules present in the entry: HEADER, OBSLTE, TITLE,
267 CAVEAT, COMPND, SOURCE, KEYWDS, EXPDTA, AUTHOR, REVDAT, SPRSDE, JRNL,
268 and REMARK records.
269 """
270 __slots__ = []
271
272 _name = "HEADER"
273 _field_list = [
274 ("classification", 11, 50, "string", "rjust", None),
275 ("depDate", 51, 59, "string", "rjust", None),
276 ("idCode", 63, 66, "string", "rjust", None)]
277
278
280 """OBSLTE appears in entries which have been withdrawn from distribution.
281 This record acts as a flag in an entry which has been withdrawn from the
282 PDB's full release. It indicates which, if any, new entries have replaced
283 the withdrawn entry. The format allows for the case of multiple new
284 entries replacing one existing entry.
285 """
286 __slots__ = []
287
288 _name = "OBSLTE"
289 _multi_record = "continuation"
290 _field_list = [
291 ("continuation", 9, 10, "integer", "rjust", None),
292 ("repDate", 12, 20, "string", "rjust", None),
293 ("idCode", 22, 25, "string", "rjust", None),
294 ("rIdCode1", 32, 35, "string", "rjust", None),
295 ("rIdCode2", 37, 40, "string", "rjust", None),
296 ("rIdCode3", 42, 45, "string", "rjust", None),
297 ("rIdCode4", 47, 50, "string", "rjust", None),
298 ("rIdCode5", 52, 55, "string", "rjust", None),
299 ("rIdCode6", 57, 60, "string", "rjust", None),
300 ("rIdCode7", 62, 65, "string", "rjust", None),
301 ("rIdCode8", 67, 70, "string", "rjust", None)]
302
304 """Processes continued record list to a list of dictionary objects.
305 Each dictionary contains the data from one OBSLTE idCode.
306 """
307 return self.reccat_multi(
308 recs, "idCode",
309 ["repDate",
310 ("rIdCodes", ["rIdCode1", "rIdCode2", "rIdCode3", "rIdCode4",
311 "rIdCode5", "rIdCode6", "rIdCode7", "rIdCode8"])])
312
313
315 """The TITLE record contains a title for the experiment or analysis that is
316 represented in the entry. It should identify an entry in the PDB in the
317 same way that a title identifies a paper.
318 """
319 __slots__ = []
320
321 _name = "TITLE "
322 _multi_record = "continuation"
323 _field_list = [
324 ("continuation", 9, 10, "integer", "rjust", None),
325 ("title", 11, 70, "string", "ljust", None)]
326
328 return self.reccat(recs, "title")
329
330
332 """CAVEAT warns of severe errors in an entry. Use caution when using an
333 entry containing this record.
334 """
335 __slots__ = []
336
337 _name = "CAVEAT"
338 _multi_record = "continuation"
339 _field_list = [
340 ("continuation", 9, 10, "integer", "rjust", None),
341 ("idCode", 12, 15, "string", "rjust", None),
342 ("comment", 20, 70, "string", "ljust", None)]
343
345 """Returns a list of dictionaries with keys idCode and comment.
346 """
347 cavet_list = []
348 for rec in recs:
349 idCode = rec.get("idCode")
350 if idCode is None:
351 continue
352
353
354 cav = None
355 for cavx in cavet_list:
356 if cavx.get("idCode") == idCode:
357 cav = cavx
358 break
359
360
361 if cav is None:
362 cav = {"idCode" : idCode}
363 cavet_list.append(cav)
364
365
366 comment = rec.get("comment")
367 if comment is not None:
368 if cav.has_key("comment"):
369 cav["comment"] += comment
370 else:
371 cav["comment"] = comment
372 return cavet_list
373
374
376 """The COMPND record describes the macromolecular contents of an entry.
377 Each macromolecule found in the entry is described by a set of token: value
378 pairs, and is referred to as a COMPND record component. Since the concept
379 of a molecule is difficult to specify exactly, PDB staff may exercise
380 editorial judgment in consultation with depositors in assigning these
381 names. For each macromolecular component, the molecule name, synonyms,
382 number assigned by the Enzyme Commission (EC), and other relevant details
383 are specified.
384 """
385 __slots__ = []
386
387 _name = "COMPND"
388 _multi_record = "continuation"
389 _field_list = [
390 ("continuation", 9, 10, "integer", "rjust", None),
391 ("compound", 11, 70, "string", "ljust", None)]
392
395
396
398 """The SOURCE record specifies the biological and/or chemical source of
399 each biological molecule in the entry. Sources are described by both the
400 common name and the scientific name, e.g., genus and species. Strain and/or
401 cell-line for immortalized cells are given when they help to uniquely
402 identify the biological entity studied.
403 """
404 __slots__ = []
405
406 _name = "SOURCE"
407 _multi_record = "continuation"
408 _field_list = [
409 ("continuation", 9, 10, "integer", "rjust", None),
410 ("srcName", 11, 70, "string", "ljust", None)]
411
414
415
417 """The KEYWDS record contains a set of terms relevant to the entry. Terms
418 in the KEYWDS record provide a simple means of categorizing entries and may
419 be used to generate index files. This record addresses some of the
420 limitations found in the classification field of the HEADER record. It
421 provides the opportunity to add further annotation to the entry in a
422 concise and computer-searchable fashion.
423 """
424 __slots__ = []
425
426 _name = "KEYWDS"
427 _multi_record = "continuation"
428 _field_list = [
429 ("continuation", 9, 10, "integer", "rjust", None),
430 ("keywds", 11, 70, "string", "ljust", None)]
431
434
435
437 """The EXPDTA record presents information about the experiment. The EXPDTA
438 record identifies the experimental technique used. This may refer to the
439 type of radiation and sample, or include the spectroscopic or modeling
440 technique. Permitted values include:
441 ELECTRON DIFFRACTION
442 FIBER DIFFRACTION
443 FLUORESCENCE TRANSFER
444 NEUTRON DIFFRACTION
445 NMR
446 THEORETICAL MODEL
447 X-RAY DIFFRACTION
448 """
449 __slots__ = []
450
451 _name = "EXPDTA"
452 _multi_record = "continuation"
453 _field_list = [
454 ("continuation", 9, 10, "integer", "rjust", None),
455 ("technique", 11, 70, "string", "ljust", None)]
456 _technique_list = [
457 "ELECTRON DIFFRACTION",
458 "FIBER DIFFRACTION",
459 "FLUORESCENCE TRANSFER",
460 "NEUTRON DIFFRACTION",
461 "NMR",
462 "THEORETICAL MODEL",
463 "X-RAY DIFFRACTION"]
464
466 """Returns a list of 2-tuples: (technique, comment) where technique
467 is one of the accepted techniques.
468 """
469 expdta_list = []
470
471 for item in self.reccat_list(recs, "technique", ";"):
472 tech = None
473 cmnt = None
474
475 for techx in self._technique_list:
476 if item.startswith(techx):
477 tech = techx
478 cmnt = item[len(techx):].strip() or None
479 break
480
481 if tech is not None:
482 expdta_list.append((tech, cmnt))
483
484 return expdta_list
485
486
488 """The AUTHOR record contains the names of the people responsible for the
489 contents of the entry.
490 """
491 __slots__ = []
492
493 _name = "AUTHOR"
494 _multi_record = "continuation"
495 _field_list = [
496 ("continuation", 9, 10, "integer", "rjust", None),
497 ("authorList", 11, 70, "string", "ljust", None)]
498
501
502
504 """REVDAT records contain a history of the modifications made to an entry
505 since its release.
506 """
507 __slots__ = []
508
509 _name = "REVDAT"
510 _multi_record = "continuation"
511 _field_list = [
512 ("modNum", 8, 10, "integer", "rjust", None),
513 ("continuation", 11, 12, "integer", "rjust", None),
514 ("modDate", 14, 22, "string", "rjust", None),
515 ("modID", 24, 28, "string", "rjust", None),
516 ("modType", 32, 32, "integer", "rjust", None),
517 ("record1", 40, 45, "string", "ljust", None),
518 ("record2", 47, 52, "string", "ljust", None),
519 ("record3", 54, 59, "string", "ljust", None),
520 ("record4", 61, 66, "string", "ljust", None)]
521
523 return self.reccat_multi(
524 recs, "modNum",
525 ["modDate",
526 "modID",
527 "modType",
528 ("records", ["record1", "record2", "record3", "record4"])])
529
530
532 """The SPRSDE records contain a list of the ID codes of entries that were
533 made obsolete by the given coordinate entry and withdrawn from the PDB
534 release set. One entry may replace many. It is PDB policy that only the
535 principal investigator of a structure has the authority to withdraw it.
536 """
537 __slots__ = []
538
539 _name = "SPRSDE"
540 _multi_record = "continuation"
541 _field_list = [
542 ("continuation", 9, 10, "integer", "rjust", None),
543 ("sprsdeDate", 12, 20, "string", "rjust", None),
544 ("idCode", 22, 25, "string", "rjust", None),
545 ("sIdCode1", 32, 35, "string", "rjust", None),
546 ("sIdCode2", 37, 40, "string", "rjust", None),
547 ("sIdCode3", 42, 45, "string", "rjust", None),
548 ("sIdCode4", 47, 50, "string", "rjust", None),
549 ("sIdCode5", 52, 55, "string", "rjust", None),
550 ("sIdCode6", 57, 60, "string", "rjust", None),
551 ("sIdCode7", 62, 65, "string", "rjust", None),
552 ("sIdCode8", 67, 70, "string", "rjust", None)]
553
555 return self.reccat_multi(
556 recs, "idCode",
557 ["sprsdeDate",
558 ("sIdCodes", ["sIdCode1", "sIdCode2", "sIdCode3", "sIdCode4",
559 "sIdCode5", "sIdCode6", "sIdCode7", "sIdCode8"])])
560
561
class JRNL(PDBRecord):
    """The JRNL record contains the primary literature citation that describes
    the experiment which resulted in the deposited coordinate set.  There is at
    most one JRNL reference per entry.  If there is no primary reference, then
    there is no JRNL reference.  Other references are given in REMARK 1.
    """
    __slots__ = []

    # Record name, space-padded to the six-column record-name field.
    _name = "JRNL  "
    # Each entry: (field, start, end, ftype, just, get_func); start and end
    # are 1-based, inclusive column numbers.
    _field_list = [
        ("text", 13, 70, "string", "ljust", None)]
573
574
576 """REMARK records present experimental details, annotations, comments, and
577 information not included in other records. In a number of cases, REMARKs
578 are used to expand the contents of other record types. A new level of
579 structure is being used for some REMARK records. This is expected to
580 facilitate searching and will assist in the conversion to a relational
581 database.
582 """
583 __slots__ = []
584
585 _name = "REMARK"
586 _field_list = [
587 ("remarkNum", 8, 10, "integer", "rjust", None),
588 ("text", 12, 70, "string", "ljust", None)]
589
590
591
593 """ The DBREF record provides cross-reference links between PDB sequences
594 and the corresponding database entry or entries. A cross reference to
595 the sequence database is mandatory for each peptide chain with a length
596 greater than ten (10) residues. For nucleic acid entries a DBREF
597 record pointing to the Nucleic Acid Database (NDB) is mandatory when
598 the corresponding entry exists in NDB.
599 """
600 __slots__ = []
601
602 _name = "DBREF "
603 _field_list = [
604 ("idCode", 8, 11, "string", "rjust", None),
605 ("chain_ID", 13, 13, "string", "rjust", None),
606 ("seqBegin", 15, 18, "integer", "rjust", None),
607 ("insertBegin", 19, 19, "string", "rjust", None),
608 ("seqEnd", 21, 24, "integer", "rjust", None),
609 ("insertEnd", 25, 25, "string", "rjust", None),
610 ("database", 27, 32, "string", "ljust", None),
611 ("dbAccession", 34, 41, "string", "ljust", None),
612 ("dbIdCode", 43, 54, "string", "ljust", None),
613 ("dbseqBegin", 56, 60, "integer", "rjust", None),
614 ("idbnsBeg", 61, 61, "string", "rjust", None),
615 ("dbseqEnd", 63, 67, "integer", "rjust", None),
616 ("dbinsEnd", 68, 68, "string", "rjust", None)]
617
618
620 """The SEQADV record identifies conflicts between sequence information
621 in the ATOM records of the PDB entry and the sequence database entry
622 given on DBREF. Please note that these records were designed to
623 identify differences and not errors. No assumption is made as to which
624 database contains the correct data. PDB may include REMARK records in
625 the entry that reflect the depositor's view of which database has the
626 correct sequence.
627 """
628 __slots__ = []
629
630 _name = "SEQADV"
631 _field_list = [
632 ("idCode", 8, 11, "string", "rjust", None),
633 ("resName", 13, 15, "string", "rjust", None),
634 ("chainID", 17, 17, "string", "rjust", None),
635 ("seqNum", 19, 22, "integer", "rjust", None),
636 ("iCode", 23, 23, "string", "rjust", None),
637 ("database", 25, 28, "string", "ljust", None),
638 ("dbIDCode", 30, 38, "string", "ljust", None),
639 ("dbRes", 40, 42, "string", "rjust", None),
640 ("dbSeq", 44, 48, "integer", "rjust", None),
641 ("convlict", 50, 70, "string", "ljust", None)]
642
643
645 """The SEQRES records contain the amino acid or nucleic acid sequence of
646 residues in each chain of the macromolecule that was studied.
647 """
648 __slots__ = []
649
650 _name = "SEQRES"
651 _multi_record = "serNum"
652 _field_list = [
653 ("serNum", 9, 10, "integer", "rjust", None),
654 ("chainID", 12, 12, "string", "rjust", None),
655 ("numRes", 14, 17, "integer", "rjust", None),
656 ("resName1", 20, 22, "string", "rjust", None),
657 ("resName2", 24, 26, "string", "rjust", None),
658 ("resName3", 28, 30, "string", "rjust", None),
659 ("resName4", 32, 34, "string", "rjust", None),
660 ("resName5", 36, 38, "string", "rjust", None),
661 ("resName6", 40, 42, "string", "rjust", None),
662 ("resName7", 44, 46, "string", "rjust", None),
663 ("resName8", 48, 50, "string", "rjust", None),
664 ("resName9", 52, 54, "string", "rjust", None),
665 ("resName10", 56, 58, "string", "rjust", None),
666 ("resName11", 60, 62, "string", "rjust", None),
667 ("resName12", 64, 66, "string", "rjust", None),
668 ("resName13", 68, 70, "string", "rjust", None)]
669
671 """Returns a dictionary with attributes chain_id, num_res, and
672 sequence_list
673 """
674 seqres = {}
675
676 for rec in recs:
677 seqres["chain_id"] = rec.get("chainID", "")
678 seqres["num_res"] = rec.get("numRes", 0)
679
680 for field in ["resName1","resName2","resName3","resName4",
681 "resName5","resName6","resName7","resName8",
682 "resName9","resName10","resName11","resName12",
683 "resName13"]:
684 try:
685 value = rec[field]
686 except KeyError:
687 continue
688 try:
689 seqres["sequence_list"].append(value)
690 except KeyError:
691 seqres["sequence_list"] = [value]
692
693 return seqres
694
695
697 """The MODRES record provides descriptions of modifications (e.g.,
698 chemical or post-translational) to protein and nucleic acid residues.
699 Included are a mapping between residue names given in a PDB entry and
700 standard residues.
701 """
702 __slots__ = []
703
704 _name = "MODRES"
705 _field_list = [
706 ("idCode", 8, 11, "string", "rjust", None),
707 ("resName", 13, 15, "string", "rjust", None),
708 ("chainID", 17, 17, "string", "rjust", None),
709 ("seqNum", 19, 22, "integer", "rjust", None),
710 ("iCode", 23, 23, "string", "rjust", None),
711 ("stdRes", 25, 27, "string", "rjust", None),
712 ("comment", 30, 70, "string", "ljust", None)]
713
714
715
class HET(PDBRecord):
    """The HET records are used to describe non-standard residues, such as
    prosthetic groups, inhibitors, solvent molecules, and ions for
    which coordinates are supplied.  Groups are considered HET if they are:
      - not one of the standard amino acids, and
      - not one of the nucleic acids (C, G, A, T, U, and I), and
      - not one of the modified versions of nucleic acids (+C, +G, +A,
        +T, +U, and +I), and
      - not an unknown amino acid or nucleic acid where UNK is used to
        indicate the unknown residue name.
    Het records also describe heterogens for which the chemical identity
    is unknown, in which case the group is assigned the hetID UNK.
    """
    __slots__ = []

    # Record name, space-padded to the six-column record-name field.
    _name = "HET   "
    # Each entry: (field, start, end, ftype, just, get_func); start and end
    # are 1-based, inclusive column numbers.
    _field_list = [
        ("hetID", 8, 10, "string", "rjust", None),
        ("chainID", 13, 13, "string", "rjust", None),
        ("seqNum", 14, 17, "integer", "rjust", None),
        ("iCode", 18, 18, "string", "rjust", None),
        ("numHetAtoms", 21, 25, "integer", "rjust", None),
        ("text", 31, 70, "string", "ljust", None)]
739
740
742 """This record gives the chemical name of the compound with the
743 given hetID.
744 """
745 __slots__ = []
746
747 _name = "HETNAM"
748 _multi_record = "continuation"
749 _field_list = [
750 ("continuation", 9, 10, "integer", "ljust", None),
751 ("hetID", 12, 14, "string", "rjust", None),
752 ("text", 16, 70, "string", "ljust", None)]
753
754
756 """This record provides synonyms, if any, for the compound in the
757 corresponding (i.e., same hetID) HETNAM record. This is to allow
758 greater flexibility in searching for HET groups.
759 """
760 __slots__ = []
761
762 _name = "HETSYN"
763 _multi_record = "continuation"
764 _field_list = [
765 ("continuation", 9, 10, "integer", "ljust", None),
766 ("hetID", 12, 14, "string", "rjust", None),
767 ("hetSynonyms", 16, 70, "string", "ljust", None)]
768
769
785
786
787
789 """HELIX records are used to identify the position of helices in the
790 molecule. Helices are both named and numbered. The residues where the
791 helix begins and ends are noted, as well as the total length.
792 """
793 __slots__ = []
794
795 _name = "HELIX "
796 _field_list = [
797 ("serNum", 8, 10, "integer", "rjust", None),
798 ("helixID", 12, 14, "string", "rjust", None),
799 ("initResName", 16, 18, "string", "rjust", None),
800 ("initChainID", 20, 20, "string", "rjust", None),
801 ("initSeqNum", 22, 25, "integer", "rjust", None),
802 ("initICode", 26, 26, "string", "rjust", None),
803 ("endResName", 28, 30, "string", "rjust", None),
804 ("endChainID", 32, 32, "string", "rjust", None),
805 ("endSeqNum", 34, 37, "integer", "rjust", None),
806 ("endICode", 38, 38, "string", "rjust", None),
807 ("helixClass", 39, 40, "integer", "rjust", None),
808 ("comment", 41, 70, "string", "ljust", None),
809 ("length", 72, 76, "integer", "rjust", None)]
810
811
813 """SHEET records are used to identify the position of sheets in the
814 molecule. Sheets are both named and numbered. The residues where the
815 sheet begins and ends are noted.
816 """
817 __slots__ = []
818
819 _name = "SHEET "
820 _field_list = [
821 ("strand", 8, 10, "integer", "rjust", None),
822 ("sheetID", 12, 14, "string", "rjust", None),
823 ("numStrands", 15, 16, "integer", "rjust", None),
824 ("initResName", 18, 20, "string", "rjust", None),
825 ("initChainID", 22, 22, "string", "rjust", None),
826 ("initSeqNum", 23, 26, "integer", "rjust", None),
827 ("initICode", 27, 27, "string", "rjust", None),
828 ("endResName", 29, 31, "string", "rjust", None),
829 ("endChainID", 33, 33, "string", "rjust", None),
830 ("endSeqNum", 34, 37, "integer", "rjust", None),
831 ("endICode", 38, 38, "string", "rjust", None),
832 ("sense", 39, 40, "integer", "rjust", None),
833 ("curAtom", 42, 45, "string", "rjust", None),
834 ("curResName", 46, 48, "string", "rjust", None),
835 ("curChainID", 50 ,50, "string", "rjust", None),
836 ("curResSeq", 51, 54, "integer", "rjust", None),
837 ("curICode", 55, 55, "string", "rjust", None),
838 ("prevAtom", 57, 60, "string", "rjust", None),
839 ("prevResName", 61, 63, "string", "rjust", None),
840 ("prevChainID", 65, 65, "string", "rjust", None),
841 ("prevResSeq", 66, 69, "integer", "rjust", None),
842 ("prevICode", 70, 70, "string", "rjust", None)]
843
844
class TURN(PDBRecord):
    """The TURN records identify turns and other short loop turns which
    normally connect other secondary structure segments.
    """
    __slots__ = []

    # Record name, space-padded to the six-column record-name field.
    _name = "TURN  "
    # Each entry: (field, start, end, ftype, just, get_func); start and end
    # are 1-based, inclusive column numbers.
    _field_list = [
        ("seq", 8, 10, "integer", "rjust", None),
        ("turnID", 12, 14, "string", "rjust", None),
        ("initResName", 16, 18, "string", "rjust", None),
        ("initChainID", 20, 20, "string", "rjust", None),
        ("initSeqNum", 21, 24, "integer", "rjust", None),
        ("initICode", 25, 25, "string", "rjust", None),
        ("endResName", 27, 29, "string", "rjust", None),
        ("endChainID", 31, 31, "string", "rjust", None),
        ("endSeqNum", 32, 35, "integer", "rjust", None),
        ("endICode", 36, 36, "string", "rjust", None),
        ("comment", 41, 70, "string", "ljust", None)]
864
865
866
868 """The SSBOND record identifies each disulfide bond in protein and
869 polypeptide structures by identifying the two residues involved in the
870 bond.
871 """
872 __slots__ = []
873
874 _name = "SSBOND"
875 _field_list = [
876 ("serNum", 8, 10, "integer", "rjust", None),
877 ("resName1", 12, 14, "string", "rjust", None),
878 ("chainID1", 16, 16, "string", "rjust", None),
879 ("seqNum1", 18, 21, "integer", "rjust", None),
880 ("iCode1", 22, 22, "string", "rjust", None),
881 ("resName2", 26, 28, "string", "rjust", None),
882 ("chainID2", 30, 30, "string", "rjust", None),
883 ("seqNum2", 32, 35, "integer", "rjust", None),
884 ("iCode2", 36, 36, "string", "rjust", None),
885 ("sym1", 60, 65, "string", "rjust", None),
886 ("sym2", 67, 72, "string", "rjust", None)]
887
888
class LINK(PDBRecord):
    """The LINK records specify connectivity between residues that is not
    implied by the primary structure.  Connectivity is expressed in terms of
    the atom names.  This record supplements information given in CONECT
    records and is provided here for convenience in searching.
    """
    __slots__ = []

    # Record name, space-padded to the six-column record-name field.
    _name = "LINK  "
    # Each entry: (field, start, end, ftype, just, get_func); start and end
    # are 1-based, inclusive column numbers.
    _field_list = [
        ("name1", 13, 16, "string", "rjust", None),
        ("altLoc1", 17, 17, "string", "rjust", None),
        ("resName1", 18, 20, "string", "rjust", None),
        ("chainID1", 22, 22, "string", "rjust", None),
        ("resSeq1", 23, 26, "integer", "rjust", None),
        ("iCode1", 27, 27, "string", "rjust", None),
        ("name2", 43, 46, "string", "rjust", None),
        ("altLoc2", 47, 47, "string", "rjust", None),
        ("resName2", 48, 50, "string", "rjust", None),
        ("chainID2", 52, 52, "string", "rjust", None),
        ("resSeq2", 53, 56, "integer", "rjust", None),
        ("iCode2", 57, 57, "string", "rjust", None),
        ("sym1", 60, 65, "string", "rjust", None),
        ("sym2", 67, 72, "string", "rjust", None)]
913
914
916 """The HYDBND records specify hydrogen bonds in the entry.
917 """
918 __slots__ = []
919
920 _name = "HYDBND"
921 _field_list = [
922 ("name1", 13, 16, "string", "rjust", None),
923 ("altLoc1", 17, 17, "string", "rjust", None),
924 ("resName1", 18, 20, "string", "rjust", None),
925 ("chainID1", 22, 22, "string", "rjust", None),
926 ("resSeq1", 23, 27, "integer", "rjust", None),
927 ("iCode1", 28, 28, "string", "rjust", None),
928 ("nameH", 30, 33, "string", "rjust", None),
929 ("altLocH", 34, 34, "string", "rjust", None),
930 ("chainH", 36, 36, "string", "rjust", None),
931 ("resSeqH", 37, 41, "integer", "rjust", None),
932 ("iCodeH", 42, 42, "string", "rjust", None),
933 ("name2", 44, 47, "string", "rjust", None),
934 ("altLoc2", 48, 48, "string", "rjust", None),
935 ("resName2", 49, 51, "string", "rjust", None),
936 ("chainID2", 53, 53, "string", "rjust", None),
937 ("resSeq2", 54, 58, "integer", "rjust", None),
938 ("iCode2", 59, 59, "string", "rjust", None),
939 ("sym1", 60, 65, "string", "rjust", None),
940 ("sym2", 67, 72, "string", "rjust", None)]
941
942
944 """The SLTBRG records specify salt bridges in the entry.
945 """
946 __slots__ = []
947
948 _name = "SLTBRG"
949 _field_list = [
950 ("name1", 13, 16, "string", "rjust", None),
951 ("altLoc1", 17, 17, "string", "rjust", None),
952 ("resName1", 18, 20, "string", "rjust", None),
953 ("chainID1", 22, 22, "string", "rjust", None),
954 ("resSeq1", 23, 26, "integer", "rjust", None),
955 ("iCode1", 27, 27, "string", "rjust", None),
956 ("name2", 43, 46, "string", "rjust", None),
957 ("altLoc2", 47, 47, "string", "rjust", None),
958 ("resName2", 48, 50, "string", "rjust", None),
959 ("chainID2", 52, 52, "string", "rjust", None),
960 ("resSeq2", 53, 56, "integer", "rjust", None),
961 ("iCode2", 57, 57, "string", "rjust", None),
962 ("sym1", 60, 65, "string", "rjust", None),
963 ("sym2", 67, 72, "string", "rjust", None)]
964
965
967 """CISPEP records specify the prolines and other peptides found to be
968 in the cis conformation. This record replaces the use of footnote records
969 to list cis peptides.
970 """
971 __slots__ = []
972
973 _name = "CISPEP"
974 _field_list = [
975 ("serial", 8, 10, "integer", "rjust", None),
976 ("resName1", 12, 14, "string", "rjust", None),
977 ("chainID1", 16, 16, "string", "rjust", None),
978 ("seqNum1", 18, 21, "integer", "rjust", None),
979 ("iCode1", 22, 22, "string", "rjust", None),
980 ("resName2", 26, 28, "string", "rjust", None),
981 ("chainID2", 30, 30, "string", "rjust", None),
982 ("seqNum2", 32, 35, "integer", "rjust", None),
983 ("iCode2", 36, 36, "string", "rjust", None),
984 ("modNum", 44, 46, "integer", "rjust", None),
985 ("measure", 54, 59, "float.2", "rjust", None)]
986
987
988
class SITE(PDBRecord):
    """The SITE records supply the identification of groups comprising
    important sites in the macromolecule.
    """
    __slots__ = []

    # Record name, space-padded to the six-column record-name field.
    _name = "SITE  "
    # Each entry: (field, start, end, ftype, just, get_func); start and end
    # are 1-based, inclusive column numbers.  Up to four residues
    # (resNameN/chainIDN/seqN/iCodeN) are listed per record.
    _field_list = [
        ("seqNum", 8, 10, "integer", "rjust", None),
        ("siteID", 12, 14, "string", "rjust", None),
        ("numRes", 16, 17, "integer", "rjust", None),
        ("resName1", 19, 21, "string", "rjust", None),
        ("chainID1", 23, 23, "string", "rjust", None),
        ("seq1", 24, 27, "integer", "rjust", None),
        ("iCode1", 28, 28, "string", "rjust", None),
        ("resName2", 30, 32, "string", "rjust", None),
        ("chainID2", 34, 34, "string", "rjust", None),
        ("seq2", 35, 38, "integer", "rjust", None),
        ("iCode2", 39, 39, "string", "rjust", None),
        ("resName3", 41, 43, "string", "rjust", None),
        ("chainID3", 45, 45, "string", "rjust", None),
        ("seq3", 46, 49, "integer", "rjust", None),
        ("iCode3", 50, 50, "string", "rjust", None),
        ("resName4", 52, 54, "string", "rjust", None),
        ("chainID4", 56, 56, "string", "rjust", None),
        ("seq4", 57, 60, "integer", "rjust", None),
        ("iCode4", 61, 61, "string", "rjust", None)]
1016
1017
1018
1020 """The CRYSTn (n=1,2,3) record presents the unit cell parameters, space
1021 group, and Z value. If the structure was not determined by crystallographic
1022 means, CRYSTn simply defines a unit cube.
1023 """
1024 __slots__ = []
1025
1026 _field_list = [
1027 ("a", 7, 15, "float.3", "rjust", None),
1028 ("b", 16, 24, "float.3", "rjust", None),
1029 ("c", 25, 33, "float.3", "rjust", None),
1030 ("alpha", 34, 40, "float.3", "rjust", None),
1031 ("beta", 41, 47, "float.3", "rjust", None),
1032 ("gamma", 48, 54, "float.3", "rjust", None),
1033 ("sgroup", 56, 66, "string", "ljust", None),
1034 ("z", 67, 70, "integer", "ljust", None)]
1035
1036
1038 __slots__ = []
1039
1040 _name = "CRYST1"
1041
1042
1044 __slots__ = []
1045
1046 _name = "CRYST2"
1047
1048
1050 __slots__ = []
1051
1052 _name = "CRYST3"
1053
1054
1056 """The ORIGXn (n = 1, 2, or 3) records present the transformation from
1057 the orthogonal coordinates contained in the entry to the submitted
1058 coordinates.
1059 """
1060 __slots__ = []
1061
1062 _field_list = [
1063 ("o[n][1]", 11, 20, "float.6", "rjust", None),
1064 ("o[n][2]", 21, 30, "float.6", "rjust", None),
1065 ("o[n][3]", 31, 40, "float.6", "rjust", None),
1066 ("t[n]", 46, 55, "float.5", "rjust", None)]
1067
1068
1070 __slots__ = []
1071
1072 _name = "ORIGX1"
1073
1074
1076 __slots__ = []
1077
1078 _name = "ORIGX2"
1079
1080
1082 __slots__ = []
1083
1084 _name = "ORIGX3"
1085
1086
class SCALEn(PDBRecord):
    """Column layout shared by the SCALEn (n = 1, 2, or 3) records, which
    present the transformation from the orthogonal coordinates as
    contained in the entry to fractional crystallographic coordinates.
    Non-standard coordinate systems should be explained in the remarks.
    """
    __slots__ = []

    # each entry: (field, first column, last column, type, justify, getter)
    _field_list = [
        # one row of the 3x3 matrix
        ("s[n][1]", 11, 20, "float.6", "rjust", None),
        ("s[n][2]", 21, 30, "float.6", "rjust", None),
        ("s[n][3]", 31, 40, "float.6", "rjust", None),
        # matching component of the translation vector
        ("u[n]", 46, 55, "float.5", "rjust", None),
    ]


class SCALE1(SCALEn):
    """SCALE1 record; uses the SCALEn column layout.
    """
    __slots__ = []

    _name = "SCALE1"


class SCALE2(SCALEn):
    """SCALE2 record; uses the SCALEn column layout.
    """
    __slots__ = []

    _name = "SCALE2"


class SCALE3(SCALEn):
    """SCALE3 record; uses the SCALEn column layout.
    """
    __slots__ = []

    _name = "SCALE3"
1118
1119
class MTRIXn(PDBRecord):
    """Column layout shared by the MTRIXn (n = 1, 2, or 3) records, which
    present transformations expressing non-crystallographic symmetry.
    """
    __slots__ = []

    # each entry: (field, first column, last column, type, justify, getter)
    _field_list = [
        ("serial", 8, 10, "integer", "rjust", None),
        # one row of the 3x3 matrix
        ("s[n][1]", 11, 20, "float.6", "rjust", None),
        ("s[n][2]", 21, 30, "float.6", "rjust", None),
        ("s[n][3]", 31, 40, "float.6", "rjust", None),
        # matching component of the translation vector
        ("v[n]", 46, 55, "float.5", "rjust", None),
        ("iGiven", 60, 60, "integer", "rjust", None),
    ]


class MTRIX1(MTRIXn):
    """MTRIX1 record; uses the MTRIXn column layout.
    """
    __slots__ = []

    _name = "MTRIX1"


class MTRIX2(MTRIXn):
    """MTRIX2 record; uses the MTRIXn column layout.
    """
    __slots__ = []

    _name = "MTRIX2"


class MTRIX3(MTRIXn):
    """MTRIX3 record; uses the MTRIXn column layout.
    """
    __slots__ = []

    _name = "MTRIX3"
1151
1152
class TVECT(PDBRecord):
    """TVECT record: the translation vector for infinite covalently
    connected structures.
    """
    __slots__ = []

    _name = "TVECT "

    # each entry: (field, first column, last column, type, justify, getter)
    _field_list = [
        ("serial", 8, 10, "integer", "rjust", None),
        # the three vector components
        ("t[1]", 11, 20, "float.5", "rjust", None),
        ("t[2]", 21, 30, "float.5", "rjust", None),
        ("t[3]", 31, 40, "float.5", "rjust", None),
        ("text", 41, 70, "string", "rjust", None),
    ]
1166
1167
def ATOM_get_name(rec):
    """Return the atom name of rec formatted for the 4-character name
    field (columns 13-16) of an ATOM-style record.

    This should help older applications which do not use the element
    field of the ATOM record; these applications used column alignment
    to distinguish calcium (CA) from, say, an alpha-carbon (CA).

    rec is a mapping read with get(); missing or None "name"/"element"
    values are treated as empty strings.  The result is always exactly
    4 characters long.
    """
    name = rec.get("name") or ""
    element = rec.get("element") or ""

    # names of two-letter elements, and names that begin with a digit,
    # start in the first column of the field
    if len(element) == 2 or name[:1].isdigit():
        return name.ljust(4)[:4]

    # every other short name is shifted right by one column
    if len(name) < 4:
        return " " + name.ljust(3)

    # a full-width name fills the field; clip anything longer so that it
    # cannot spill into the altLoc column that follows
    return name[:4]
1186
1187
class MODEL(PDBRecord):
    """MODEL record: the model serial number, used when multiple
    structures are presented in a single coordinate entry, as is often
    the case with structures determined by NMR.
    """
    __slots__ = []

    _name = "MODEL "

    # each entry: (field, first column, last column, type, justify, getter)
    _field_list = [
        ("serial", 11, 14, "integer", "rjust", None),
    ]
1198
1199
class ATOM(PDBRecord):
    """ATOM record: the atomic coordinates for standard residues, along
    with the occupancy and temperature factor for each atom.  Heterogen
    coordinates use the HETATM record type.  The element symbol is
    always present on each ATOM record; segment identifier and charge
    are optional.
    """
    __slots__ = []

    _name = "ATOM  "

    # each entry: (field, first column, last column, type, justify, getter)
    _field_list = [
        # atom and residue identification
        ("serial", 7, 11, "integer", "rjust", None),
        # written through ATOM_get_name
        # NOTE(review): only ATOM uses "ljust.rstrip" for name; ANISOU,
        # SIGATM and SIGUIJ use plain "ljust" -- confirm this is intended
        ("name", 13, 16, "string", "ljust.rstrip", ATOM_get_name),
        ("altLoc", 17, 17, "string", "rjust", None),
        ("resName", 18, 20, "string", "rjust", None),
        ("chainID", 22, 22, "string", "rjust", None),
        ("resSeq", 23, 26, "integer", "rjust", None),
        ("iCode", 27, 27, "string", "rjust", None),
        # coordinates, occupancy and temperature factor
        ("x", 31, 38, "float.3", "rjust", None),
        ("y", 39, 46, "float.3", "rjust", None),
        ("z", 47, 54, "float.3", "rjust", None),
        ("occupancy", 55, 60, "float.2", "rjust", None),
        ("tempFactor", 61, 66, "float.2", "rjust", None),
        ("column6768", 67, 68, "string", "rjust", None),
        # trailing identification columns
        ("segID", 73, 76, "string", "rjust", None),
        ("element", 77, 78, "string", "rjust", None),
        ("charge", 79, 80, "string", "rjust", None),
    ]
1227
1228
class ANISOU(PDBRecord):
    """ANISOU record: the anisotropic temperature factors.  Columns
    7 - 27 and 73 - 80 are identical to the corresponding ATOM/HETATM
    record.
    """
    __slots__ = []

    _name = "ANISOU"

    # each entry: (field, first column, last column, type, justify, getter)
    _field_list = [
        # atom and residue identification
        ("serial", 7, 11, "integer", "rjust", None),
        ("name", 13, 16, "string", "ljust", ATOM_get_name),
        ("altLoc", 17, 17, "string", "rjust", None),
        ("resName", 18, 20, "string", "rjust", None),
        ("chainID", 22, 22, "string", "rjust", None),
        ("resSeq", 23, 26, "integer", "rjust", None),
        ("iCode", 27, 27, "string", "rjust", None),
        # the six temperature factor columns, stored as integers
        ("u[0][0]", 29, 35, "integer", "rjust", None),
        ("u[1][1]", 36, 42, "integer", "rjust", None),
        ("u[2][2]", 43, 49, "integer", "rjust", None),
        ("u[0][1]", 50, 56, "integer", "rjust", None),
        ("u[0][2]", 57, 63, "integer", "rjust", None),
        ("u[1][2]", 64, 70, "integer", "rjust", None),
        # trailing identification columns
        ("segID", 73, 76, "string", "rjust", None),
        ("element", 77, 78, "string", "rjust", None),
        ("charge", 79, 80, "string", "rjust", None),
    ]
1254
1255
class HETATM(ATOM):
    """HETATM record: the atomic coordinate records for atoms within
    "non-standard" groups.  These records are used for water molecules
    and atoms presented in HET groups.
    """
    __slots__ = []

    # only the record name is set here; no field list is defined
    _name = "HETATM"
1264
1265
class SIGATM(PDBRecord):
    """SIGATM record: the standard deviation of atomic parameters as
    they appear in ATOM and HETATM records.  Columns 7 - 27 and 73 - 80
    are identical to the corresponding ATOM/HETATM record.
    """
    # declared empty like the other record classes in this part of the file
    __slots__ = []

    _name = "SIGATM"

    # each entry: (field, first column, last column, type, justify, getter)
    _field_list = [
        # atom and residue identification
        ("serial", 7, 11, "integer", "rjust", None),
        ("name", 13, 16, "string", "ljust", ATOM_get_name),
        ("altLoc", 17, 17, "string", "rjust", None),
        ("resName", 18, 20, "string", "rjust", None),
        ("chainID", 22, 22, "string", "rjust", None),
        ("resSeq", 23, 26, "integer", "rjust", None),
        ("iCode", 27, 27, "string", "rjust", None),
        # standard deviations of the ATOM coordinate columns
        ("sigX", 31, 38, "float.3", "rjust", None),
        ("sigY", 39, 46, "float.3", "rjust", None),
        ("sigZ", 47, 54, "float.3", "rjust", None),
        ("sigOccupancy", 55, 60, "float.2", "rjust", None),
        ("sigTempFactor", 61, 66, "float.2", "rjust", None),
        # trailing identification columns
        ("segID", 73, 76, "string", "rjust", None),
        ("element", 77, 78, "string", "rjust", None),
        ("charge", 79, 80, "string", "rjust", None),
    ]
1289
1290
class SIGUIJ(PDBRecord):
    """SIGUIJ record: the standard deviations of anisotropic temperature
    factors scaled by a factor of 10**4 (Angstroms**2).  Columns 7 - 27
    and 73 - 80 are identical to the corresponding ATOM/HETATM record.
    """
    __slots__ = []

    _name = "SIGUIJ"

    # each entry: (field, first column, last column, type, justify, getter)
    _field_list = [
        # atom and residue identification
        ("serial", 7, 11, "integer", "rjust", None),
        ("name", 13, 16, "string", "ljust", ATOM_get_name),
        ("altLoc", 17, 17, "string", "rjust", None),
        ("resName", 18, 20, "string", "rjust", None),
        ("chainID", 22, 22, "string", "rjust", None),
        ("resSeq", 23, 26, "integer", "rjust", None),
        ("iCode", 27, 27, "string", "rjust", None),
        # the six standard deviation columns, stored as integers
        ("sig[1][1]", 29, 35, "integer", "rjust", None),
        ("sig[2][2]", 36, 42, "integer", "rjust", None),
        ("sig[3][3]", 43, 49, "integer", "rjust", None),
        ("sig[1][2]", 50, 56, "integer", "rjust", None),
        ("sig[1][3]", 57, 63, "integer", "rjust", None),
        ("sig[2][3]", 64, 70, "integer", "rjust", None),
        # trailing identification columns
        ("segID", 73, 76, "string", "rjust", None),
        ("element", 77, 78, "string", "rjust", None),
        ("charge", 79, 80, "string", "rjust", None),
    ]
1317
1318
class TER(PDBRecord):
    """TER record: marks the end of a list of ATOM/HETATM records for a
    chain.
    """
    __slots__ = []

    _name = "TER   "

    # each entry: (field, first column, last column, type, justify, getter)
    _field_list = [
        ("serial", 7, 11, "integer", "rjust", None),
        ("resName", 18, 20, "string", "rjust", None),
        ("chainID", 22, 22, "string", "rjust", None),
        ("resSeq", 23, 26, "integer", "rjust", None),
        ("iCode", 27, 27, "string", "rjust", None),
    ]
1332
1333
class ENDMDL(PDBRecord):
    """ENDMDL record: paired with MODEL records to group individual
    structures found in a coordinate entry.
    """
    __slots__ = []

    _name = "ENDMDL"

    # the record consists of its name only
    _field_list = []
1342
1343
1344
class CONECT(PDBRecord):
    """CONECT record: connectivity between atoms for which coordinates
    are supplied, described using the atom serial number as found in the
    entry.  CONECT records are mandatory for HET groups (excluding
    water) and for other bonds not specified in the standard residue
    connectivity table which involve atoms in standard residues (see
    Appendix 4 for the list of standard residues).  These records are
    generated by the PDB.
    """
    __slots__ = []

    _name = "CONECT"

    # each entry: (field, first column, last column, type, justify, getter)
    # every field is a 5-column integer serial number
    _field_list = [
        ("serial", 7, 11, "integer", "rjust", None),
        ("serialBond1", 12, 16, "integer", "rjust", None),
        ("serialBond2", 17, 21, "integer", "rjust", None),
        ("serialBond3", 22, 26, "integer", "rjust", None),
        ("serialBond4", 27, 31, "integer", "rjust", None),
        ("serialHydBond1", 32, 36, "integer", "rjust", None),
        ("serialHydBond2", 37, 41, "integer", "rjust", None),
        ("serialSaltBond1", 42, 46, "integer", "rjust", None),
        ("serialHydBond3", 47, 51, "integer", "rjust", None),
        ("serialHydBond4", 52, 56, "integer", "rjust", None),
        ("serialSaltBond2", 57, 61, "integer", "rjust", None),
    ]
1369
1370
1371
class MASTER(PDBRecord):
    """MASTER record: a control record for bookkeeping.  It lists the
    number of lines in the coordinate entry or file for selected record
    types.
    """
    __slots__ = []

    _name = "MASTER"

    # each entry: (field, first column, last column, type, justify, getter)
    # every field is a 5-column integer count
    _field_list = [
        ("numRemark", 11, 15, "integer", "rjust", None),
        # NOTE(review): this key is the capital letter "O" -- confirm
        # the intended field name for columns 16-20
        ("O", 16, 20, "integer", "rjust", None),
        ("numHet", 21, 25, "integer", "rjust", None),
        ("numHelix", 26, 30, "integer", "rjust", None),
        ("numSheet", 31, 35, "integer", "rjust", None),
        ("numTurn", 36, 40, "integer", "rjust", None),
        ("numSite", 41, 45, "integer", "rjust", None),
        ("numXForm", 46, 50, "integer", "rjust", None),
        ("numCoord", 51, 55, "integer", "rjust", None),
        ("numTer", 56, 60, "integer", "rjust", None),
        ("numConect", 61, 65, "integer", "rjust", None),
        ("numSeq", 66, 70, "integer", "rjust", None),
    ]
1393
1394
class END(PDBRecord):
    """END record: marks the end of the PDB file.
    """
    __slots__ = []

    _name = "END   "

    # the record consists of its name only
    _field_list = []
1402
1403
1404
# Maps the 6-character record name found at the start of a PDB line to
# the class that parses it.  Lines whose name is not a key here are
# skipped when records are read, so every record class needs an entry.
PDBRecordMap = {
    # title section
    HEADER._name: HEADER,
    OBSLTE._name: OBSLTE,
    TITLE._name: TITLE,
    CAVEAT._name: CAVEAT,
    COMPND._name: COMPND,
    SOURCE._name: SOURCE,
    KEYWDS._name: KEYWDS,
    EXPDTA._name: EXPDTA,
    AUTHOR._name: AUTHOR,
    REVDAT._name: REVDAT,
    SPRSDE._name: SPRSDE,
    JRNL._name: JRNL,
    REMARK._name: REMARK,
    # primary structure
    DBREF._name: DBREF,
    SEQADV._name: SEQADV,
    SEQRES._name: SEQRES,
    MODRES._name: MODRES,
    # heterogens
    HET._name: HET,
    HETNAM._name: HETNAM,
    HETSYN._name: HETSYN,
    FORMUL._name: FORMUL,
    # secondary structure
    HELIX._name: HELIX,
    SHEET._name: SHEET,
    TURN._name: TURN,
    # connectivity annotation
    SSBOND._name: SSBOND,
    LINK._name: LINK,
    HYDBND._name: HYDBND,
    SLTBRG._name: SLTBRG,
    CISPEP._name: CISPEP,
    SITE._name: SITE,
    # crystallographic and coordinate transformation
    CRYST1._name: CRYST1,
    CRYST2._name: CRYST2,
    CRYST3._name: CRYST3,
    ORIGX1._name: ORIGX1,
    ORIGX2._name: ORIGX2,
    ORIGX3._name: ORIGX3,
    SCALE1._name: SCALE1,
    SCALE2._name: SCALE2,
    SCALE3._name: SCALE3,
    MTRIX1._name: MTRIX1,
    MTRIX2._name: MTRIX2,
    MTRIX3._name: MTRIX3,
    # TVECT was missing from this table although the class is defined
    # and listed in PDBRecordOrder
    TVECT._name: TVECT,
    # coordinates
    MODEL._name: MODEL,
    ATOM._name: ATOM,
    ANISOU._name: ANISOU,
    HETATM._name: HETATM,
    SIGATM._name: SIGATM,
    SIGUIJ._name: SIGUIJ,
    TER._name: TER,
    ENDMDL._name: ENDMDL,
    # connectivity and bookkeeping
    CONECT._name: CONECT,
    MASTER._name: MASTER,
    END._name: END,
}
1459
1460
1461
def _order_entry(record_class, requirement):
    """Build one PDBRecordOrder entry: (record name, class, requirement).
    """
    return (record_class._name, record_class, requirement)


# Record classes in list order, each flagged "mandatory" or "optional".
PDBRecordOrder = [
    _order_entry(HEADER, "mandatory"),
    _order_entry(OBSLTE, "optional"),
    _order_entry(TITLE, "mandatory"),
    _order_entry(CAVEAT, "optional"),
    _order_entry(COMPND, "mandatory"),
    _order_entry(SOURCE, "mandatory"),
    _order_entry(KEYWDS, "mandatory"),
    _order_entry(EXPDTA, "mandatory"),
    _order_entry(AUTHOR, "mandatory"),
    _order_entry(REVDAT, "mandatory"),
    _order_entry(SPRSDE, "optional"),
    _order_entry(JRNL, "optional"),
    _order_entry(REMARK, "optional"),
    _order_entry(DBREF, "optional"),
    _order_entry(SEQADV, "optional"),
    _order_entry(SEQRES, "optional"),
    _order_entry(MODRES, "optional"),
    _order_entry(HET, "optional"),
    _order_entry(HETNAM, "optional"),
    _order_entry(HETSYN, "optional"),
    _order_entry(FORMUL, "optional"),
    _order_entry(HELIX, "optional"),
    _order_entry(SHEET, "optional"),
    _order_entry(TURN, "optional"),
    _order_entry(SSBOND, "optional"),
    _order_entry(LINK, "optional"),
    _order_entry(HYDBND, "optional"),
    _order_entry(SLTBRG, "optional"),
    _order_entry(CISPEP, "optional"),
    _order_entry(SITE, "optional"),
    _order_entry(CRYST1, "mandatory"),
    _order_entry(ORIGX1, "mandatory"),
    _order_entry(ORIGX2, "mandatory"),
    _order_entry(ORIGX3, "mandatory"),
    _order_entry(SCALE1, "mandatory"),
    _order_entry(SCALE2, "mandatory"),
    _order_entry(SCALE3, "mandatory"),
    _order_entry(MTRIX1, "optional"),
    _order_entry(MTRIX2, "optional"),
    _order_entry(MTRIX3, "optional"),
    _order_entry(TVECT, "optional"),
    _order_entry(MODEL, "optional"),
    _order_entry(ATOM, "optional"),
    _order_entry(SIGATM, "optional"),
    _order_entry(ANISOU, "optional"),
    _order_entry(SIGUIJ, "optional"),
    _order_entry(TER, "optional"),
    _order_entry(HETATM, "optional"),
    _order_entry(ENDMDL, "optional"),
    _order_entry(CONECT, "optional"),
    _order_entry(MASTER, "mandatory"),
    _order_entry(END, "mandatory"),
]
1516
1517
1518
1519
1520
def iter_pdb_records(iterable):
    """Generator: walks the PDB lines produced by iterable and yields
    one PDB record object for each line whose record name is a key of
    PDBRecordMap.  Lines with any other record name are skipped.
    """
    for raw_line in iter(iterable):
        text = raw_line.rstrip()

        # the record name is the first six columns, blank padded
        record_class = PDBRecordMap.get(text[:6].ljust(6))
        if record_class is None:
            continue

        # build an empty record and let it parse its own line
        record = record_class()
        record.read(text)
        yield record
1540
1541
1543 """Class for managing a PDB file. This class inherits from a Python
1544 list object, and contains a list of PDBRecord objects.
1545 Load, save, edit, and create PDB files with this class.
1546 """
1550
1554
1558
def load_file(self, fil):
    """Loads a PDB file, appending one record object to self for each
    record produced by iter_pdb_records.

    fil is either a path string, which is opened for reading and closed
    again before returning, or an already open file object (any iterable
    of lines), which is left open for the caller.
    """
    opened_here = isinstance(fil, str)
    if opened_here:
        fileobj = open(fil, "r")
    else:
        fileobj = fil

    try:
        for pdb_record in iter_pdb_records(iter(fileobj)):
            self.append(pdb_record)
    finally:
        # only close what this method opened itself
        if opened_here:
            fileobj.close()
1570
def save_file(self, fil):
    """Saves the records in self in PDB file format, writing str() of
    each record followed by a newline.

    fil is either a path string, which is opened for writing and closed
    again before returning, or an already open file object, which is
    flushed and left open for the caller.
    """
    opened_here = isinstance(fil, str)
    if opened_here:
        fileobj = open(fil, "w")
    else:
        fileobj = fil

    try:
        for pdb_record in self:
            fileobj.write(str(pdb_record))
            fileobj.write("\n")

        # flush the object that was written to; fil itself may be a path
        # string, which has no flush() method
        fileobj.flush()
    finally:
        # only close what this method opened itself
        if opened_here:
            fileobj.close()
1584
1585
1587 """
1588 """
def __is_sucsessive_record(self, prev_rec, rec):
    """Returns True when rec looks like the record that directly follows
    prev_rec in a run of related records: both must share the same
    record name, and the "continuation" field (or, when neither record
    has one, the "serNum" field) of rec must be exactly one greater
    than that of prev_rec.
    """
    # records of different types never belong to the same run
    if rec._name != prev_rec._name:
        return False

    # continuation numbering is checked first; a record without the
    # field counts as continuation 1
    if "continuation" in prev_rec or "continuation" in rec:
        expected = prev_rec.get("continuation", 1) + 1
        return expected == rec.get("continuation", 1)

    # otherwise fall back to serNum; a record without the field counts
    # as serNum 0
    if "serNum" in prev_rec or "serNum" in rec:
        expected = prev_rec.get("serNum", 0) + 1
        return expected == rec.get("serNum", 0)

    # neither numbering field is present on either record
    return False
1623
def __call_processor_multi(self, record_list):
    """Invoke the callbacks that expect a list of related PDB records.
    The callback names are derived from the class name of the first
    record in record_list.
    """
    first = record_list[0]
    class_name = first.__class__.__name__

    # raw callback: process_<class name>, else process_default
    raw_symbol = "process_%s" % (class_name)
    if not hasattr(self, raw_symbol):
        self.process_default(record_list)
    else:
        getattr(self, raw_symbol)(record_list)

    # the second stage only runs for record classes that define process()
    if not hasattr(first, "process"):
        return

    # processed callback: preprocess_<class name>, else preprocess_default
    presult = first.process(record_list)
    pre_symbol = "preprocess_%s" % (class_name)
    if not hasattr(self, pre_symbol):
        self.preprocess_default(presult)
    else:
        getattr(self, pre_symbol)(presult)
1647
def __call_processor(self, rec):
    """Invoke the callbacks for a single PDB record (rec).  The
    callback names are derived from the class name of rec.
    """
    class_name = rec.__class__.__name__

    # raw callback: process_<class name>, else process_default
    raw_symbol = "process_%s" % (class_name)
    if not hasattr(self, raw_symbol):
        self.process_default(rec)
    else:
        getattr(self, raw_symbol)(rec)

    # the second stage only runs for record classes that define process()
    if not hasattr(rec, "process"):
        return

    # processed callback: preprocess_<class name>, else preprocess_default
    presult = rec.process(rec)
    pre_symbol = "preprocess_%s" % (class_name)
    if not hasattr(self, pre_symbol):
        self.preprocess_default(presult)
    else:
        getattr(self, pre_symbol)(presult)
1669
def process_pdb_records(self, pdb_rec_iter, filter_func=None):
    """Iterates the PDB records in pdb_rec_iter and dispatches each one
    to the handling methods of this object.

    ATOM records go straight to self.process_ATOM.  Records that have a
    _multi_record attribute start a group; the successive records that
    follow are collected into the same group, and the whole list is
    handed over at once.  Every other record is dispatched on its own.

    filter_func, when given, is called with each record that is not
    absorbed into a pending group; the record is skipped only if the
    call returns False itself.
    """
    # NOTE(review): the def line was not visible in the reviewed text;
    # the method name and the filter_func default are reconstructed --
    # confirm against the callers
    record_list = None
    prev_rec = None

    for rec in pdb_rec_iter:
        if prev_rec is not None:
            # still inside a group: absorb the record if it continues it
            if self.__is_sucsessive_record(prev_rec, rec):
                record_list.append(rec)
                prev_rec = rec
                continue

            # the group ended; hand it over before looking at rec
            self.__call_processor_multi(record_list)
            record_list = None
            prev_rec = None

        if filter_func and filter_func(rec) is False:
            continue

        if isinstance(rec, ATOM):
            self.process_ATOM(rec)
        elif hasattr(rec, "_multi_record"):
            record_list = [rec]
            prev_rec = rec
        else:
            self.__call_processor(rec)

    # flush a group that is still pending at the end of input; compare
    # against None, as the loop does, so that a record which evaluates
    # as false cannot cause the group to be dropped
    if prev_rec is not None:
        self.__call_processor_multi(record_list)
1702
1705
1708
1711
1712
1713
def test_module():
    """Command line self-test: loads the PDB file named by the first
    command line argument and writes it back out to standard output.
    Prints a usage message and exits when no argument is given.
    """
    import sys

    if len(sys.argv) < 2:
        sys.stdout.write("usage: PDB.py <PDB file path>\n")
        raise SystemExit

    pdbfil = PDBFile()
    pdbfil.load_file(sys.argv[1])
    pdbfil.save_file(sys.stdout)


if __name__ == "__main__":
    test_module()
1727
1728