1
2
3
4
5 """Classes for building a mmLib.Structure representation of biological
6 macromolecules.
7 """
8 import ConsoleOutput
9 import Library
10 import Structure
11 import UnitCell
12
13
15 """Base class of errors raised by Structure objects.  The error text is
16 stored on the instance as the message attribute."""
18 Exception.__init__(self)  # called without arguments; message is not forwarded
19 self.message = message
20
23
24
26 """Builder class for the mmLib.Structure object hierarchy.
27 StructureBuilder must be subclassed with working read_*() methods
28 (the hooks called from the constructor) to implement a working builder.
29 """
def __init__(self,
             sequence_from_structure = False,
             library_bonds = False,
             distance_bonds = False,
             auto_sort = True,
             **args):
    """Selects the target Structure, stores the build options and then
    runs the complete read sequence.

    Keyword options:
      sequence_from_structure: stored as self.calc_sequence
      library_bonds:           stored as self.library_bonds
      distance_bonds:          stored as self.distance_bonds
      auto_sort:               stored as self.auto_sort

    Entries read from **args:
      structure / struct: an existing Structure to build into; "structure"
                          wins when both are given, and a new
                          Structure.Structure() is created when neither is
      structure_id:       optional; assigned to self.struct.structure_id
      fil:                required; handed unchanged to read_start()

    Raises KeyError when "fil" is not supplied.
    """
    # choose the Structure object to build into
    if "structure" in args:
        self.struct = args["structure"]
    elif "struct" in args:
        self.struct = args["struct"]
    else:
        self.struct = Structure.Structure()

    if "structure_id" in args:
        self.struct.structure_id = args["structure_id"]

    # build options
    self.calc_sequence = sequence_from_structure
    self.library_bonds = library_bonds
    self.distance_bonds = distance_bonds
    self.auto_sort = auto_sort

    self.cache_chain = None
    self.cache_frag = None

    # any read stage may set self.halt to stop the remaining stages
    self.halt = False

    self.read_start(args["fil"])

    # the stages run in this fixed order; each is looked up only when it
    # is about to run, and nothing runs once self.halt is set
    for stage_name in ("read_start_finalize",
                       "read_atoms",
                       "read_atoms_finalize",
                       "read_metadata",
                       "read_metadata_finalize",
                       "read_end",
                       "read_end_finalize"):
        if self.halt:
            break
        getattr(self, stage_name)()

    # same truthiness test as the stage guard above, so every halt that
    # skipped a stage is also reported
    if self.halt:
        ConsoleOutput.fatal("self.halt == True")
77
79 """This method needs to be reimplemented in a functional subclass.
80 This function is called with the file object (or any other object
81 passed in to build a Structure from) to begin the reading process.
82 This is usually used to open the source file. The base implementation does nothing.
83 """
84 pass
85
87 """Called after the read_start method. Resets name_service_list, the
88 list that collects atoms which are not added to the structure directly.
89 """
90 self.name_service_list = []
91
93 """This method needs to be reimplemented in a functional subclass.
94 The subclassed read_atoms method should call load_atom once for
95 every atom in the structure, and should not call any other
96 load_* methods. The base implementation does nothing.
97 """
98 pass
99
101 """Called repeatedly by the implementation of read_atoms to load all
102 the data for a single atom. atm_map is expanded into the keyword
103 arguments of Structure.Atom. The new Atom is returned after being added
104 to the structure or, failing that, queued on name_service_list.
105 """
106
107 atm = Structure.Atom(**atm_map)
108
109 # an atom without a fragment_id or chain_id is not added here; it is
110 # queued on name_service_list instead
111 if not atm.fragment_id or not atm.chain_id:
112 self.name_service_list.append(atm)
113 return atm
114
115 try:
116 self.struct.add_atom(atm, True)
117 # on either overwrite error: warn and queue the atom as well
118 except Structure.FragmentOverwrite:
119 ConsoleOutput.warning("FragmentOverwrite: %s" % (atm))
120 self.name_service_list.append(atm)
121
122 except Structure.AtomOverwrite, err:
123 ConsoleOutput.warning("AtomOverwrite: %s" % (err))
124 self.name_service_list.append(atm)
125
126 return atm
127
129 """Runs the name service on all atoms needing to be named. This is a
130 complicated function which corrects most commonly found errors and
131 omissions from PDB files.
132 """
133 if len(self.name_service_list) == 0:
134 return
135
136 # returns suggest_chain_id when it is non-empty and not already used by
137 # the structure, otherwise the first unused ID from a fixed alphabet
138 def next_chain_id(suggest_chain_id):
139 if suggest_chain_id != "":
140 chain = self.struct.get_chain(suggest_chain_id)
141 if not chain:
142 return suggest_chain_id
143
144 # the suggestion is empty or taken: scan for a free chain_id
145 for chain_id in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789":
146 chain = self.struct.get_chain(chain_id)
147 if not chain:
148 return chain_id
149
150 raise StructureBuilderError("name_service exhausted new chain_ids")
151
152
153 # PASS 1: polymer atoms.  Atoms whose res_name the Library reports as
154 # an amino acid or nucleic acid are pulled out of name_service_list
155 # and grouped as model -> chain -> fragment -> atoms:
156 #
157 #   polymer_model_dict[model_id] = [chain, ...]
158 #   chain                        = [fragment, ...]
159 #   fragment                     = [atm, ...]
160 #
161 # a new chain starts when the polymer type, model_id or chain_id changes
162 # or the fragment_id compares lower than the one that started the chain;
163 # otherwise a new fragment may start (see the second test in the loop)
164
165 current_polymer_type = None
166 current_polymer_model_id = None
167 current_polymer_chain_id = None
168 current_polymer_frag_id = None
169 current_polymer_res_name = None
170 current_polymer_name_dict = None
171
172 polymer_model_dict = {}
173 current_frag = None
174 current_frag_list = None
175
176 for atm in self.name_service_list[:]:
177
178 if Library.library_is_amino_acid(atm.res_name):
179 polymer_type = "protein"
180 elif Library.library_is_nucleic_acid(atm.res_name):
181 polymer_type = "dna"
182 else:
183 # not a polymer residue: forget the current polymer and leave the
184 # atom in name_service_list for the second pass
185 current_polymer_type = None
186 current_polymer_model_id = None
187 current_polymer_chain_id = None
188 current_polymer_frag_id = None
189 current_polymer_res_name = None
190 current_polymer_name_dict = None
191 current_frag = None
192 current_frag_list = None
193 continue
194
195 fragment_id = Structure.FragmentID(atm.fragment_id)
196
197 # start a new chain (conditions as listed before the loop)
198
199 if polymer_type!=current_polymer_type or \
200 atm.model_id!=current_polymer_model_id or \
201 atm.chain_id!=current_polymer_chain_id or \
202 fragment_id<current_polymer_frag_id:
203
204 current_polymer_type = polymer_type
205 current_polymer_model_id = atm.model_id
206 current_polymer_chain_id = atm.chain_id
207 current_polymer_frag_id = Structure.FragmentID(atm.fragment_id)
208 current_polymer_res_name = atm.res_name
209 current_polymer_name_dict = {atm.name: True}
210
211 # the atom opens the first fragment of the new chain
212 current_frag = [atm]
213 current_frag_list = [current_frag]
214
215 # file the chain under its model
216 try:
217 model = polymer_model_dict[atm.model_id]
218 except KeyError:
219 model = [current_frag_list]
220 polymer_model_dict[atm.model_id] = model
221 else:
222 model.append(current_frag_list)
223
224
225
226 self.name_service_list.remove(atm)
227 continue
228
229
230 # start a new fragment in the current chain when res_name changes or
231 # the atom name is already in current_polymer_name_dict; that dict
232 # only ever holds the name of the atom that opened the fragment
233 # NOTE(review): names of atoms appended below are never added to it,
234 # so only a repeat of the first atom name splits a fragment -- confirm
235
236 if atm.res_name != current_polymer_res_name or current_polymer_name_dict.has_key(atm.name):
237 current_polymer_res_name = atm.res_name
238 current_polymer_name_dict = {atm.name: True}
239
240
241 current_frag = [atm]
242 current_frag_list.append(current_frag)
243
244
245
246 self.name_service_list.remove(atm)
247 continue
248
249 # same fragment as the previous atom
250 current_frag.append(atm)
251 self.name_service_list.remove(atm)
252
253 # assign a fresh chain_id to every chain position and add its atoms
254 model_ids = polymer_model_dict.keys()
255 model_ids.sort()
256 model_list = [polymer_model_dict[model_id] for model_id in model_ids]
257
258 num_chains = 0
259 for frag_list in polymer_model_dict.itervalues():
260 num_chains = max(num_chains, len(frag_list))
261
262 for chain_index in xrange(num_chains):
263
264 chain_id = next_chain_id("")
265
266 # NOTE(review): model[chain_index] assumes every model holds
267 # num_chains chains; a shorter model would raise IndexError -- confirm
268 for model in model_list:
269 frag_list = model[chain_index]
270
271 for frag in frag_list:
272 for atm in frag:
273 atm.chain_id = chain_id
274 self.struct.add_atom(atm, True)
275
276
277 del polymer_model_dict
278 del model_list
279
280
281 # PASS 2: everything still in name_service_list (non-polymer atoms).
282 # Consecutive atoms sharing (model_id, chain_id, fragment_id, res_name)
283 # without a repeated (name, alt_loc) form one fragment.  Fragments are
284 # stored as:
285 #
286 #   cr_dict[(chain_id, res_name)][model_id] = [fragment, ...]
287 #
288 # cr_key_list remembers the order in which the cr keys first appeared
289
290
291 cr_dict = {}
292 cr_key_list = []
293
294 frag_id = None
295 frag = None
296 name_dict = {}
297
298
299 for atm in self.name_service_list:
300 atm_id = (atm.name, atm.alt_loc)
301 atm_frag_id = (atm.model_id, atm.chain_id, atm.fragment_id, atm.res_name)
302
303
304 # the atom continues the current fragment
305
306 if atm_frag_id==frag_id and not name_dict.has_key(atm_id):
307 frag.append(atm)
308 name_dict[atm_id] = True
309
310 else:
311 cr_key = (atm.chain_id, atm.res_name)
312
313 # log the previous fragment, if any (cr_key is that of the new atom)
314 if frag:
315 msg = "name_service: fragment detected in cr=%s" % (
316 str(cr_key))
317 ConsoleOutput.debug(msg)
318 for a in frag:
319 ConsoleOutput.debug(" " + str(a))
320
321 # open a new fragment under cr_dict[cr_key][model_id]
322 try:
323 model_dict = cr_dict[cr_key]
324 except KeyError:
325 model_dict = cr_dict[cr_key] = {}
326 cr_key_list.append(cr_key)
327
328 try:
329 frag_list = model_dict[atm.model_id]
330 except KeyError:
331 frag_list = model_dict[atm.model_id] = []
332
333 name_dict = {atm_id: True}
334 frag_id = atm_frag_id
335 frag = [atm]
336 frag_list.append(frag)
337
338 # every remaining atom is now held in cr_dict
339 del self.name_service_list
340
341 new_chain_id = None
342 fragment_id_num = None
343
344 for cr_key in cr_key_list:
345
346 msg = "name_service: chain_id / res_name keys\n"
347 msg += " cr_key: chain_id='%s' res_name='%s'" % (
348 cr_key[0], cr_key[1])
349 ConsoleOutput.debug(msg)
350
351 # prefer the chain_id the atoms came with; next_chain_id falls back to
352 # an unused one when that ID is empty or already taken
353
354 chain_id = next_chain_id(cr_key[0])
355
356 # NOTE(review): next_chain_id returns an ID or raises, so chain_id is
357 # not None here and the elif branch looks unreachable -- confirm
358 if chain_id != None:
359 new_chain_id = chain_id
360 fragment_id_num = 0
361
362 elif new_chain_id == None or fragment_id_num == None:
363 ConsoleOutput.fatal("name_service: unable to assign any chain ids")
364
365
366 model_dict = cr_dict[cr_key]
367
368 # find the largest fragment count among the models and warn when the
369 # models do not all hold the same number of fragments
370
371
372 max_frags = -1
373 for (model, frag_list) in model_dict.iteritems():
374 frag_list_len = len(frag_list)
375
376 if max_frags == -1:
377 max_frags = frag_list_len
378 continue
379
380 if max_frags != frag_list_len:
381 strx = "name_service: model fragments not identical"
382 ConsoleOutput.debug(strx)
383 ConsoleOutput.warning(strx)
384 max_frags = max(max_frags, frag_list_len)
385
386 # number the fragments 1..max_frags; the i-th fragment of every model
387 # gets the same fragment_id, and models without an i-th one are skipped
388 for i in xrange(max_frags):
389 fragment_id_num += 1
390
391 for frag_list in model_dict.itervalues():
392 try:
393 frag = frag_list[i]
394 except IndexError:
395 continue
396
397
398
399 for atm in frag:
400 atm.chain_id = new_chain_id
401 atm.fragment_id = str(fragment_id_num)
402 self.struct.add_atom(atm, True)
403
404
405 ConsoleOutput.warning("name_service(): added chain_id=%s, res_name=%s, num_residues=%d" % (
406 new_chain_id, cr_key[1], fragment_id_num))
407
409 """After loading all atom records, use the list of atom records to
410 build the structure.
411 """
412
413 # place the atoms that were queued on name_service_list
414 self.name_service()
415
416 # sort the structure unless auto_sort was switched off
417 if self.auto_sort:
418 self.struct.sort()
419
426
428 """Sets the structure_id of the structure being built.
429 structure_id must be a str; this is checked with an assert."""
430 assert isinstance(structure_id, str)
431 self.struct.structure_id = structure_id
432
434 """Called by the implementation of read_metadata to load the unit cell
435 parameters for the structure. ucell_map must hold the keys a, b, c, alpha, beta and gamma (space_group is optional); if one is missing, nothing is set.
436 """
437 for key in ("a", "b", "c", "alpha", "beta", "gamma"):
438 if not ucell_map.has_key(key):
439 ConsoleOutput.debug("ucell_map missing: %s" % (key))
440 return
441 # space_group is passed on only when supplied; otherwise the argument is omitted
442 if ucell_map.has_key("space_group"):
443 self.struct.unit_cell = UnitCell.UnitCell(
444 a = ucell_map["a"],
445 b = ucell_map["b"],
446 c = ucell_map["c"],
447 alpha = ucell_map["alpha"],
448 beta = ucell_map["beta"],
449 gamma = ucell_map["gamma"],
450 space_group = ucell_map["space_group"])
451 else:
452 self.struct.unit_cell = UnitCell.UnitCell(
453 a = ucell_map["a"],
454 b = ucell_map["b"],
455 c = ucell_map["c"],
456 alpha = ucell_map["alpha"],
457 beta = ucell_map["beta"],
458 gamma = ucell_map["gamma"])
459
461 """Called by the implementation of read_metadata to load bond
462 information on the structure. The keys of the bond map are a 2-tuple
463 of the bonded Atom instances, and the value is a dictionary
464 containing information on the type of bond, which may also
465 be a symmetry operator.
466
467 [bond_map]
468 keys: (atm1, atm2)
469 values: bond_data_map(s)
470
471 [bond_data_map]
472 bond_type -> text description of bond type: covalent, salt bridge,
473 hydrogen, cispeptide
474
475 atm1_symop -> symmetry operation (if any) to be applied to atm1
476 atm2_symop -> same as above, for atom 2
477
478 The symmetry operations themselves are a 3x4 array of floating point
479 values composed of the 3x3 rotation matrix and the 3x1 translation.
480 """
481
482 # one create_bonds call per (atm1, atm2) key
483 for ((atm1, atm2), bd_map) in bond_map.iteritems():
484
485 # skip entries that bond an atom to itself
486
487 if atm1 == atm2:
488 ConsoleOutput.warning("silly file defines self bonded atom")
489 continue
490 # entries missing from bd_map are passed as None (dict.get)
491 atm1.create_bonds(
492 atom = atm2,
493 bond_type = bd_map.get("bond_type"),
494 atom1_symop = bd_map.get("atm1_symop"),
495 atom2_symop = bd_map.get("atm2_symop"),
496 standard_res_bond = False)
497
499 """The sequence map contains the following keys: chain_id: the
500 chain ID of the sequence; num_res: the number of residues in the
501 sequence; sequence_list: a list of 3-letter codes of the residues
502 in the sequence. Only chain_id and sequence_list are read here; the map is ignored when either is missing.
503 """
504 try:
505 chain_id = sequence_map["chain_id"]
506 sequence_list = sequence_map["sequence_list"]
507 except KeyError:
508 return
509
510 # set the sequence on the matching chain of every model that has one
511
512 for model in self.struct.iter_models():
513 chain = model.get_chain(chain_id)
514 if chain:
515 chain.sequence.set_from_three_letter(sequence_list)
516
518 """The argument helix_list is a list of Python dictionaries with
519 information to build AlphaHelix objects into the Structure. Dictionaries lacking helix_id, chain_id1, frag_id1, chain_id2 or frag_id2 are skipped.
520
521 The dictionary has attributes:
522 helix_id: The ID of the helix
523 chain_id1, chain_id2: The chain_id(s) where the helix is located
524 frag_id1: The start fragment_id of the helix
525 frag_id2: The end fragment_id of the helix
526 helix_class: The PDB helix class number
527 details: Text comment about the helix
528 """
529 for helix in helix_list:
530 # the lookups below only test that the required keys are present
531 try:
532 helix["helix_id"]
533 helix["chain_id1"]
534 helix["frag_id1"]
535 helix["chain_id2"]
536 helix["frag_id2"]
537 except KeyError:
538 continue
539
540 # one AlphaHelix per model; the whole dictionary is passed as keyword arguments
541 for model in self.struct.iter_models():
542 alpha_helix = Structure.AlphaHelix(model_id=model.model_id, **helix)
543 model.add_alpha_helix(alpha_helix)
544 alpha_helix.construct_segment()
545
547 """The argument beta_sheet_list is a list of Python dictionaries with
548 information to build BetaSheet objects into the Structure.
549
550 The dictionary has attributes:
551 sheet_id: ID of the sheet
552 num_strands: total number of strands in the beta sheet
553 strand_list: list of dictionaries describing the strand with
554 the following attributes:
555
556 chain_id1/frag_id1: chain_id and fragment_id of initial residue
557 in the strand
558 chain_id2/frag_id2: chain_id and fragment_id of end residue
559 in the strand
560 sense: the sense of the strand with respect to the
561 previous strand, either the string
562 parallel or anti_parallel
563
564 reg_chain_id, reg_frag_id, reg_atom:
565 registration atom in current strand
566 reg_prev_chain_id, reg_prev_frag_id, reg_prev_atom:
567 registration atom in previous strand
568 """
569 for sheet in beta_sheet_list:
570 # the lookups below only test that the required keys are present
571 try:
572 sheet["sheet_id"]
573 sheet["strand_list"]
574 except KeyError:
575 continue
576
577 # one BetaSheet per model; the whole dictionary is passed as keyword arguments
578 # NOTE(review): AlphaHelix is constructed with model_id=, this uses model= -- confirm keyword
579 for model in self.struct.iter_models():
580 beta_sheet = Structure.BetaSheet(model=model.model_id, **sheet)
581
582 for strand in sheet["strand_list"]:
583 # strands lacking a required key are skipped
584 try:
585 strand["chain_id1"]
586 strand["frag_id1"]
587 strand["frag_id1"]  # NOTE(review): repeats the check above; chain_id2 is never checked -- confirm
588 strand["frag_id2"]
589 except KeyError:
590 continue
591
592 beta_strand = Structure.Strand(**strand)
593 beta_sheet.add_strand(beta_strand)
594
595 model.add_beta_sheet(beta_sheet)
596 beta_sheet.construct_segments()
597
599 """The argument site_list is a list of Python dictionaries with
600 information to build Site objects into the Structure. Dictionaries lacking site_id or fragment_list are skipped.
601 """
602 for site_desc in site_list:
603 # the lookups below only test that the required keys are present
604 try:
605 site_desc["site_id"]
606 site_desc["fragment_list"]
607 except KeyError:
608 continue
609 # one Site per model; the whole dictionary is passed as keyword arguments
610 for model in self.struct.iter_models():
611 site = Structure.Site(**site_desc)
612 model.add_site(site)
613 site.construct_fragments()
614
619
621 """This method may be reimplemented in a functional subclass.
622 The subclassed read_end method can be used for any clean up from
623 the file loading process you need, or may be left unimplemented.
624 The base implementation does nothing."""
625 pass
626
647