Package mmLib :: Module PDBBuilder
[hide private]
[frames | no frames]

Source Code for Module mmLib.PDBBuilder

   1  ## Copyright 2002-2010 by PyMMLib Development Group (see AUTHORS file) 
   2  ## This code is part of the PyMMLib distribution and governed by 
   3  ## its license.  Please see the LICENSE file that should have been 
   4  ## included as part of this package. 
   5  """Convert a Structure object to its PDBFile description. 
   6  """ 
   7  import ConsoleOutput 
   8  import Library 
   9  import PDB 
  10  import mmCIF 
  11  import Structure 
  12  import StructureBuilder 
  13   
  14   
  15  ## class specification for alpha helices mapping mmLib classification 
  16  ## strings with PDB helix class integers, -1 where no PDB helix class 
  17  ## would apply; here is the PDB helix class description 
  18  ## 
  19  ##     TYPE OF HELIX             CLASS NUMBER (COLUMNS 39 - 40) 
  20  ##     -------------------------------------------------------------- 
  21  ##     Right-handed alpha (default)                1 
  22  ##     Right-handed omega                          2 
  23  ##     Right-handed pi                             3 
  24  ##     Right-handed gamma                          4 
  25  ##     Right-handed 310                            5 
  26  ##     Left-handed alpha                           6 
  27  ##     Left-handed omega                           7 
  28  ##     Left-handed gamma                           8 
  29  ##     27 ribbon/helix                             9 
  30  ##     Polyproline                                10 
  31  ## 
  32   
  33  HELIX_CLASS_LIST = [ 
  34      ## protein helix classes 
  35      ("HELIX_P",      -1), 
  36      ("HELX_OT_P",    -1), 
  37      ("HELX_RH_P",    -1), 
  38      ("HELX_RH_OT_P", -1), 
  39   
  40      ("HELX_RH_AL_P",  1), 
  41      ("HELX_RH_GA_P",  4), 
  42      ("HELX_RH_OM_P",  2), 
  43      ("HELX_RH_PI_P",  3), 
  44      ("HELX_RH_27_P",  9), 
  45      ("HELX_RH_3T_P",  5), 
  46      ("HELX_RH_PP_P", 10), 
  47       
  48      ("HELX_LH_P",    -1), 
  49      ("HELX_LH_OT_P", -1), 
  50   
  51      ("HELX_LH_AL_P",  6), 
  52      ("HELX_LH_GA_P",  8), 
  53      ("HELX_LH_OM_P",  7), 
  54      ("HELX_LH_PI_P", -1), 
  55      ("HELX_LH_27_P",  9), 
  56      ("HELX_LH_3T_P", -1), 
  57      ("HELX_LH_PP_P", 10), 
  58   
  59      ## nucleic acid helix classes 
  60      ("HELX_N",       -1), 
  61      ("HELX_OT_N",    -1), 
  62      ("HELX_RH_N",    -1), 
  63      ("HELX_RH_OT_N", -1), 
  64      ("HELX_RH_A_N",  -1), 
  65      ("HELX_RH_B_N",  -1), 
  66      ("HELX_RH_Z_N",  -1), 
  67      ("HELX_LH_N",    -1), 
  68      ("HELX_LH_OT_N", -1), 
  69      ("HELX_LH_A_N",  -1), 
  70      ("HELX_LH_B_N",  -1), 
  71      ("HELX_LH_Z_N",  -1) 
  72      ] 
  73   
  74   
def setmap(smap, skey, dmap, dkey):
    """Copy smap[skey] into dmap[dkey], converted with str().

    Returns True when skey is present in smap (dmap is updated), and
    False when it is absent (dmap is left untouched).
    """
    ## "in" replaces dict.has_key(), which no longer exists in Python 3
    if skey in smap:
        dmap[dkey] = str(smap[skey])
        return True
    return False
82 83
def setmaps(smap, skey, dmap, dkey):
    """Copy smap[skey] into dmap[dkey] as a string.

    Returns True when the value was stored.  Returns False when skey is
    absent from smap, or when str() raises ValueError (a diagnostic line
    is printed in that case).
    """
    if skey not in smap:
        return False

    try:
        dmap[dkey] = str(smap[skey])
    except ValueError:
        ## single-argument print() call behaves the same on Python 2 and 3
        print("setmaps(): ValueError")
        return False
    return True
95 96
def setmapi(smap, skey, dmap, dkey):
    """Copy smap[skey] into dmap[dkey] as an integer.

    Returns True when the value was stored.  Returns False when skey is
    absent from smap, when the value is the empty string, or when int()
    raises ValueError (a diagnostic line is printed in that case).
    """
    if skey not in smap or smap[skey] == "":
        return False

    try:
        dmap[dkey] = int(smap[skey])
    except ValueError:
        ## single-argument print() call behaves the same on Python 2 and 3
        print("setmapi(): ValueError")
        return False
    return True
108 109
def setmapf(smap, skey, dmap, dkey):
    """Copy smap[skey] into dmap[dkey] as a float.

    Returns True when the value was stored.  Returns False when skey is
    absent from smap, when the value is the empty string, or when float()
    raises ValueError (a diagnostic line is printed in that case).  No
    default value is written to dmap on failure.
    """
    if skey not in smap or smap[skey] == "":
        return False

    try:
        dmap[dkey] = float(smap[skey])
    except ValueError:
        ## single-argument print() call behaves the same on Python 2 and 3
        print("setmapf(): ValueError dmap[%s]=smap[%s]=%s" % (
            dkey, skey, smap[skey]))
        return False
    return True
123 124
125 -class PDBStructureBuilder(StructureBuilder.StructureBuilder, 126 PDB.RecordProcessor):
127 """Builds a new Structure object by loading a PDB file. 128 """
def pdb_error(self, rec_name, text):
    """Report a problem with the PDB record type rec_name as a
    ConsoleOutput warning of the form "PDB::<rec_name> <text>".
    """
    message = "PDB::%s %s" % (rec_name, text)
    ConsoleOutput.warning(message)
131
def get_fragment_id(self, rec, res_seq = "resSeq", icode = "iCode"):
    """Build a fragment ID string from a record.

    rec is the record to read; res_seq and icode are the names of the
    residue sequence number and insertion code fields in that record.
    Returns str(rec[res_seq]) with rec[icode] appended when the insertion
    code field is present, or None when the res_seq field is missing.
    """
    ## "in" replaces dict.has_key(), which no longer exists in Python 3
    if res_seq not in rec:
        return None

    fragment_id = str(rec[res_seq])
    if icode in rec:
        fragment_id += rec[icode]
    return fragment_id
139
def read_start(self, fil, update_cb = None):
    """Create a new PDB.PDBFile, store it as self.pdb_file, and load fil
    into it.  The update_cb argument is accepted but not used here.
    """
    self.pdb_file = pdb_file = PDB.PDBFile()
    pdb_file.load_file(fil)
143
def load_atom(self, atm_map):
    """Override load_atom to maintain a serial_num->atm map.

    Delegates to StructureBuilder.StructureBuilder.load_atom() and, when
    atm_map carries a "serial" entry, records the result in
    self.atom_serial_map under that serial number.  The value returned by
    the base implementation is passed back to the caller instead of being
    dropped.
    """
    atm = StructureBuilder.StructureBuilder.load_atom(self, atm_map)

    ## map PDB atom serial number -> Atom object
    if "serial" in atm_map:
        self.atom_serial_map[atm_map["serial"]] = atm

    return atm
153
def read_atoms(self):
    """Process the coordinate records of self.pdb_file.

    Sets up the per-read state used by the process_* handlers
    (atom_serial_map, atm_map, model_num), runs process_pdb_records() over
    the coordinate record types only, loads the atom still pending in
    atm_map, and removes the temporary model_num/atm_map attributes.
    """
    ## PDB atom serial number -> structure atom
    self.atom_serial_map = {}
    ## map describing the atom currently being assembled
    self.atm_map = {}
    ## model number currently in effect
    self.model_num = None

    def is_coordinate_record(rec):
        coordinate_types = (
            PDB.ATOM, PDB.SIGATM, PDB.ANISOU, PDB.SIGUIJ,
            PDB.TER, PDB.MODEL, PDB.ENDMDL)
        return isinstance(rec, coordinate_types)

    ## process the coordinate records
    self.process_pdb_records(self.pdb_file, is_coordinate_record)

    ## the final atom is still pending because no following ATOM record
    ## triggered its load
    if self.atm_map:
        self.load_atom(self.atm_map)

    ## cleanup
    del self.model_num
    del self.atm_map
183
def read_metadata(self):
    """Process the non-coordinate records of self.pdb_file.

    Creates the temporary accumulators filled by the process_* handlers
    (bond_map, helix_list, beta_sheet_list, site_list), runs
    process_pdb_records() over every record that is not a coordinate
    record, then hands each accumulator to its load_* method and deletes
    it.
    """
    ## extracted bond information
    self.bond_map = {}

    ## secondary structure and site annotation
    self.helix_list = []
    self.beta_sheet_list = []
    self.site_list = []

    def is_metadata_record(rec):
        coordinate_types = (
            PDB.ATOM, PDB.SIGATM, PDB.ANISOU, PDB.SIGUIJ,
            PDB.TER, PDB.MODEL, PDB.ENDMDL)
        return not isinstance(rec, coordinate_types)

    ## process the non-coordinate records
    self.process_pdb_records(self.pdb_file, is_metadata_record)

    ## chemical bond information
    self.load_bonds(self.bond_map)
    del self.bond_map

    ## secondary structure annotation
    self.load_alpha_helicies(self.helix_list)
    del self.helix_list

    self.load_beta_sheets(self.beta_sheet_list)
    del self.beta_sheet_list

    self.load_sites(self.site_list)
    del self.site_list
def process_ATOM(self, rec):
    """Start a new atom description from an ATOM record.

    Any atom still pending in self.atm_map is passed to load_atom() first,
    since this record marks the start of a new atom.  The fields present in
    rec are then copied into self.atm_map under the builder's key names
    (for example altLoc -> alt_loc, tempFactor -> temp_factor), the
    fragment_id is composed from resSeq and iCode, and the current model
    number is added as model_id when one is set.
    """
    ## load current atom since this record indicates a new atom
    if self.atm_map:
        self.load_atom(self.atm_map)
        self.atm_map = {}

    ## optimization
    atm_map = self.atm_map

    ## always derive element from atom name for PDB files -- they are
    ## too messed up to use the element column
    try:
        name = rec["name"]
    except KeyError:
        atm_map["name"] = ""
        atm_map["element"] = ""
    else:
        atm_map["name"] = name.strip()

        res_name = rec.get("resName", "")
        gelement = Library.library_guess_element_from_name(name, res_name)
        if gelement is not None:
            atm_map["element"] = gelement

    ## additional atom information: (record field, atm_map key)
    for rec_key, atm_key in (("serial", "serial"),
                             ("altLoc", "alt_loc"),
                             ("resName", "res_name"),
                             ("chainID", "chain_id")):
        if rec_key in rec:
            atm_map[atm_key] = rec[rec_key]

    ## construct fragment_id
    if "resSeq" in rec:
        if "iCode" in rec:
            atm_map["fragment_id"] = "%d%s" % (rec["resSeq"], rec["iCode"])
        else:
            atm_map["fragment_id"] = "%d" % (rec["resSeq"])

    ## add the model number for the atom
    if self.model_num is not None:
        atm_map["model_id"] = self.model_num

    ## position, occupancy, temperature factor and the free-use
    ## columns 67 and 68: (record field, atm_map key)
    for rec_key, atm_key in (("x", "x"),
                             ("y", "y"),
                             ("z", "z"),
                             ("occupancy", "occupancy"),
                             ("tempFactor", "temp_factor"),
                             ("column6768", "column6768")):
        if rec_key in rec:
            atm_map[atm_key] = rec[rec_key]
285
def process_HETATM(self, rec):
    """Handle a HETATM record exactly like an ATOM record by delegating
    to process_ATOM().
    """
    handle_atom = self.process_ATOM
    handle_atom(rec)
288
def process_SIGATM(self, rec):
    """Copy the float-valued SIGATM fields of rec into the current
    atom map (self.atm_map) under their sig_* key names.
    """
    ## (record field, atm_map key)
    field_pairs = (
        ("sigX", "sig_x"),
        ("sigY", "sig_y"),
        ("sigZ", "sig_z"),
        ("sigOccupancy", "sig_occupancy"),
        ("sigTempFactor", "sig_temp_factor"),
    )
    for rec_key, atm_key in field_pairs:
        setmapf(rec, rec_key, self.atm_map, atm_key)
295
def process_ANISOU(self, rec):
    """Store the six ANISOU tensor fields of rec in the current atom map
    as u11..u23, each divided by 10000.0.  A field missing from rec is
    treated as 0.0.
    """
    ## (atm_map key, record field)
    tensor_fields = (
        ("u11", "u[0][0]"),
        ("u22", "u[1][1]"),
        ("u33", "u[2][2]"),
        ("u12", "u[0][1]"),
        ("u13", "u[0][2]"),
        ("u23", "u[1][2]"),
    )
    for atm_key, rec_key in tensor_fields:
        self.atm_map[atm_key] = rec.get(rec_key, 0.0) / 10000.0
303
def process_SIGUIJ(self, rec):
    """Store the six SIGUIJ fields of rec in the current atom map as
    sig_u11..sig_u23, each divided by 10000.0.  A field missing from rec
    is treated as 0.0.
    """
    ## (atm_map key, record field)
    sigma_fields = (
        ("sig_u11", "sig[1][1]"),
        ("sig_u22", "sig[2][2]"),
        ("sig_u33", "sig[3][3]"),
        ("sig_u12", "sig[1][2]"),
        ("sig_u13", "sig[1][3]"),
        ("sig_u23", "sig[2][3]"),
    )
    for atm_key, rec_key in sigma_fields:
        self.atm_map[atm_key] = rec.get(rec_key, 0.0) / 10000.0
311
def process_MODEL(self, rec):
    """Remember the serial number of the MODEL record as the current
    model number (None when the record has no "serial" field).
    """
    serial = rec.get("serial")
    self.model_num = serial
314
def process_ENDMDL(self, rec):
    """Clear the current model number; rec itself is not inspected."""
    ## atoms read after ENDMDL get no model_id until the next MODEL record
    self.model_num = None
317
def process_HEADER(self, rec):
    """Record the HEADER fields on the structure.

    Sets self.struct.header to "idCode:classification:depDate" (missing
    fields become empty strings), loads the structure ID when idCode is
    non-empty, and stores the three values in the structure's cifdb.
    """
    header_fields = [
        rec.get(key, "") for key in ("idCode", "classification", "depDate")]
    self.struct.header = "%s:%s:%s" % tuple(header_fields)

    if rec.get("idCode"):
        self.load_structure_id(rec["idCode"])

    ## (cif table, cif column, record field)
    cif_targets = (
        ("struct_keywords", "pdbx_keywords", "classification"),
        ("database_pdb_rev", "date_original", "depDate"),
        ("entry", "id", "idCode"),
    )
    for table, column, rec_key in cif_targets:
        self.struct.cifdb.set_single(table, column, rec.get(rec_key))
332
def preprocess_TITLE(self, title):
    """Store title on the structure, both as struct.title and as the
    struct/title value of its cifdb.
    """
    struct = self.struct
    struct.title = title
    struct.cifdb.set_single("struct", "title", title)
336
def preprocess_COMPND(self, compnd_list):
    """Translate each COMPND description in compnd_list into rows of the
    cifdb "entity" and "entity_keywords" tables.  A row is appended only
    when at least one value was copied into it.
    """
    cifdb = self.struct.cifdb
    entity = cifdb.confirm_table("entity")
    entity_keywords = cifdb.confirm_table("entity_keywords")

    ## (COMPND key, entity_keywords column)
    keyword_columns = (
        ("FRAGMENT", "pdbx_fragment"),
        ("EC", "pdbx_ec"),
        ("MUTATION", "pdbx_mutation"),
    )

    for compnd in compnd_list:
        erow = mmCIF.mmCIFRow()
        setmaps(compnd, "MOLECULE", erow, "pdbx_description")
        if erow:
            entity.append(erow)

        ekrow = mmCIF.mmCIFRow()
        for compnd_key, column in keyword_columns:
            setmaps(compnd, compnd_key, ekrow, column)
        if ekrow:
            entity_keywords.append(ekrow)
354
def preprocess_SOURCE(self, source_list):
    """Translate each SOURCE description in source_list into rows of the
    cifdb "entity_src_nat" and "entity_src_gen" tables.

    The SECRETION and PLASMID keys go to the entity_src_nat row; every
    other recognized key goes to the entity_src_gen row.  A row is
    appended only when at least one value was copied into it.
    """
    entity_src_nat = self.struct.cifdb.confirm_table("entity_src_nat")
    entity_src_gen = self.struct.cifdb.confirm_table("entity_src_gen")

    ## (SOURCE key, entity_src_nat column)
    nat_columns = (
        ("SECRETION", "pdbx_secretion"),
        ("PLASMID", "pdbx_plasmid_name"),
    )

    ## (SOURCE key, entity_src_gen column); the GENUS mapping used to be
    ## issued twice, it is listed once here
    gen_columns = (
        ("FRAGMENT", "pdbx_gene_src_fragment"),
        ("ORGANISM_SCIENTIFIC", "pdbx_gene_src_scientific_name"),
        ("ORGANISM_COMMON", "pdbx_gene_src_common_name"),
        ("GENUS", "pdbx_gene_src_genus"),
        ("SPECIES", "pdbx_gene_src_species"),
        ("STRAIN", "pdbx_gene_src_strain"),
        ("VARIANT", "pdbx_gene_src_variant"),
        ("CELL_LINE", "pdbx_gene_src_cell_line"),
        ("ATCC", "pdbx_gene_src_atcc"),
        ("ORGAN", "pdbx_gene_src_organ"),
        ("TISSUE", "pdbx_gene_src_tissue"),
        ("CELL", "pdbx_gene_src_cell"),
        ("ORGANELLE", "pdbx_gene_src_organelle"),
        ("CELLULAR_LOCATION", "pdbx_gene_src_cellular_location"),
        ("GENE", "pdbx_gene_src_gene"),
        ("EXPRESSION_SYSTEM", "pdbx_host_org_scientific_name"),
        ("EXPRESSION_SYSTEM_COMMON", "pdbx_host_org_common_name"),
        ("EXPRESSION_SYSTEM_GENUS", "pdbx_host_org_genus"),
        ("EXPRESSION_SYSTEM_SPECIES", "pdbx_host_org_species"),
        ("EXPRESSION_SYSTEM_STRAIN", "pdbx_host_org_strain"),
        ("EXPRESSION_SYSTEM_VARIANT", "pdbx_host_org_variant"),
        ("EXPRESSION_SYSTEM_CELL_LINE", "pdbx_host_org_cell_line"),
        ("EXPRESSION_SYSTEM_ATCC_NUMBER", "pdbx_host_org_atcc"),
        ("EXPRESSION_SYSTEM_ORGAN", "pdbx_host_org_organ"),
        ("EXPRESSION_SYSTEM_TISSUE", "pdbx_host_org_tissue"),
        ("EXPRESSION_SYSTEM_CELL", "pdbx_host_org_cell"),
        ("EXPRESSION_SYSTEM_ORGANELLE", "pdbx_host_org_organelle"),
        ("EXPRESSION_SYSTEM_CELLULAR_LOCATION",
         "pdbx_host_org_cellular_location"),
        ("EXPRESSION_SYSTEM_VECTOR_TYPE", "pdbx_host_org_vector_type"),
        ("EXPRESSION_SYSTEM_VECTOR", "pdbx_host_org_vector"),
        ("EXPRESSION_SYSTEM_PLASMID", "plasmid"),
        ("EXPRESSION_SYSTEM_GENE", "pdbx_host_org_gene"),
        ("OTHER_DETAILS", "pdbx_description"),
    )

    for source in source_list:
        nrow = mmCIF.mmCIFRow()
        grow = mmCIF.mmCIFRow()

        for source_key, column in nat_columns:
            setmaps(source, source_key, nrow, column)
        for source_key, column in gen_columns:
            setmaps(source, source_key, grow, column)

        if nrow:
            entity_src_nat.append(nrow)
        if grow:
            entity_src_gen.append(grow)
440
def preprocess_KEYWDS(self, keywds_list):
    """Append one row with a "text" column to the cifdb "struct_keywords"
    table for every entry of keywds_list.
    """
    table = self.struct.cifdb.confirm_table("struct_keywords")
    for keyword in keywds_list:
        row = mmCIF.mmCIFRow({"text": keyword})
        table.append(row)
445
def preprocess_AUTHOR(self, author_list):
    """Append one row with a "name" column to the cifdb "audit_author"
    table for every entry of author_list.
    """
    table = self.struct.cifdb.confirm_table("audit_author")
    for author_name in author_list:
        row = mmCIF.mmCIFRow({"name": author_name})
        table.append(row)
450
def preprocess_EXPDTA(self, expdta_list):
    """Record the experimental techniques given as (technique, details)
    pairs in expdta_list.

    The technique of the first pair becomes
    self.struct.experimental_method; every pair adds a row to the cifdb
    "exptl" table, with a "details" column only when details is non-empty.
    """
    ## only the first listed technique is kept on the structure itself
    for first_technique, _details in expdta_list:
        self.struct.experimental_method = first_technique
        break

    exptl = self.struct.cifdb.confirm_table("exptl")
    for pair in expdta_list:
        technique, details = pair
        row = mmCIF.mmCIFRow({"method": technique})
        if details:
            row["details"] = details
        exptl.append(row)
462
def preprocess_SEQRES(self, seqres):
    """Hand the SEQRES sequence data straight to load_sequence()."""
    load = self.load_sequence
    load(seqres)
465
def process_CRYST1(self, rec):
    """Collect the unit cell description from a CRYST1 record and pass it
    to load_unit_cell().

    The cell lengths and angles are copied as floats, sgroup as the string
    "space_group", and z as an integer; fields which are missing or fail
    conversion are left out of the map.
    """
    ucell_map = {}

    ## cell lengths and angles keep their record field names
    for key in ("a", "b", "c", "alpha", "beta", "gamma"):
        setmapf(rec, key, ucell_map, key)

    setmaps(rec, "sgroup", ucell_map, "space_group")
    setmapi(rec, "z", ucell_map, "z")

    self.load_unit_cell(ucell_map)
480
def process_HELIX(self, rec):
    """Add the information of a HELIX record to self.helix_list.

    Records without a helixID are ignored.  The helix dictionary with a
    matching "helix_id" is reused when one exists, otherwise a new one is
    appended; the residue names, chain IDs, fragment IDs, helix class and
    comment found in rec are then stored in it.
    """
    ## the helixID field is mandatory
    if "helixID" not in rec:
        return
    helix_id = rec["helixID"]

    ## find the dictionary describing this helix, or create it
    for helix in self.helix_list:
        if helix["helix_id"] == helix_id:
            break
    else:
        helix = {"helix_id": helix_id}
        self.helix_list.append(helix)

    ## (record field, helix key)
    for rec_key, helix_key in (("initResName", "res_name1"),
                               ("endResName", "res_name2"),
                               ("initChainID", "chain_id1"),
                               ("endChainID", "chain_id2")):
        setmaps(rec, rec_key, helix, helix_key)

    ## (sequence number field, insertion code field, helix key)
    for seq_key, icode_key, helix_key in (
            ("initSeqNum", "initICode", "frag_id1"),
            ("endSeqNum", "endICode", "frag_id2")):
        frag_id = self.get_fragment_id(rec, seq_key, icode_key)
        if frag_id is not None:
            helix[helix_key] = frag_id

    setmaps(rec, "helixClass", helix, "helix_class")
    setmaps(rec, "comment", helix, "details")
517
def process_SHEET(self, rec):
    """Add the strand described by a SHEET record to self.beta_sheet_list.

    Records without a sheetID are ignored.  The sheet dictionary with a
    matching "sheet_id" is reused when one exists; otherwise a new one is
    appended and numStrands is read into it.  A strand dictionary is built
    from the record and appended to the sheet's "strand_list".
    """
    ## the sheetID field is mandatory
    if "sheetID" not in rec:
        return
    sheet_id = rec["sheetID"]

    ## find the dictionary describing this sheet, or create it
    for sheet in self.beta_sheet_list:
        if sheet["sheet_id"] == sheet_id:
            break
    else:
        sheet = {"sheet_id": sheet_id}
        self.beta_sheet_list.append(sheet)
        setmapi(rec, "numStrands", sheet, "num_strands")

    ## the dictionary for this strand
    strand = {}

    ## string fields: (record field, strand key); the cur*/prev* fields
    ## describe the registration with the previous strand
    for rec_key, strand_key in (("initResName", "res_name1"),
                                ("initChainID", "chain_id1"),
                                ("endResName", "res_name2"),
                                ("endChainID", "chain_id2"),
                                ("curResName", "reg_res_name"),
                                ("curChainID", "reg_chain_id"),
                                ("curAtom", "reg_atom"),
                                ("prevResName", "reg_prev_res_name"),
                                ("prevChainID", "reg_prev_chain_id"),
                                ("prevAtom", "reg_prev_atom")):
        setmaps(rec, rec_key, strand, strand_key)

    ## fragment IDs:
    ## (sequence number field, insertion code field, strand key)
    for seq_key, icode_key, strand_key in (
            ("initSeqNum", "initICode", "frag_id1"),
            ("endSeqNum", "endICode", "frag_id2"),
            ("curResSeq", "curICode", "reg_frag_id"),
            ("prevResSeq", "prevICode", "reg_prev_frag_id")):
        frag_id = self.get_fragment_id(rec, seq_key, icode_key)
        if frag_id is not None:
            strand[strand_key] = frag_id

    ## sense
    if "sense" in rec:
        sense = rec["sense"]
        if sense == 1:
            strand["sense"] = "parallel"
        elif sense == -1:
            strand["sense"] = "anti_parallel"

    ## append to the strand list
    sheet.setdefault("strand_list", []).append(strand)
581
def process_SITE(self, rec):
    """Add the residues listed in a SITE record to self.site_list.

    Records without a siteID are ignored.  The site dictionary with a
    matching "site_id" is reused when one exists; otherwise a new one is
    appended and numRes is read into it.  Up to four residue descriptions
    are read from the record and appended to the site's "fragment_list";
    reading stops at the first slot lacking its chain ID or sequence
    number.
    """
    ## the siteID field is mandatory
    if "siteID" not in rec:
        return
    site_id = rec["siteID"]

    ## find the dictionary describing this site, or create it
    for site in self.site_list:
        if site["site_id"] == site_id:
            break
    else:
        site = {"site_id": site_id}
        self.site_list.append(site)
        setmapi(rec, "numRes", site, "num_residues")

    ## add the residue descriptions
    for slot in range(1, 5):
        chain_key = "chainID%d" % (slot)
        res_name = "resName%d" % (slot)
        seq_key = "seq%d" % (slot)
        icode_key = "icode%d" % (slot)

        ## check for mandatory fields
        if chain_key not in rec or seq_key not in rec:
            break

        ## get residue information and create dictionary
        residue = {}
        setmaps(rec, chain_key, residue, "chain_id")
        setmaps(rec, res_name, residue, "res_name")
        residue["frag_id"] = self.get_fragment_id(rec, seq_key, icode_key)

        ## add the fragment description to the site's fragment list
        site.setdefault("fragment_list", []).append(residue)
631
def bond_processor(self, **args):
    """Record a bond between two atoms in self.bond_map.

    Keyword arguments (N is 1 or 2, one set per atom):
        rec               = PDB record (required)
        bond_type         = bond type string stored on the bond (required)
        atmN              = Atom object, if you want to override the lookup
        chain_idN_field   = PDB field name for the chain ID
        res_seqN_field    = PDB field name for the residue sequence num
        icodeN_field      = PDB field name for the residue insertion code
        nameN_field       = PDB field name for the atom name
        alt_locN_field    = PDB field name for the atom alt_loc
        symopN_field      = PDB field name for the atom symmetry operation

        chain_idN         = override the chain ID
        frag_idN          = override the fragment ID
        nameN             = override the atom name
        alt_locN          = override the atom alt_loc
        symopN            = override the symmetry operation

    An override takes precedence over the value read from the record
    field.  Returns the 2-tuple of atoms used as the key in self.bond_map,
    or None when either atom could not be found.
    """
    rec = args["rec"]

    def lookup_atom(chain_id, frag_id, name, alt_loc):
        ## walk struct -> chain -> fragment -> atom; any missing level
        ## means the atom is unknown
        try:
            atm = self.struct[chain_id][frag_id][name]
        except (KeyError, TypeError):
            return None

        ## prefer the requested alternate location, but fall back to the
        ## atom found by name when that alt_loc does not exist
        if alt_loc:
            try:
                atm = atm[alt_loc]
            except KeyError:
                pass

        return atm

    def resolve_atom(num):
        ## an explicitly passed atmN bypasses the lookup entirely
        try:
            return args["atm%d" % (num)]
        except KeyError:
            pass

        chain_id = (args.get("chain_id%d" % (num)) or
                    rec.get(args.get("chain_id%d_field" % (num))))
        frag_id = (args.get("frag_id%d" % (num)) or
                   self.get_fragment_id(
                       rec,
                       args.get("res_seq%d_field" % (num)),
                       args.get("icode%d_field" % (num))))
        ## the atom name is read from the record field *named by*
        ## nameN_field (this used to look up the literal key
        ## "nameN_field", which never matched)
        name = (args.get("name%d" % (num)) or
                rec.get(args.get("name%d_field" % (num))))
        alt_loc = (args.get("alt_loc%d" % (num)) or
                   rec.get(args.get("alt_loc%d_field" % (num))))

        return lookup_atom(chain_id, frag_id, name, alt_loc)

    atm1 = resolve_atom(1)
    atm2 = resolve_atom(2)

    ## unable to retrieve the atoms?  compare against None explicitly so
    ## an atom object which defines its own truth value is not mistaken
    ## for a failed lookup
    if atm1 is None or atm2 is None:
        return None

    ## the bond map is keyed from the 2-tuple of the atoms involved in
    ## the bond; they are sorted by their object ID just to have a
    ## definite order
    if id(atm1) < id(atm2):
        bkey = (atm1, atm2)
    else:
        bkey = (atm2, atm1)

    bond = self.bond_map.setdefault(bkey, {})

    ## set bond type
    bond["bond_type"] = args["bond_type"]

    ## symmetry operations
    symop1 = args.get("symop1") or rec.get(args.get("symop1_field"))
    symop2 = args.get("symop2") or rec.get(args.get("symop2_field"))

    if symop1:
        bond["symop1"] = symop1
    if symop2:
        bond["symop2"] = symop2

    return bkey
def process_SSBOND(self, rec):
    """Register the disulfide bond described by an SSBOND record.

    Both atoms are looked up by the fixed atom name "SG" in the residues
    named by the record; a pdb_error is reported when bond_processor()
    cannot resolve the bond.
    """
    params = {"rec": rec, "bond_type": "disulf"}

    ## the two ends of the bond use the same field layout, suffixed 1 / 2
    for end in ("1", "2"):
        params["chain_id%s_field" % (end)] = "chainID" + end
        params["res_seq%s_field" % (end)] = "seqNum" + end
        params["icode%s_field" % (end)] = "iCode" + end
        params["name" + end] = "SG"
        params["alt_loc%s_field" % (end)] = None
        params["symop%s_field" % (end)] = "sym" + end

    bkey = self.bond_processor(**params)
    if not bkey:
        self.pdb_error("SSBOND", "Atom not found")
739 762
def process_HYDBND(self, rec):
    """Register the hydrogen bond described by a HYDBND record.

    The two bonded atoms are resolved by bond_processor() from the
    chainID/resSeq/iCode/name/altLoc/sym fields suffixed 1 and 2; a
    pdb_error is reported when the bond cannot be resolved.

    The hydrogen atom fields of the record (nameH, chainH, ...) are not
    used: the previous lookup of that atom computed a value which was
    never read, so it has been removed.
    """
    x = self.bond_processor(
        rec = rec,

        bond_type = "hydrog",

        chain_id1_field = "chainID1",
        res_seq1_field = "resSeq1",
        icode1_field = "iCode1",
        name1_field = "name1",
        alt_loc1_field = "altLoc1",
        symop1_field = "sym1",

        chain_id2_field = "chainID2",
        res_seq2_field = "resSeq2",
        icode2_field = "iCode2",
        name2_field = "name2",
        alt_loc2_field = "altLoc2",
        symop2_field = "sym2")

    if not x:
        self.pdb_error("HYDBND", "Atom not found")
795
def process_SLTBRG(self, rec):
    """Register the salt bridge described by a SLTBRG record.

    The two bonded atoms are resolved by bond_processor() from the
    chainID/resSeq/iCode/name/altLoc/sym fields suffixed 1 and 2; a
    pdb_error is reported when the bond cannot be resolved.
    """
    params = {"rec": rec, "bond_type": "saltbr"}

    ## the two ends of the bond use the same field layout, suffixed 1 / 2
    for end in ("1", "2"):
        params["chain_id%s_field" % (end)] = "chainID" + end
        params["res_seq%s_field" % (end)] = "resSeq" + end
        params["icode%s_field" % (end)] = "iCode" + end
        params["name%s_field" % (end)] = "name" + end
        params["alt_loc%s_field" % (end)] = "altLoc" + end
        params["symop%s_field" % (end)] = "sym" + end

    bkey = self.bond_processor(**params)
    if not bkey:
        self.pdb_error("SLTBRG", "Atom not found")
818
def process_CONECT(self, rec):
    """Register the bonds listed in a CONECT record.

    The record's "serial" atom is bonded to every atom named in the
    serialBond* (covalent), serialHydBond* (hydrogen) and serialSaltBond*
    (salt bridge) fields that are present.  Atoms are resolved through
    self.atom_serial_map; a missing serial field or an unknown serial
    number is reported through pdb_error().
    """
    if "serial" not in rec:
        self.pdb_error("CONECT", "missing serial field")
        return
    serial = rec["serial"]

    if serial not in self.atom_serial_map:
        self.pdb_error("CONECT", "incorrect serial number")
        return
    atm1 = self.atom_serial_map[serial]

    ## (bond type, record fields holding the partner serial numbers)
    bond_fields = (
        ("covale", ["serialBond1", "serialBond2",
                    "serialBond3", "serialBond4"]),
        ("hydrog", ["serialHydBond1", "serialHydBond2",
                    "serialHydBond3", "serialHydBond4"]),
        ("saltbr", ["serialSaltBond1", "serialSaltBond2"]),
    )

    for bond_type, field_list in bond_fields:
        for field in field_list:
            if field not in rec:
                continue
            serial2 = rec[field]

            if serial2 not in self.atom_serial_map:
                self.pdb_error("CONECT", "incorrect serial number")
                continue

            self.bond_processor(
                rec = rec,
                bond_type = bond_type,
                atm1 = atm1,
                atm2 = self.atom_serial_map[serial2],
                symop1_field = None,
                symop2_field = None)
863 -class PDBFileBuilder(object):
864 """Builds a PDBFile object from a Structure object. 865 """
def __init__(self, struct, pdb_file):
    """Populate pdb_file with the records describing struct.

    Stores both arguments on the builder, resets the atom serial number
    bookkeeping, and then runs every section writer in order.
    """
    self.struct = struct
    self.pdb_file = pdb_file

    ## atom serial number bookkeeping
    self.atom_count = 0
    self.atom_serial_num = 0
    self.atom_serial_map = {}

    ## the section writers, in the order they are run
    section_writers = (
        self.add_title_section,
        self.add_primary_structure_section,
        self.add_heterogen_section,
        self.add_secondary_structure_section,
        self.add_connectivity_annotation_section,
        self.add_miscellaneous_fatures_section,
        self.add_crystallographic_coordinate_transformation_section,
        self.add_coordinate_section,
        self.add_connectivity_section,
        self.bookkeeping_section,
    )
    for write_section in section_writers:
        write_section()
884
def next_serial_number(self):
    """Advance self.atom_serial_num by one and return the new value."""
    serial = self.atom_serial_num + 1
    self.atom_serial_num = serial
    return serial
888
def new_atom_serial(self, atm):
    """Return the serial number assigned to atm.

    An atom seen before gets the number already stored for it in
    self.atom_serial_map; a new atom is given the next serial number,
    which is remembered so that records referring to atoms by serial
    number can look it up later.
    """
    assert isinstance(atm, Structure.Atom)

    serial_map = self.atom_serial_map
    if atm in serial_map:
        return serial_map[atm]

    serial = self.next_serial_number()
    serial_map[atm] = serial
    return serial
904
def set_from_cifdb(self, rec, field, ctbl, ccol):
    """Set rec[field] from the cifdb value at table ctbl, column ccol.

    Nothing happens when the lookup (or the assignment) raises KeyError.
    """
    cifdb = self.struct.cifdb
    try:
        table = cifdb[ctbl]
        rec[field] = table[ccol]
    except KeyError:
        ## value not available: leave the record field unset
        return
910
def add_title_section(self):
    """HEADER, TITLE, EXPDTA, AUTHOR

    Appends a HEADER record, one TITLE record per 60 characters of the
    cifdb struct/title value, an EXPDTA record for the cifdb exptl/method
    value, and one AUTHOR record listing the cifdb audit_author names.
    Each record type whose cifdb data is missing is skipped.
    """
    ## HEADER record
    header = PDB.HEADER()
    self.pdb_file.append(header)

    header["idCode"] = self.struct.structure_id
    for field, ctbl, ccol in (
            ("depDate", "database_pdb_rev", "date_original"),
            ("classification", "struct_keywords", "pdbx_keywords")):
        self.set_from_cifdb(header, field, ctbl, ccol)

    ## TITLE records: the title is cut into 60 character pieces, and
    ## every piece after the first carries its continuation number
    try:
        struct_title = self.struct.cifdb["struct"]["title"]
    except KeyError:
        pass
    else:
        starts = range(0, len(struct_title), 60)
        for cont, start in enumerate(starts, 1):
            title = PDB.TITLE()
            self.pdb_file.append(title)

            if cont > 1:
                title["continuation"] = cont

            title["title"] = struct_title[start:start + 60]

    ## EXPDTA record
    try:
        exptl_method = self.struct.cifdb["exptl"]["method"]
    except KeyError:
        pass
    else:
        expdta = PDB.EXPDTA()
        self.pdb_file.append(expdta)
        expdta["technique"] = exptl_method

    ## AUTHOR record
    ## XXX: need to write a function to fix author names to PDB format
    try:
        audit_author = self.struct.cifdb["audit_author"]
    except KeyError:
        pass
    else:
        name_list = []
        for cif_row in audit_author:
            ## rows without a name are left out of the list
            try:
                name_list.append(cif_row["name"])
            except KeyError:
                pass

        author = PDB.AUTHOR()
        self.pdb_file.append(author)
        author["authorList"] = ",".join(name_list)
969
def add_primary_structure_section(self):
    """DBREF, SEQADV, SEQRES, MODRES

    Writes SEQRES records for every chain which has a sequence: each
    record holds up to 13 residue names (resName1..resName13) together
    with its running serNum, the chain ID and the total sequence length.
    """
    res_name_fields = ["resName%d" % (i) for i in range(1, 14)]

    for chain in self.struct.iter_chains():
        seq_len = len(chain.sequence)
        if seq_len == 0:
            continue

        sernum = 0
        seq_index = 0
        while seq_index < seq_len:
            seqres = PDB.SEQRES()
            self.pdb_file.append(seqres)

            sernum += 1
            seqres["serNum"] = sernum
            seqres["chainID"] = chain.chain_id
            seqres["numRes"] = seq_len

            ## fill the residue name slots until the sequence runs out
            for field in res_name_fields:
                try:
                    seqres[field] = chain.sequence[seq_index]
                except IndexError:
                    break
                seq_index += 1
998
def add_heterogen_section(self):
    """HET, HETNAM, HETSYN, FORMUL

    Placeholder: no records of this section are written.
    """
    return None
1003
def add_secondary_structure_section(self):
    """HELIX, SHEET, TURN
    PDB files do not put separate secondary structure descriptions
    within MODEL definitions, so you have to hope the models
    do not differ in secondary structure.  mmLib allows separate
    MODELs to have different secondary structure, but one MODEL must
    be chosen for the PDB file, so the default Model of the Structure
    is used.

    Writes one HELIX record per alpha helix and one SHEET record per
    strand of every beta sheet.
    """

    def set_fragment_fields(pdb_rec, seq_key, icode_key, fragment_id):
        ## split a fragment ID into the sequence number / insertion code
        ## fields of pdb_rec; an ID which cannot be split leaves both unset
        try:
            pdb_rec[seq_key], pdb_rec[icode_key] = \
                Structure.fragment_id_split(fragment_id)
        except ValueError:
            pass

    ## HELIX
    serial_num = 0
    for alpha_helix in self.struct.iter_alpha_helicies():
        serial_num += 1

        helix = PDB.HELIX()
        self.pdb_file.append(helix)

        helix["serNum"] = serial_num
        helix["helixID"] = alpha_helix.helix_id
        helix["helixClass"] = alpha_helix.helix_class

        helix["initResName"] = alpha_helix.res_name1
        helix["initChainID"] = alpha_helix.chain_id1
        set_fragment_fields(
            helix, "initSeqNum", "initICode", alpha_helix.fragment_id1)

        helix["endResName"] = alpha_helix.res_name2
        helix["endChainID"] = alpha_helix.chain_id2
        set_fragment_fields(
            helix, "endSeqNum", "endICode", alpha_helix.fragment_id2)

        helix["comment"] = alpha_helix.details
        helix["length"] = alpha_helix.helix_length

    ## SHEET
    for beta_sheet in self.struct.iter_beta_sheets():
        num_strands = len(beta_sheet.strand_list)

        strand_num = 0
        for strand in beta_sheet.iter_strands():
            strand_num += 1

            sheet = PDB.SHEET()
            self.pdb_file.append(sheet)

            sheet["strand"] = strand_num
            sheet["sheetID"] = beta_sheet.sheet_id
            sheet["numStrands"] = num_strands

            sheet["initResName"] = strand.res_name1
            sheet["initChainID"] = strand.chain_id1
            set_fragment_fields(
                sheet, "initSeqNum", "initICode", strand.fragment_id1)

            sheet["endResName"] = strand.res_name2
            sheet["endChainID"] = strand.chain_id2
            set_fragment_fields(
                sheet, "endSeqNum", "endICode", strand.fragment_id2)

            ## registration with the previous strand; the sequence number
            ## fields are named curResSeq / prevResSeq, matching the names
            ## process_SHEET reads (they were written as curSeqNum /
            ## prevSeqNum before)
            sheet["curAtom"] = strand.reg_atom
            sheet["curResName"] = strand.reg_res_name
            sheet["curChainID"] = strand.reg_chain_id
            set_fragment_fields(
                sheet, "curResSeq", "curICode", strand.reg_fragment_id)

            sheet["prevAtom"] = strand.reg_prev_atom
            sheet["prevResName"] = strand.reg_prev_res_name
            sheet["prevChainID"] = strand.reg_prev_chain_id
            set_fragment_fields(
                sheet, "prevResSeq", "prevICode",
                strand.reg_prev_fragment_id)
1089
1091 """SSBOND, LINK, SLTBRG, CISPEP 1092 """ 1093 pass
1094
1096 """SITE 1097 """ 1098 serial_num = 0 1099 for site in self.struct.iter_sites(): 1100 num_fragments = len(site.fragment_dict_list) 1101 1102 site_pdb = None 1103 key_index = 0 1104 for frag_dict in site.fragment_dict_list: 1105 1106 if site_pdb is None or key_index==4: 1107 serial_num += 1 1108 1109 key_index = 0 1110 1111 site_pdb = PDB.SITE() 1112 self.pdb_file.append(site_pdb) 1113 1114 site_pdb["serNum"] = serial_num 1115 site_pdb["siteID"] = site.site_id 1116 site_pdb["numRes"] = num_fragments 1117 1118 chain_id = "chainID%d" % (key_index) 1119 res_name = "resName%d" % (key_index) 1120 res_seq = "seq%d" % (key_index) 1121 icode = "icode%d" % (key_index) 1122 1123 site_pdb[chain_id] = frag_dict["chain_id"] 1124 site_pdb[res_name] = frag_dict["res_name"] 1125 try: 1126 site_pdb[res_seq], site_pdb[icode] = Structure.fragment_id_split(frag_dict["frag_id"]) 1127 except KeyError: 1128 pass
1129
1131 """CRYST1, ORIGXn, SCALEn, MTRIXn, TVECT 1132 """ 1133 cryst1 = PDB.CRYST1() 1134 self.pdb_file.append(cryst1) 1135 1136 unit_cell = self.struct.unit_cell 1137 1138 cryst1["a"] = self.struct.unit_cell.a 1139 cryst1["b"] = self.struct.unit_cell.b 1140 cryst1["c"] = self.struct.unit_cell.c 1141 cryst1["alpha"] = self.struct.unit_cell.calc_alpha_deg() 1142 cryst1["beta"] = self.struct.unit_cell.calc_beta_deg() 1143 cryst1["gamma"] = self.struct.unit_cell.calc_gamma_deg() 1144 cryst1["sgroup"] = self.struct.unit_cell.space_group.pdb_name
1145
def add_coordinate_section(self):
    """Write the coordinate section: MODEL, ATOM, SIGATM, ANISOU, SIGUIJ,
    TER, HETATM, ENDMDL.

    When the Structure holds more than one model, each model's atom
    records are bracketed by a MODEL/ENDMDL pair; otherwise the atom
    records are written without any bracketing.  All records are
    appended to self.pdb_file.

    self.struct.default_model is changed temporarily while the models are
    written and is always restored before this method returns or raises.
    """
    if len(self.struct.model_list) <= 1:
        ## case 2: single model, no MODEL/ENDMDL bracketing
        self.add_atom_records()
        return

    ## case 1: multiple models
    ## add_atom_records() writes whichever model is currently the default,
    ## so every model is made the default in turn
    orig_model = self.struct.default_model
    try:
        for model in self.struct.iter_models():
            self.struct.default_model = model

            model_rec = PDB.MODEL()
            self.pdb_file.append(model_rec)
            model_rec["serial"] = model.model_id

            self.add_atom_records()

            endmdl = PDB.ENDMDL()
            self.pdb_file.append(endmdl)
    finally:
        ## restore the caller's default model even if writing a model
        ## failed part way through
        self.struct.default_model = orig_model
1170
def add_connectivity_section(self):
    """CONECT

    Placeholder: no CONECT records are written; the method does nothing.
    """
    return None
1175
def bookkeeping_section(self):
    """MASTER, END

    Only the END record is produced: a new PDB.END record is appended to
    self.pdb_file.  No MASTER record is written.
    """
    ## END
    self.pdb_file.append(PDB.END())
1182
def add_atom_records(self):
    """Write the atom records of the Structure's current default model.

    First pass: for every chain, one ATOM record (plus associated records,
    via add_ATOM) per atom of its standard residues, followed by a TER
    record describing the last standard residue iterated.  Second pass:
    HETATM records for the atoms of every chain's non-standard residues.
    """
    ## pass 1: ATOM records chain by chain, each chain closed by a TER
    for chain in self.struct.iter_chains():
        last_res = None

        for last_res in chain.iter_standard_residues():
            for atom in last_res.iter_all_atoms():
                self.add_ATOM("ATOM", atom)

        ## truthiness test: a chain with no standard residues (or whose
        ## last residue object is falsy) gets no chain termination record
        if not last_res:
            continue

        ## chain termination record; it is appended before its fields are
        ## filled in
        ter = PDB.TER()
        self.pdb_file.append(ter)
        seq_num, ins_code = Structure.fragment_id_split(last_res.fragment_id)
        ter["serial"] = self.next_serial_number()
        ter["resName"] = last_res.res_name
        ter["chainID"] = last_res.chain_id
        ter["resSeq"] = seq_num
        ter["iCode"] = ins_code

    ## pass 2: HETATM records for non-standard groups
    for chain in self.struct.iter_chains():
        for fragment in chain.iter_non_standard_residues():
            for atom in fragment.iter_all_atoms():
                self.add_ATOM("HETATM", atom)
1211
def add_ATOM(self, rec_type, atm):
    """Add the ATOM or HETATM record for one atom, followed by whichever
    SIGATM, ANISOU and SIGUIJ records its optional attributes call for.

    Arguments:
      rec_type -- "ATOM" or "HETATM"; selects the coordinate record class.
      atm      -- the atom to describe.  The attributes read here are
                  fragment_id, chain_id, res_name, name, element, alt_loc,
                  position, occupancy, temp_factor, column6768, charge,
                  sig_position, sig_temp_factor, sig_occupancy, U and
                  sig_U; any of the optional ones may be None.

    Every record created is appended to self.pdb_file, and
    self.atom_count is incremented once per call.

    Raises ValueError if rec_type is neither "ATOM" nor "HETATM".
    """
    ## reject an unknown record type before any state is modified, so a
    ## bad call neither bumps the atom counter nor fails later on an
    ## unbound record variable
    if rec_type not in ("ATOM", "HETATM"):
        raise ValueError(
            "unknown coordinate record type %r; expected \"ATOM\" or "
            "\"HETATM\"" % (rec_type,))

    self.atom_count += 1

    if rec_type == "ATOM":
        atom_rec = PDB.ATOM()
    else:
        atom_rec = PDB.HETATM()

    self.pdb_file.append(atom_rec)

    serial = self.new_atom_serial(atm)
    res_seq, icode = Structure.fragment_id_split(atm.fragment_id)

    atom_rec["serial"] = serial
    atom_rec["chainID"] = atm.chain_id
    atom_rec["resName"] = atm.res_name
    atom_rec["resSeq"] = res_seq
    atom_rec["iCode"] = icode
    atom_rec["name"] = atm.name
    atom_rec["element"] = atm.element
    atom_rec["altLoc"] = atm.alt_loc

    ## each coordinate is written only when it is present
    if atm.position is not None:
        for axis, field in enumerate(("x", "y", "z")):
            if atm.position[axis] is not None:
                atom_rec[field] = atm.position[axis]

    ## optional scalar fields: left out of the record when None
    optional_fields = (
        ("occupancy", atm.occupancy),
        ("tempFactor", atm.temp_factor),
        ("column6768", atm.column6768),
        ("charge", atm.charge),
    )
    for field, value in optional_fields:
        if value is not None:
            atom_rec[field] = value

    ## identification fields shared by the atom record and the
    ## SIGATM/ANISOU/SIGUIJ records that follow it
    common_fields = ("serial", "chainID", "resName", "resSeq", "iCode",
                     "name", "altLoc", "element", "charge")

    def atom_common(arec1, arec2):
        ## copy the shared fields present in arec1 over to arec2
        ## NOTE(review): `in` replaces the Python-2-only has_key(); this
        ## assumes the PDB record classes are dict-like -- confirm
        for field in common_fields:
            if field in arec1:
                arec2[field] = arec1[field]

    ## record key and (row, column) index of the six tensor elements
    ## written to ANISOU/SIGUIJ records
    tensor_fields = (
        ("u[0][0]", 0, 0),
        ("u[1][1]", 1, 1),
        ("u[2][2]", 2, 2),
        ("u[0][1]", 0, 1),
        ("u[0][2]", 0, 2),
        ("u[1][2]", 1, 2),
    )

    def add_tensor(rec, tensor):
        ## values are stored as integers: element * 10000, rounded
        for field, i, j in tensor_fields:
            if tensor[i, j] is not None:
                rec[field] = int(round(tensor[i, j] * 10000.0))

    if atm.sig_position is not None:
        sigatm_rec = PDB.SIGATM()
        self.pdb_file.append(sigatm_rec)
        atom_common(atom_rec, sigatm_rec)

        for axis, field in enumerate(("sigX", "sigY", "sigZ")):
            if atm.sig_position[axis] is not None:
                sigatm_rec[field] = atm.sig_position[axis]
        if atm.sig_temp_factor is not None:
            sigatm_rec["sigTempFactor"] = atm.sig_temp_factor
        if atm.sig_occupancy is not None:
            sigatm_rec["sigOccupancy"] = atm.sig_occupancy

    if atm.U is not None:
        anisou_rec = PDB.ANISOU()
        self.pdb_file.append(anisou_rec)
        atom_common(atom_rec, anisou_rec)
        add_tensor(anisou_rec, atm.U)

    if atm.sig_U is not None:
        siguij_rec = PDB.SIGUIJ()
        self.pdb_file.append(siguij_rec)
        atom_common(atom_rec, siguij_rec)
        add_tensor(siguij_rec, atm.sig_U)
1327