Package mmLib :: Module Library
[hide private]
[frames] | no frames]

Source Code for Module mmLib.Library

  1  ## Copyright 2002-2010 by PyMMLib Development Group (see AUTHORS file) 
  2  ## This code is part of the PyMMLib distribution and governed by 
  3  ## its license.  Please see the LICENSE file that should have been 
  4  ## included as part of this package. 
  5  """Monomer and element library data classes.  The Library classes are used 
  6  for the identification and construction of biopolymers and ligands. 
  7  """ 
  8  import os 
  9  import sys 
 10  import types 
 11   
 12  import ConsoleOutput 
 13  import mmCIF 
 14   
 15   
 16  ############################################################################### 
 17  ## Library Data Locations 
 18  ## 
 19  (MMLIB_PATH, JUNK) = os.path.split(__file__) 
 20  DATA_PATH               = os.path.join(MMLIB_PATH, "Data") 
 21  ELEMENT_DATA_PATH       = os.path.join(MMLIB_PATH, "Data", "elements.cif") 
 22  MMLIB_MONOMER_DATA_PATH = os.path.join(MMLIB_PATH, "Data", "monomers.cif") 
 23  RCSB_MONOMER_DATA_FILE  = os.path.join(MMLIB_PATH, "Data", "Monomers.zip")  
 24  RCSB_MONOMER_DATA_PATH  = os.path.join(MMLIB_PATH, "Data", "Monomers")  
 25   
 26  ############################################################################### 
 27  ## Caches 
 28  ## 
 29  ELEMENT_CACHE          = {} 
 30  MONOMER_RES_NAME_CACHE = {} 
 31   
 32  ELEMENT_CIF_FILE = mmCIF.mmCIFFile() 
 33  ELEMENT_CIF_FILE.load_file(open(ELEMENT_DATA_PATH, "r")) 
 34   
 35  MMLIB_MONOMERS_CIF = mmCIF.mmCIFFile() 
 36  MMLIB_MONOMERS_CIF.load_file(open(MMLIB_MONOMER_DATA_PATH, "r")) 
 37   
 38  RCSB_USE_ZIP = None 
 39  RCSB_ZIP = None 
 40   
 41  ############################################################################### 
 42  ## Constants 
 43  ## 
 44  ELEMENT_SYMBOL_DICT = { 
 45      "H" : True, "h" : True, 
 46      "He": True, "he": True, "HE": True, 
 47      "Li": True, "li": True, "LI": True, 
 48      "Be": True, "be": True, "BE": True, 
 49      "B" : True, "b" : True, 
 50      "C" : True, "c" : True, 
 51      "N" : True, "n" : True, 
 52      "O" : True, "o" : True, 
 53      "F" : True, "f" : True, 
 54      "Ne": True, "ne": True, "NE": True, 
 55      "Na": True, "na": True, "NA": True, 
 56      "Mg": True, "mg": True, "MG": True, 
 57      "Al": True, "al": True, "AL": True, 
 58      "Si": True, "si": True, "SI": True, 
 59      "P" : True, "p" : True, 
 60      "S" : True, "s" : True, 
 61      "Cl": True, "cl": True, "CL": True, 
 62      "Ar": True, "ar": True, "AR": True, 
 63      "K" : True, "k" : True, 
 64      "Ca": True, "ca": True, "CA": True, 
 65      "Sc": True, "sc": True, "SC": True, 
 66      "Ti": True, "ti": True, "TI": True, 
 67      "V" : True, "v" : True, 
 68      "Cr": True, "cr": True, "CR": True, 
 69      "Mn": True, "mn": True, "MN": True, 
 70      "Fe": True, "fe": True, "FE": True, 
 71      "Co": True, "co": True, "CO": True, 
 72      "Ni": True, "ni": True, "NI": True, 
 73      "Cu": True, "cu": True, "CU": True, 
 74      "Zn": True, "zn": True, "ZN": True, 
 75      "Ga": True, "ga": True, "GA": True, 
 76      "Ge": True, "ge": True, "GE": True, 
 77      "As": True, "as": True, "AS": True, 
 78      "Se": True, "se": True, "SE": True, 
 79      "Br": True, "br": True, "BR": True, 
 80      "Kr": True, "kr": True, "KR": True, 
 81      "Rb": True, "rb": True, "RB": True, 
 82      "Sr": True, "sr": True, "SR": True, 
 83      "Y" : True, "y" : True, 
 84      "Zr": True, "zr": True, "ZR": True, 
 85      "Nb": True, "nb": True, "NB": True, 
 86      "Mo": True, "mo": True, "MO": True, 
 87      "Tc": True, "tc": True, "TC": True, 
 88      "Ru": True, "ru": True, "RU": True, 
 89      "Rh": True, "rh": True, "RH": True, 
 90      "Pd": True, "pd": True, "PD": True, 
 91      "Ag": True, "ag": True, "AG": True, 
 92      "Cd": True, "cd": True, "CD": True, 
 93      "In": True, "in": True, "IN": True, 
 94      "Sn": True, "sn": True, "SN": True, 
 95      "Sb": True, "sb": True, "SB": True, 
 96      "Te": True, "te": True, "TE": True, 
 97      "I" : True, "i" : True, 
 98      "Xe": True, "xe": True, "XE": True, 
 99      "Cs": True, "cs": True, "CS": True, 
100      "Ba": True, "ba": True, "BA": True, 
101      "La": True, "la": True, "LA": True, 
102      "Ce": True, "ce": True, "CE": True, 
103      "Pr": True, "pr": True, "PR": True, 
104      "Nd": True, "nd": True, "ND": True, 
105      "Pm": True, "pm": True, "PM": True, 
106      "Sm": True, "sm": True, "SM": True, 
107      "Eu": True, "eu": True, "EU": True, 
108      "Gd": True, "gd": True, "GD": True, 
109      "Tb": True, "tb": True, "TB": True, 
110      "Dy": True, "dy": True, "DY": True, 
111      "Ho": True, "ho": True, "HO": True, 
112      "Er": True, "er": True, "ER": True, 
113      "Tm": True, "tm": True, "TM": True, 
114      "Yb": True, "yb": True, "YB": True, 
115      "Lu": True, "lu": True, "LU": True, 
116      "Hf": True, "hf": True, "HF": True, 
117      "Ta": True, "ta": True, "TA": True, 
118      "W" : True, "w" : True, 
119      "Re": True, "re": True, "RE": True, 
120      "Os": True, "os": True, "OS": True, 
121      "Ir": True, "ir": True, "IR": True, 
122      "Pt": True, "pt": True, "PT": True, 
123      "Au": True, "au": True, "AU": True, 
124      "Hg": True, "hg": True, "HG": True, 
125      "Tl": True, "tl": True, "TL": True, 
126      "Pb": True, "pb": True, "PB": True, 
127      "Bi": True, "bi": True, "BI": True, 
128      "Po": True, "po": True, "PO": True, 
129      "At": True, "at": True, "AT": True, 
130      "Rn": True, "rn": True, "RN": True, 
131      "Fr": True, "fr": True, "FR": True, 
132      "Ra": True, "ra": True, "RA": True, 
133      "Ac": True, "ac": True, "AC": True, 
134      "Th": True, "th": True, "TH": True, 
135      "Pa": True, "pa": True, "PA": True, 
136      "U" : True, "u" : True } 
137   
## (three-letter code, one-letter code) pairs of the amino acids known to
## this module; the three tables below are all derived from this one
_AMINO_ACID_CODES = (
    ("GLY", "G"), ("ALA", "A"), ("VAL", "V"), ("LEU", "L"), ("ILE", "I"),
    ("PRO", "P"), ("PHE", "F"), ("TYR", "Y"), ("TRP", "W"), ("MET", "M"),
    ("CYS", "C"), ("SER", "S"), ("THR", "T"), ("ASP", "D"), ("GLU", "E"),
    ("HIS", "H"), ("LYS", "K"), ("ARG", "R"), ("ASN", "N"), ("GLN", "Q"),
)

## three-letter codes, in the order of the table above
AMINO_ACID3_LIST = list(pair[0] for pair in _AMINO_ACID_CODES)

## three-letter code -> one-letter code
AMINO_ACID31_DICT = dict(_AMINO_ACID_CODES)

## one-letter code -> three-letter code
AMINO_ACID13_DICT = dict((pair[1], pair[0]) for pair in _AMINO_ACID_CODES)
156   
NUCLEIC_ACID_LIST = ["A", "G", "C", "T", "U"]

## NUCLEIC_ACID_RES_NAME_DICT maps every accepted spelling of a nucleic acid
## residue name (the base letter itself plus its "X+", "Xr" and "+X"
## variants) to the base letter; ALT_RES_NAME_DICT holds the variants only
NUCLEIC_ACID_RES_NAME_DICT = {}
ALT_RES_NAME_DICT = {}
for _base in ("C", "G", "A", "T", "U"):
    NUCLEIC_ACID_RES_NAME_DICT[_base] = _base
    for _alias in (_base + "+", _base + "r", "+" + _base):
        NUCLEIC_ACID_RES_NAME_DICT[_alias] = _base
        ALT_RES_NAME_DICT[_alias] = _base
del _base, _alias

## Add alternate residue monomer names here:
ALT_RES_NAME_DICT.update({
    "Ad": "A", "Td": "T", "Gd": "G", "Cd": "C",
    })
176   
177  ############################################################################### 
178  ## Library Description Objects 
179  ## 
180   
class ElementDesc(object):
    """Element description class returned by library_get_element_desc().

    All attributes start out as None; library_construct_element_desc()
    assigns the ones it reads from the element library.
    """
    def __init__(self):
        self.cif_data = None
        self.name = None
        self.symbol = None
        self.group = None
        self.period = None
        self.atomic_number = None
        self.atomic_weight = None
        self.atomic_radius = None
        self.covalent_radius = None
        self.van_der_waals_radius = None
        self.electronegativity = None
        self.color_rgbf = None

        ## names under which library_construct_element_desc() stores the
        ## atomic number and the van der Waals radius; declared here so
        ## that they exist on every instance
        self.number = None
        self.vdw_radius = None
198 199
class MonomerDesc(object):
    """Monomer description class returned by library_get_monomer_desc().
    """
    def __init__(self):
        ## scalar descriptions of the monomer
        self.res_name = None
        self.full_name = None
        self.one_letter_code = None
        self.type = None
        self.pdbx_type = None
        self.formula = None
        self.rcsb_class_1 = None
        self.chem_type = None

        ## per-instance containers for atoms, bonds and torsion angles
        self.atom_list = []
        self.atom_dict = {}
        self.alt_atom_dict = {}
        self.bond_list = []
        self.torsion_angle_dict = {}

        ## classification flags
        self.amino_acid = False
        self.nucleic_acid = False
        self.water = False

    def is_amino_acid(self):
        """Returns the amino_acid flag: True if the Monomer is an amino
        acid, otherwise False.
        """
        return self.amino_acid

    def is_nucleic_acid(self):
        """Returns the nucleic_acid flag: True if the Monomer is a nucleic
        acid, otherwise False.
        """
        return self.nucleic_acid

    def is_standard_residue(self):
        """Returns a true value if the Monomer is flagged as an amino acid
        or as a nucleic acid.
        """
        return self.amino_acid or self.nucleic_acid

    def is_non_standard_residue(self):
        """Returns True if the Monomer is flagged neither as an amino acid
        nor as a nucleic acid.
        """
        return not (self.amino_acid or self.nucleic_acid)

    def is_water(self):
        """Returns the water flag: True if the Monomer is a water molecule,
        otherwise False.
        """
        return self.water
249 250 251 ############################################################################### 252 ## Library API 253 ## 254
def library_construct_element_desc(symbol):
    """Constructs the ElementDesc object for the given element symbol.

    The data is looked up in ELEMENT_CIF_FILE.  Returns the new ElementDesc,
    or None (after logging a warning) when ELEMENT_CIF_FILE has no data for
    the symbol.
    """
    cif_data = ELEMENT_CIF_FILE.get_data(symbol)
    if cif_data is None:
        ConsoleOutput.warning("element description not found for %s" % (symbol))
        return None

    ## create element description
    element_desc = ElementDesc()
    element_desc.cif_data = cif_data

    element = cif_data.get_table("element")
    element_desc.name = element["name"]
    element_desc.symbol = element["symbol"]

    ## ElementDesc declares atomic_number and van_der_waals_radius, while
    ## this function has always stored the values as number and vdw_radius;
    ## fill in both spellings so neither attribute is left at None
    atomic_number = int(element["number"])
    element_desc.number = atomic_number
    element_desc.atomic_number = atomic_number

    element_desc.atomic_weight = float(element["atomic_weight"])

    ## NOTE(review): "van_der_walls_radius" (sic) is the key used for the
    ## lookup; presumably it matches the spelling in elements.cif -- confirm
    ## before correcting it
    vdw_radius = float(element["van_der_walls_radius"])
    element_desc.vdw_radius = vdw_radius
    element_desc.van_der_waals_radius = vdw_radius

    ## a missing covalent radius is stored as 0.0
    element_desc.covalent_radius = float(element.get("covalent_radius", 0.0))

    ## characters 1-2, 3-4 and 5-6 of color_rgb are read as hexadecimal
    ## bytes (presumably a "#RRGGBB" string) and scaled to 0.0-1.0 floats
    rgb8 = element["color_rgb"]
    element_desc.color_rgbf = (int(rgb8[1:3], 16) / 255.0,
                               int(rgb8[3:5], 16) / 255.0,
                               int(rgb8[5:7], 16) / 255.0)

    return element_desc
282 283
def library_get_element_desc(symbol):
    """Loads/caches/returns an instance of the ElementDesc class for the given
    element symbol.  The source of the element data is the
    mmLib/Data/elements.cif file.

    Returns None when no description can be constructed for the symbol;
    such failures are not cached, so the lookup is retried on the next call.
    """
    assert isinstance(symbol, str)

    try:
        return ELEMENT_CACHE[symbol]
    except KeyError:
        pass

    ## library_construct_element_desc() logs its own warning when the symbol
    ## is unknown, so the warning is not repeated here
    element_desc = library_construct_element_desc(symbol)
    if element_desc is not None:
        ELEMENT_CACHE[symbol] = element_desc
    return element_desc
303 304
def library_use_monomer_zipfile():
    """Returns True if the zipfile version of the monomer library should be
    used, or False if the uncompressed directory hierarchy should be used.

    The decision is made on the first call by trying to open
    RCSB_MONOMER_DATA_FILE as a zip archive.  The result is remembered in
    the module global RCSB_USE_ZIP, and the opened archive is kept in
    RCSB_ZIP.
    """
    ## check if monomers are available in a zip file
    global RCSB_USE_ZIP
    global RCSB_ZIP

    ## this should only run once
    if RCSB_USE_ZIP is None:
        import zipfile
        try:
            RCSB_ZIP = zipfile.ZipFile(RCSB_MONOMER_DATA_FILE)
        except IOError:
            ## the archive is missing or unreadable
            RCSB_USE_ZIP = False
        except zipfile.BadZipfile:
            ## the file exists but is not a valid archive; without this
            ## handler the exception would escape on every call, because
            ## RCSB_USE_ZIP would never leave its None state
            ConsoleOutput.warning(
                "monomer library zipfile is not a valid zip archive: %s" % (
                RCSB_MONOMER_DATA_FILE))
            RCSB_USE_ZIP = False
        else:
            RCSB_USE_ZIP = True
    return RCSB_USE_ZIP
322 323
def library_open_monomer_lib_zipfile(monomer_name):
    """Returns a file-like object holding the mmCIF monomer library entry
    for monomer_name when it is found in the monomer library zipfile.
    Returns None when the zipfile is not in use, or (after logging a
    warning) when the zipfile has no entry for the upper-cased name.
    """
    if not library_use_monomer_zipfile():
        return None

    ## read data from zip file
    try:
        blob = RCSB_ZIP.read(monomer_name.upper())
    except KeyError:
        ConsoleOutput.warning("monomer description not found in zipfile for '%s'" % (monomer_name))
        return None

    ## wrap the raw bytes so that callers get a file-like object
    from cStringIO import StringIO
    return StringIO(blob)
338 339
def library_open_monomer_lib_directory(monomer_name):
    """Returns the open file object for the mmCIF monomer library file if it
    is found as an uncompressed mmCIF file at the path:
    mmLib/Data/Monomers/NAME[0]/NAME.cif
    Returns None if there is no such file.
    """
    assert len(monomer_name) > 0

    ## files are stored under the upper-cased name, inside a subdirectory
    ## named after its first character
    upper_name = monomer_name.upper()
    cif_name = upper_name + ".cif"
    cif_path = os.path.join(RCSB_MONOMER_DATA_PATH, upper_name[0], cif_name)

    if not os.path.isfile(cif_path):
        return None
    return open(cif_path, "r")
351 352
def library_open_monomer_lib_file(monomer_name):
    """Returns the open file object for the mmCIF monomer library file if it
    is found by library_open_monomer_lib_directory() or by
    library_open_monomer_lib_zipfile(), otherwise returns None.
    library_open_monomer_lib_directory() is checked first because loading
    the file from the directory structure is much faster than loading it
    from a zipfile.
    """
    handle = library_open_monomer_lib_directory(monomer_name)
    if handle is None:
        ## not available as a plain file; fall back to the zip archive
        handle = library_open_monomer_lib_zipfile(monomer_name)
    return handle
365 366
def library_construct_monomer_desc(res_name):
    """Constructs the MonomerDesc object for the given residue name.

    The residue name is first translated through ALT_RES_NAME_DICT (or
    upper-cased when it has no entry there) and the matching monomer library
    file is parsed.  Supplemental data is then merged in from
    MMLIB_MONOMERS_CIF.  Returns None when res_name is empty or (after
    logging a warning) when no monomer library file is found.
    """
    ## return None when the res_name is an empty string
    if len(res_name) < 1:
        return None

    if res_name in ALT_RES_NAME_DICT:
        lookup_name = ALT_RES_NAME_DICT[res_name]
    else:
        lookup_name = res_name.upper()

    libfil = library_open_monomer_lib_file(lookup_name)
    if libfil is None:
        ConsoleOutput.warning("monomer description not found for '%s'" % (res_name))
        return None

    ## generate monomer description
    mon_desc = MonomerDesc()

    ## data from RCSB library; the file is closed even if parsing fails
    rcsb_cif_file = mmCIF.mmCIFFile()
    try:
        rcsb_cif_file.load_file(libfil)
    finally:
        libfil.close()
    rcsb_cif_data = rcsb_cif_file[0]

    chem_comp = rcsb_cif_data.get_table("chem_comp")[0]
    mon_desc.res_name = chem_comp.get_lower("res_name")
    mon_desc.full_name = chem_comp.get_lower("name")
    mon_desc.type = chem_comp.get_lower("type")
    mon_desc.pdbx_type = chem_comp.get_lower("pdbx_type")
    mon_desc.formula = chem_comp.get_lower("formula")
    mon_desc.rcsb_class_1 = chem_comp.get_lower("rcsb_class_1")

    chem_comp_atom = rcsb_cif_data.get_table("chem_comp_atom")
    if chem_comp_atom is not None:
        for cif_row in chem_comp_atom:
            name = cif_row.getitem_lower("atom_id")

            try:
                symbol = cif_row.getitem_lower("type_symbol")
            except KeyError:
                ## this should occur when an atom name does not match the ones
                ## found in a monomer file
                symbol = name
                msg = "unrecognized atom name: '%s' in residue '%s'" % (
                    symbol, res_name)
                ConsoleOutput.warning(msg)

            mon_desc.atom_list.append({"name": name, "symbol": symbol})
            mon_desc.atom_dict[name] = symbol

            ## the alternate atom name is optional
            try:
                alt_name = cif_row.getitem_lower("alt_atom_id")
            except KeyError:
                pass
            else:
                mon_desc.alt_atom_dict[name] = alt_name

    chem_comp_bond = rcsb_cif_data.get_table("chem_comp_bond")
    if chem_comp_bond is not None:
        for cif_row in chem_comp_bond:
            atom1 = cif_row.getitem_lower("atom_id_1")
            atom2 = cif_row.getitem_lower("atom_id_2")
            mon_desc.bond_list.append({"atom1": atom1, "atom2": atom2})

    ## data from mmLib supplemental library in mmLib/Data/monomers.cif
    mmlib_cif_data = MMLIB_MONOMERS_CIF.get_data(res_name)
    if mmlib_cif_data is not None:
        ## get additional chemical information on amino acids
        chem_comp = mmlib_cif_data.get_table("chem_comp")
        if chem_comp is not None:
            mon_desc.one_letter_code = chem_comp["one_letter_code"]
            mon_desc.chem_type = chem_comp["chem_type"]

        ## get torsion angle definitions
        torsion_angles = mmlib_cif_data.get_table("torsion_angles")
        if torsion_angles is not None:
            for cif_row in torsion_angles:
                mon_desc.torsion_angle_dict[cif_row["name"]] = (
                    cif_row["atom1"], cif_row["atom2"],
                    cif_row["atom3"], cif_row["atom4"])

    ## set some derived flags on the monomer description; the "or" guards
    ## against a chem_comp row without a type value (get_lower() presumably
    ## yields None in that case)
    mon_type = (mon_desc.type or "").upper()

    if mon_type == "L-PEPTIDE LINKING":
        mon_desc.amino_acid = True

    elif mon_type == "DNA LINKING" or mon_type == "RNA LINKING":
        mon_desc.nucleic_acid = True

    ## "HOH"/"WAT" are residue names (see library_is_water()), so the water
    ## flag is also derived from the looked-up residue name; the original
    ## comparison against the chem_comp type is kept for compatibility
    elif mon_type in ("HOH", "WAT") or lookup_name in ("HOH", "WAT"):
        mon_desc.water = True

    return mon_desc
461
def library_get_monomer_desc(res_name):
    """Loads/caches/returns the monomer description object MonomerDesc
    for the given monomer residue name.  Returns None when no description
    can be constructed; failed lookups are not cached.
    """
    assert isinstance(res_name, str)

    if res_name in MONOMER_RES_NAME_CACHE:
        return MONOMER_RES_NAME_CACHE[res_name]

    mon_desc = library_construct_monomer_desc(res_name)
    if mon_desc is not None:
        MONOMER_RES_NAME_CACHE[res_name] = mon_desc
    return mon_desc
479 480
def library_is_amino_acid(res_name):
    """Returns True if the res_name is an amino acid.  Returns False when
    no monomer description is available for res_name.
    """
    assert isinstance(res_name, str)

    mon_desc = library_get_monomer_desc(res_name)
    return mon_desc is not None and mon_desc.is_amino_acid()
491 492
def library_is_nucleic_acid(res_name):
    """Returns True if the res_name is a nucleic acid.  Returns False when
    no monomer description is available for res_name.
    """
    assert isinstance(res_name, str)

    mon_desc = library_get_monomer_desc(res_name)
    return mon_desc is not None and mon_desc.is_nucleic_acid()
503 504
def library_is_standard_residue(res_name):
    """Returns True if the res_name is a standard amino or nucleic acid.
    Returns False when no monomer description is available for res_name.
    """
    assert isinstance(res_name, str)

    mon_desc = library_get_monomer_desc(res_name)
    return mon_desc is not None and mon_desc.is_standard_residue()
515 516
def library_is_water(res_name):
    """Return True if the res_name is water, that is, exactly "HOH" or
    "WAT"; otherwise return False.
    """
    assert isinstance(res_name, str)

    return res_name in ("HOH", "WAT")
526 527
def library_guess_element_from_name(name0, res_name):
    """Try everything we can possibly think of to extract the element
    symbol from the atom name.  If available, use the monomer dictionary to
    help narrow down the search.

    name0 is the atom name as given, including any surrounding spaces;
    res_name is the name of the residue the atom belongs to.  Returns the
    guessed element symbol, or None when no guess can be made.
    """
    ## strip any space from the name, and return now if there
    ## is nothing left to work with
    name = name0.strip()
    if name == "":
        return None

    if name0 != res_name:
        ## try the easy way out -- look up the atom in the monomer dictionary
        mdesc = library_get_monomer_desc(res_name)
        if mdesc is not None:
            symbol = mdesc.atom_dict.get(name)
            if symbol is not None:
                return symbol

            if mdesc.is_amino_acid():
                if name == "OXT":
                    return "O"

                msg = "invalid amino acid atom name '%s' in residue '%s'" % (
                    name, res_name)
                ConsoleOutput.warning(msg)

    ## okay, that didn't work...

    ## a name that starts with a space can indicate that the name of the
    ## atom is only 1 character long
    space_flag = name0.startswith(" ")

    ## remove all non-alpha chars from the name
    alpha_name = "".join(c for c in name if c.isalpha())
    if len(alpha_name) == 0:
        return None

    ## look up two possible element symbols in the library:
    ##    e1 is the possible one-character symbol
    ##    e2 is the possible two-character symbol
    e1_symbol = alpha_name[0]
    e1_valid = e1_symbol in ELEMENT_SYMBOL_DICT

    e2_symbol = None
    e2_valid = False
    if len(alpha_name) > 1:
        e2_symbol = alpha_name[:2]
        e2_valid = e2_symbol in ELEMENT_SYMBOL_DICT

    ## e1 or e2 must be valid for us to proceed, otherwise there's just no
    ## possible element symbol contained in the atom name
    if not e2_valid:
        if e1_valid:
            return e1_symbol
        return None

    if not e1_valid:
        return e2_symbol

    ## if we get here, then e1 and e2 are both valid elements

    ## we're out of choices, go by the space_flag: if there is a space
    ## before the atom name, then use the 1-char element symbol;
    ## if there is no space, then use the 2-char element symbol
    if space_flag:
        return e1_symbol
    return e2_symbol
608 609 610 ## <TESTING>
def test_module():
    """Looks up the element description for "H", then prints one
    ELEMENT_SYMBOL_DICT-style source line for every data block found in
    ELEMENT_CIF_FILE.
    """
    library_get_element_desc("H")

    for cif_data in ELEMENT_CIF_FILE:
        symbol = cif_data.name
        if len(symbol) == 1:
            line = ' "%s" : True, "%s" : True,' % (
                symbol, symbol.lower())
        else:
            line = ' "%s": True, "%s": True, "%s": True,' % (
                symbol, symbol.lower(), symbol.upper())
        ## a single parenthesized argument prints identically whether print
        ## is a statement or a function
        print(line)


if __name__ == "__main__":
    test_module()
## </TESTING>